Пример #1
0
def retinanet(inputs,
              backbone_dict,
              num_classes,
              backbone_levels=['C3', 'C4', 'C5'],
              pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
              num_anchors=None,
              create_pyramid_features=__create_pyramid_features,
              create_semantic_head=__create_semantic_head,
              panoptic=False,
              num_semantic_heads=1,
              num_semantic_classes=[3],
              submodels=None,
              frames_per_batch=1,
              semantic_only=False,
              name='retinanet'):
    """Construct a ``RetinaNet`` model on top of a backbone.

    This model is the minimum model necessary for training
    (with the unfortunate exception of anchors as output).

    Args:
        inputs (tensor): The inputs to the network.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Number of classes to classify.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid.
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads.
        num_anchors (int): Number of base anchors. If ``None``,
            ``AnchorParameters.default.num_anchors()`` is used.
        create_pyramid_features (function): Function to create pyramid features.
        create_semantic_head (function): Function for creating a semantic head,
            which can be used for panoptic segmentation tasks.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        num_semantic_heads (int): The number of semantic segmentation heads.
        num_semantic_classes (list): The number of classes for the semantic
            segmentation part of panoptic segmentation tasks. Entry ``i`` is
            used for semantic head ``i``, so it needs at least
            ``num_semantic_heads`` entries.
        submodels (list): Submodels to run on each feature map
            (default is regression and classification submodels).
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        semantic_only (bool): If ``True``, the model outputs only the
            semantic heads. The semantic heads are built in this case even
            when ``panoptic`` is ``False``.
        name (str): Name of the model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input
        and outputs generated anchors and the result from each submodel on
        every pyramid level. When ``panoptic`` is set, the semantic heads
        are appended after the submodel outputs; when ``semantic_only`` is
        set, the semantic heads are the only outputs.

        The order of the outputs is as defined in submodels:

        .. code-block:: python

            [
                regression, classification, other[0], other[1], ...
            ]

        The returned model carries copies of ``backbone_levels`` and
        ``pyramid_levels`` as attributes of the same names.

    """
    if num_anchors is None:
        num_anchors = AnchorParameters.default.num_anchors()

    if submodels is None:
        submodels = default_submodels(num_classes,
                                      num_anchors,
                                      frames_per_batch=frames_per_batch)

    if not isinstance(num_semantic_classes, list):
        num_semantic_classes = list(num_semantic_classes)

    # More than one frame per batch means the data has a z axis.
    ndim = 3 if frames_per_batch > 1 else 2

    # compute pyramid features as per https://arxiv.org/abs/1708.02002

    # Use only the desired backbone levels to create the feature pyramid
    backbone_dict_reduced = {
        level: layer
        for level, layer in backbone_dict.items()
        if level in backbone_levels
    }
    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=ndim)

    # for the desired pyramid levels, run available submodels
    features = [pyramid_dict[key] for key in pyramid_levels]
    object_head = __build_pyramid(submodels, features)

    # Semantic heads are required both for panoptic models and for
    # semantic-only models; previously ``semantic_only`` without ``panoptic``
    # failed because the heads had never been created.
    semantic_head_list = []
    if panoptic or semantic_only:
        # The semantic heads target the lowest-numbered pyramid level.
        semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
        target_level = min(semantic_levels)

        semantic_head_list = [
            create_semantic_head(pyramid_dict,
                                 n_classes=num_semantic_classes[i],
                                 input_target=inputs,
                                 target_level=target_level,
                                 semantic_id=i,
                                 ndim=ndim)
            for i in range(num_semantic_heads)
        ]

    if semantic_only:
        outputs = semantic_head_list
    elif panoptic:
        outputs = object_head + semantic_head_list
    else:
        outputs = object_head

    model = Model(inputs=inputs, outputs=outputs, name=name)
    # Store copies so that mutating the model attributes can never alter the
    # caller's lists or this function's shared default arguments.
    model.backbone_levels = list(backbone_levels)
    model.pyramid_levels = list(pyramid_levels)

    return model
Пример #2
0
def retinamask(inputs,
               backbone_dict,
               num_classes,
               frames_per_batch=1,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               retinanet_model=None,
               anchor_params=None,
               nms=True,
               training=True,
               panoptic=False,
               class_specific_filter=True,
               crop_size=(14, 14),
               mask_size=(28, 28),
               name='retinanet-mask',
               roi_submodels=None,
               max_detections=100,
               score_threshold=0.05,
               nms_threshold=0.5,
               mask_dtype=K.floatx(),
               **kwargs):
    """Construct a masking model by appending layers to compute masks to a
    :mod:`deepcell.model_zoo.retinanet.retinanet` model.

    Args:
        inputs (tensor): The image input. It is passed as ``inputs`` to
            ``retinanet`` and used as the reference image for clipping boxes
            and upsampling features. When ``training`` is ``True`` an
            additional ``boxes_input`` placeholder is created internally and
            added to the model inputs.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Integer, number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid.
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Each entry must also be the name of a
            layer in the retinanet model.
        retinanet_model (tensorflow.keras.Model):
            :mod:`deepcell.model_zoo.retinanet.retinanet` model that
            predicts regression and classification values. Built from the
            other arguments when ``None``.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
            Defaults to ``AnchorParameters.default``.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        training (bool): Whether to use the bounding boxes as the detections,
            during training or to use the
            :mod:`deepcell.layers.filter_detections.FilterDetections`
            during inference.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default ``roi_submodels``.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default ``roi_submodels``.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs, as
            ``(name, submodel)`` pairs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the
            :mod:`deepcell.layers.filter_detections.FilterDetections` layer.
        nms_threshold (float): Minimum NMS for the
            :mod:`deepcell.layers.filter_detections.FilterDetections` layer.
        mask_dtype (str): ``dtype`` to use for mask tensors. Note that the
            default is the value of ``K.floatx()`` at the time this function
            was defined, not at call time.
        kwargs (dict): Additional kwargs to pass to the
            :mod:`deepcell.model_zoo.retinanet.retinanet` model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
        the output of each submodel for each pyramid level and the
        detections. The order is as defined in submodels.

        .. code-block:: python

            [
                regression, classification, other[0], ...,
                boxes_masks, ...,          # one per ROI submodel
                detections...,             # empty when training
                masks, ...,                # raw ROI submodel outputs
                semantic[0], ...           # only when panoptic
            ]
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        # The ROI submodels are created under ``mask_dtype``. The global
        # floatx is always restored afterwards, even if building fails, so a
        # failure here cannot leave the backend in the wrong precision.
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(num_classes, crop_size,
                                                  mask_size, frames_per_batch,
                                                  mask_dtype, retinanet_dtype)
        finally:
            K.set_floatx(retinanet_dtype)

    image = inputs
    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    backbone_dict=backbone_dict,
                                    num_classes=num_classes,
                                    backbone_levels=backbone_levels,
                                    pyramid_levels=pyramid_levels,
                                    panoptic=panoptic,
                                    num_anchors=anchor_params.num_anchors(),
                                    frames_per_batch=frames_per_batch,
                                    **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]

    # Determine the number of semantic heads by counting the layers whose
    # name starts with "semantic".
    n_semantic_heads = sum(
        1 for layer in retinanet_model.layers
        if layer.name.startswith('semantic')
    )

    if panoptic and n_semantic_heads > 0:
        # The panoptic output should not be sent to filter detections
        other = retinanet_model.outputs[2:-n_semantic_heads]
        semantic = retinanet_model.outputs[-n_semantic_heads:]
    else:
        # Also covers panoptic models without any semantic layer: slicing
        # with ``-0`` would otherwise drop every "other" output and treat
        # all outputs as semantic ones.
        other = retinanet_model.outputs[2:]
        semantic = []

    features = [
        retinanet_model.get_layer(level_name).output
        for level_name in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params,
                              features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    if training:
        # During training the boxes are supplied through an extra model
        # input, replacing the regressed boxes computed above.
        if frames_per_batch == 1:
            boxes = Input(shape=(None, 4), name='boxes_input')
        else:
            boxes = Input(shape=(None, None, 4), name='boxes_input')
        detections = []

    else:
        detections = FilterDetections(
            nms=nms,
            nms_threshold=nms_threshold,
            score_threshold=score_threshold,
            class_specific_filter=class_specific_filter,
            max_detections=max_detections,
            name='filtered_detections')([boxes, classification] + other)

        # split up in known outputs and "other"
        boxes = detections[0]

    # ROIs are cropped from the first requested pyramid level, upsampled to
    # match the image.
    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs + list(semantic)

    inputs = [image, boxes] if training else image
    model = Model(inputs=inputs, outputs=outputs, name=name)

    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels
    return model