Пример #1
0
def retinanet_mask_3D(inputs,
                      num_classes,
                      retinanet_model=None,
                      anchor_params=None,
                      nms=True,
                      class_specific_filter=True,
                      crop_size=(14, 14),
                      mask_size=(28, 28, 28),
                      name='retinanet-mask-3D',
                      roi_submodels=None,
                      mask_dtype=K.floatx(),
                      **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.
    Uses the retinanet bbox model and appends layers to compute masks.
    Args:
        inputs: List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        num_classes: Integer, number of classes to classify.
        retinanet_model: deepcell.model_zoo.retinanet.retinanet model,
            returning regression and classification values.
        anchor_params: Struct containing anchor parameters.
        nms: Boolean, whether to use NMS.
        class_specific_filter: Boolean, use class specific filtering.
        roi_submodels: Submodels for processing ROIs.
        mask_dtype: Data type of the masks, can be different from the main one.
        name: Name of the model.
        **kwargs: Additional kwargs to pass to the retinanet bbox model.
    Returns:
        Model with inputs as input and as output the output of each submodel
        for each pyramid level and the detections. The order is as defined in
        submodels.
        ```
        [
            regression, classification, other[0], other[1], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], other[1], ...
        ]
        ```
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(num_classes, crop_size,
                                              mask_size, mask_dtype,
                                              retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    num_classes=num_classes,
                                    num_anchors=anchor_params.num_anchors(),
                                    **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]
    other = retinanet_model.outputs[2:]
    features = [
        retinanet_model.get_layer(name).output
        for name in ['P3', 'P4', 'P5', 'P6', 'P7']
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=100,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    roi_input = [image_shape, boxes, classification] + features
    rois = RoiAlign(crop_size=crop_size)(roi_input)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    return Model(inputs=inputs, outputs=outputs, name=name)
Пример #2
0
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   frames_per_batch=1,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   score_threshold=0.05,
                   nms_threshold=0.5,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.
    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs (tensor): List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Integer, number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used.
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model (tensorflow.keras.Model): RetinaNet model that predicts
            regression and classification values.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to false.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default roi_submodels.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the FilterDetections layer.
        nms_threshold (float): Minimimum NMS for the FilterDetections layer.
        mask_dtype (str): Dtype to use for mask tensors.
        kwargs (dict): Additional kwargs to pass to the retinanet bbox model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
            the output of each submodel for each pyramid level and the
            detections. The order is as defined in submodels.

            ```
            [
                regression, classification, other[0], ...,
                boxes_masks, boxes, scores, labels, masks, other[0], ...
            ]
            ```

    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(num_classes, crop_size,
                                              mask_size, frames_per_batch,
                                              mask_dtype, retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    backbone_dict=backbone_dict,
                                    num_classes=num_classes,
                                    backbone_levels=backbone_levels,
                                    pyramid_levels=pyramid_levels,
                                    panoptic=panoptic,
                                    num_anchors=anchor_params.num_anchors(),
                                    frames_per_batch=frames_per_batch,
                                    **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]

    if panoptic:
        # Determine the number of semantic heads
        n_semantic_heads = len([
            1 for layer in retinanet_model.layers if 'semantic' in layer.name
        ])

        # The  panoptic output should not be sent to filter detections
        other = retinanet_model.outputs[2:-n_semantic_heads]
        semantic = retinanet_model.outputs[-n_semantic_heads:]
    else:
        other = retinanet_model.outputs[2:]

    features = [
        retinanet_model.get_layer(name).output for name in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params,
                              features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        nms_threshold=nms_threshold,
        score_threshold=score_threshold,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    #
    # roi_input = [image_shape, boxes, classification] + features
    # rois = _RoiAlign(crop_size=crop_size)(roi_input)

    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    if panoptic:
        outputs += list(semantic)

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
Пример #3
0
def retinanet_bbox(model=None,
                   nms=True,
                   panoptic=False,
                   num_semantic_heads=1,
                   class_specific_filter=True,
                   name='retinanet-bbox',
                   anchor_params=None,
                   **kwargs):
    """Construct a RetinaNet model on top of a backbone and adds convenience
    functions to output boxes directly.

    This model uses the minimum retinanet model and appends a few layers
    to compute boxes within the graph. These layers include applying the
    regression values to the anchors and performing NMS.

    Args:
        model (tensorflow.keras.Model): RetinaNet model to append bbox
            layers to. If None, it will create a RetinaNet model using kwargs.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        backbone_levels (list): Backbone levels to use for
            constructing retinanet.
        pyramid_levels (list): Pyramid levels to attach
            the object detection heads to.
        class_specific_filter (bool): Whether to use class specific filtering
            or filter for the best scoring class only.
        name (str): Name of the model.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
            If None, default values are used.
        kwargs (dict): Additional kwargs to pass to the minimal retinanet model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input and
            outputs the detections on the image.

            The order is defined as follows:

            ```
            [
                boxes, scores, labels, other[0], other[1], ...
            ]
            ```

    Raises:
        ValueError: the given model does not have a regression or
            classification submodel.

    """
    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # create RetinaNet model
    if model is None:
        model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs)
    else:
        names = ('regression', 'classification')
        if not all(output in model.output_names for output in names):
            raise ValueError('Input is not a training model (no `regression` '
                             'and `classification` outputs were found, '
                             'outputs are: {}).'.format(model.output_names))

    # compute the anchors
    features = [model.get_layer(l).output for l in model.pyramid_levels]
    anchors = __build_anchors(anchor_params, features)

    # we expect anchors, regression. and classification values as first output
    regression = model.outputs[0]
    classification = model.outputs[1]

    # "other" can be any additional output from custom submodels, by default []
    if panoptic:
        # The last output is the panoptic output, which should not be
        # sent to filter detections
        other = model.outputs[2:-num_semantic_heads]
        semantic = model.outputs[-num_semantic_heads:]
    else:
        other = model.outputs[2:]

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        name='filtered_detections')([boxes, classification] + other)

    # add the semantic head's output if needed
    if panoptic:
        outputs = detections + list(semantic)
    else:
        outputs = detections

    # construct the model
    return Model(inputs=model.inputs, outputs=outputs, name=name)
Пример #4
0
def retinamask_bbox(model,
                    nms=True,
                    panoptic=False,
                    num_semantic_heads=1,
                    class_specific_filter=True,
                    name='retinanet-bbox',
                    anchor_params=None,
                    max_detections=300,
                    frames_per_batch=1,
                    crop_size=(14, 14),
                    **kwargs):
    """Construct a RetinaNet model on top of a backbone and adds convenience
    functions to output boxes directly.
    This model uses the minimum retinanet model and appends a few layers
    to compute boxes within the graph. These layers include applying the
    regression values to the anchors and performing NMS.

    Args:
        model (tensorflow.keras.Model): RetinaNet model to append bbox
            layers to. If ``None``, it will create a ``RetinaNet`` model
            using ``kwargs``.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        num_semantic_heads (int): Total number of semantic heads to build.
        class_specific_filter (bool): Whether to use class specific filtering
            or filter for the best scoring class only.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
        max_detections (int): The maximum number of detections allowed.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default ``roi_submodels``.
        kwargs (dict): Additional kwargs to pass to the
            :mod:`deepcell.model_zoo.retinanet.retinanet` model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input and
        outputs the detections on the image.
        The order is defined as follows:

        .. code-block:: python

            [
                boxes, scores, labels, other[0], other[1], ...
            ]

    Raises:
        ValueError: the given model does not have a regression or
            classification submodel.
    """

    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # create RetinaNet model
    names = ('regression', 'classification')
    if not all(output in model.output_names for output in names):
        raise ValueError('Input is not a training model (no `regression` '
                         'and `classification` outputs were found, '
                         'outputs are: {}).'.format(model.output_names))

    # compute the anchors
    features = [model.get_layer(l).output for l in model.pyramid_levels]
    anchors = __build_anchors(anchor_params,
                              features,
                              frames_per_batch=frames_per_batch)

    # we expect anchors, regression. and classification values as first output
    regression = model.outputs[0]
    classification = model.outputs[1]
    semantic_classes = [
        1 for layer in model.layers if layer.name.startswith('semantic')
    ]

    # "other" can be any additional output from custom submodels, by default []
    if panoptic:
        # The last output is the panoptic output, which should not be
        # sent to filter detections
        num_semantic_heads = len(semantic_classes)
        other = model.outputs[2:-num_semantic_heads]
        semantic = model.outputs[-num_semantic_heads:]
    else:
        other = model.outputs[2:]
        semantic = []

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification])

    # apply submodels to detections
    image = model.layers[0].output
    boxes = detections[0]

    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    mask_submodel = model.get_layer('mask_submodel')
    masks = [mask_submodel(rois)]

    # add the semantic head's output if needed
    outputs = detections + list(masks) + list(semantic)

    # construct the model
    new_model = Model(inputs=model.inputs, outputs=outputs, name=name)

    image_input = model.inputs[0]
    shape = (1, 1, 4) if frames_per_batch == 1 else (1, 1, 1, 4)
    temp_boxes = K.zeros(shape, name='temp_boxes')
    new_inputs = [image_input, temp_boxes]

    final_model = new_model(new_inputs)
    return Model(inputs=image_input, outputs=final_model)
Пример #5
0
def retinanet_bbox(model=None,
                   nms=True,
                   class_specific_filter=True,
                   name='retinanet-bbox',
                   anchor_params=None,
                   **kwargs):
    """Construct a RetinaNet model on top of a backbone and adds convenience
    functions to output boxes directly.

    This model uses the minimum retinanet model and appends a few layers to
    compute boxes within the graph. These layers include applying the regression
    values to the anchors and performing NMS.

    Args:
        model: RetinaNet model to append bbox layers to.
            If None, it will create a RetinaNet model using **kwargs.
        nms: Whether to use non-maximum suppression for the filtering step.
        class_specific_filter: Whether to use class specific filtering or
            filter for the best scoring class only.
        name: Name of the model.
        anchor_params: Struct containing anchor parameters.
            If None, default values are used.
        *kwargs: Additional kwargs to pass to the minimal retinanet model.

    Returns:
        A Model which takes an image as input and
        outputs the detections on the image.

        The order is defined as follows:
        ```
        [
            boxes, scores, labels, other[0], other[1], ...
        ]
        ```
    """
    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # create RetinaNet model
    if model is None:
        model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs)
    else:
        names = ('regression', 'classification')
        if not all(output in model.output_names for output in names):
            raise ValueError('Input is not a training model (no `regression` '
                             'and `classification` outputs were found, '
                             'outputs are: {}).'.format(model.output_names))

    # compute the anchors
    p_names = ['P3', 'P4', 'P5', 'P6', 'P7']
    features = [model.get_layer(p_name).output for p_name in p_names]
    anchors = __build_anchors(anchor_params, features)

    # we expect the anchors, regression and classification values as first output
    regression = model.outputs[0]
    classification = model.outputs[1]

    # "other" can be any additional output from custom submodels, by default this will be []
    other = model.outputs[2:]

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        name='filtered_detections')([boxes, classification] + other)

    # construct the model
    return Model(inputs=model.inputs, outputs=detections, name=name)