def retinanet_mask_3D(inputs,
                      num_classes,
                      retinanet_model=None,
                      anchor_params=None,
                      nms=True,
                      class_specific_filter=True,
                      crop_size=(14, 14),
                      mask_size=(28, 28, 28),
                      name='retinanet-mask-3D',
                      roi_submodels=None,
                      mask_dtype=K.floatx(),
                      **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.

    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs: List of tensorflow.keras.layers.Input. The first input is
            the image, the second input the blob of masks.
        num_classes: Integer, number of classes to classify.
        retinanet_model: deepcell.model_zoo.retinanet.retinanet model,
            returning regression and classification values.
        anchor_params: Struct containing anchor parameters.
        nms: Boolean, whether to use NMS.
        class_specific_filter: Boolean, use class specific filtering.
        crop_size: 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size: 3-length tuple for the size of the masks.
            Used to create default roi_submodels.
        roi_submodels: Submodels for processing ROIs.
        mask_dtype: Data type of the masks, can be different from the main one.
        name: Name of the model.
        **kwargs: Additional kwargs to pass to the retinanet bbox model.

    Returns:
        Model with inputs as input and as output the output of each submodel
        for each pyramid level and the detections. The order is as defined in
        submodels.

        ```
        [
            regression, classification, other[0], other[1], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], other[1], ...
        ]
        ```
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(
            num_classes, crop_size, mask_size,
            mask_dtype, retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(
            inputs=image,
            num_classes=num_classes,
            num_anchors=anchor_params.num_anchors(),
            **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]
    other = retinanet_model.outputs[2:]

    features = [
        retinanet_model.get_layer(name).output
        for name in ['P3', 'P4', 'P5', 'P6', 'P7']
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=100,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    roi_input = [image_shape, boxes, classification] + features
    rois = RoiAlign(crop_size=crop_size)(roi_input)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    return Model(inputs=inputs, outputs=outputs, name=name)
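
# Hypothetical usage sketch (not part of the module): how a caller might build
# the 3D mask model. The input shape, number of classes, and any backbone
# kwargs shown here are assumptions; backbone-specific options are forwarded
# to the underlying retinanet model through **kwargs.
#
#   from tensorflow.keras.layers import Input
#
#   image = Input(shape=(8, 256, 256, 1))  # assumed (frames, y, x, channels)
#   model = retinanet_mask_3D(inputs=image,
#                             num_classes=1,
#                             nms=True,
#                             class_specific_filter=False)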
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   frames_per_batch=1,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   score_threshold=0.05,
                   nms_threshold=0.5,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.

    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs (tensor): List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used to create
            the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to
            ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model (tensorflow.keras.Model): RetinaNet model that
            predicts regression and classification values.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to False.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default roi_submodels.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the FilterDetections layer.
        nms_threshold (float): Minimum NMS threshold for the
            FilterDetections layer.
        mask_dtype (str): Dtype to use for mask tensors.
        kwargs (dict): Additional kwargs to pass to the retinanet bbox model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output the
        output of each submodel for each pyramid level and the detections.
        The order is as defined in submodels.

        ```
        [
            regression, classification, other[0], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], ...
        ]
        ```
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(
            num_classes, crop_size, mask_size, frames_per_batch,
            mask_dtype, retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(
            inputs=image,
            backbone_dict=backbone_dict,
            num_classes=num_classes,
            backbone_levels=backbone_levels,
            pyramid_levels=pyramid_levels,
            panoptic=panoptic,
            num_anchors=anchor_params.num_anchors(),
            frames_per_batch=frames_per_batch,
            **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]

    if panoptic:
        # Determine the number of semantic heads
        n_semantic_heads = len([
            1 for layer in retinanet_model.layers if 'semantic' in layer.name
        ])
        # The panoptic output should not be sent to filter detections
        other = retinanet_model.outputs[2:-n_semantic_heads]
        semantic = retinanet_model.outputs[-n_semantic_heads:]
    else:
        other = retinanet_model.outputs[2:]

    features = [
        retinanet_model.get_layer(name).output for name in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        nms_threshold=nms_threshold,
        score_threshold=score_threshold,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    # roi_input = [image_shape, boxes, classification] + features
    # rois = _RoiAlign(crop_size=crop_size)(roi_input)
    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    if panoptic:
        outputs += list(semantic)

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels
    return model
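
# Hypothetical usage sketch (not part of the module): how a caller might wire
# a backbone into retinanet_mask. The backbone_dict maps backbone level names
# ('C3', 'C4', 'C5') to feature tensors from a backbone network; the input
# shape and the way the backbone features are obtained here are assumptions.
#
#   from tensorflow.keras.layers import Input
#
#   image = Input(shape=(256, 256, 1))
#   # backbone_dict is assumed to come from a backbone constructor, e.g.
#   # {'C3': c3_tensor, 'C4': c4_tensor, 'C5': c5_tensor}
#   backbone_dict = get_backbone_dict(image)  # hypothetical helper
#   model = retinanet_mask(inputs=image,
#                          backbone_dict=backbone_dict,
#                          num_classes=1,
#                          panoptic=False,
#                          class_specific_filter=False)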