def retinanet(inputs,
              backbone_dict,
              num_classes,
              backbone_levels=['C3', 'C4', 'C5'],
              pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
              num_anchors=None,
              create_pyramid_features=__create_pyramid_features,
              create_semantic_head=__create_semantic_head,
              panoptic=False,
              num_semantic_heads=1,
              num_semantic_classes=[3],
              submodels=None,
              name='retinanet'):
    """Construct a RetinaNet model on top of a backbone.

    This model is the minimum model necessary for training
    (with the unfortunate exception of anchors as output).

    Args:
        inputs (tensor): The inputs to the network.
        backbone_dict (dict): A dictionary with the backbone layers,
            keyed by level name.
        num_classes (int): Number of classes to classify.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid.
            Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        num_anchors (int): Number of base anchors. If None, the value of
            ``AnchorParameters.default.num_anchors()`` is used.
        create_pyramid_features (function): Function to create pyramid
            features. Called with the reduced backbone dictionary and
            expected to return a dict keyed by pyramid level name.
        create_semantic_head (function): Function for creating a semantic
            head, which can be used for panoptic segmentation tasks.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to false.
        num_semantic_heads (int): Number of semantic heads to create when
            ``panoptic`` is True. Defaults to 1.
        num_semantic_classes (int or list): The number of classes for the
            semantic segmentation part of panoptic segmentation tasks,
            one entry per semantic head. A single int is treated as a
            one-element list. Must provide at least ``num_semantic_heads``
            entries. Defaults to [3].
        submodels (list): Submodels to run on each feature map
            (default is regression and classification submodels).
        name (str): Name of the model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input and
            outputs generated anchors and the result from each submodel on
            every pyramid level.

            The order of the outputs is as defined in submodels:

            ```
            [
                regression, classification, other[0], other[1], ...
            ]
            ```

            When ``panoptic`` is True the semantic heads are appended
            after the submodel outputs.
    """
    if num_anchors is None:
        num_anchors = AnchorParameters.default.num_anchors()

    if submodels is None:
        submodels = default_submodels(num_classes, num_anchors)

    # Copy the level lists: they are stored on the returned model, and the
    # defaults are shared list objects that must not be mutated through it.
    backbone_levels = list(backbone_levels)
    pyramid_levels = list(pyramid_levels)

    # Accept a bare int (as documented) as well as any iterable of ints.
    # ``list(3)`` raises TypeError, so fall back to wrapping the scalar.
    try:
        num_semantic_classes = list(num_semantic_classes)
    except TypeError:
        num_semantic_classes = [num_semantic_classes]

    # compute pyramid features as per https://arxiv.org/abs/1708.02002

    # Use only the desired backbone levels to create the feature pyramid
    backbone_dict_reduced = {
        k: backbone_dict[k] for k in backbone_dict if k in backbone_levels
    }
    pyramid_dict = create_pyramid_features(backbone_dict_reduced)

    # for the desired pyramid levels, run available submodels
    features = [pyramid_dict[key] for key in pyramid_levels]
    object_head = __build_pyramid(submodels, features)

    if panoptic:
        # The numeric part of each pyramid key (e.g. 'P3' -> 3) identifies
        # the level; the smallest one is passed on as the target level.
        semantic_levels = [
            int(re.findall(r'\d+', k)[0]) for k in pyramid_dict
        ]
        target_level = min(semantic_levels)

        semantic_head_list = [
            create_semantic_head(pyramid_dict,
                                 n_classes=num_semantic_classes[i],
                                 input_target=inputs,
                                 target_level=target_level,
                                 semantic_id=i)
            for i in range(num_semantic_heads)
        ]
        outputs = object_head + semantic_head_list
    else:
        outputs = object_head

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   frames_per_batch=1,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   score_threshold=0.05,
                   nms_threshold=0.5,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.

    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs (tensor): List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Integer, number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid.
            Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model (tensorflow.keras.Model): RetinaNet model that
            predicts regression and classification values. Built with
            ``retinanet`` when None.
        anchor_params (AnchorParameters): Struct containing anchor
            parameters. Defaults to ``AnchorParameters.default``.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to false.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default roi_submodels.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs, as
            ``(name, submodel)`` pairs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the FilterDetections layer.
        nms_threshold (float): Minimum NMS for the FilterDetections layer.
        mask_dtype (str): Dtype to use for mask tensors.
        kwargs (dict): Additional kwargs to pass to the retinanet bbox model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
            the output of each submodel for each pyramid level and the
            detections. The order is as defined in submodels.

            ```
            [
                regression, classification, other[0], ...,
                boxes_masks, boxes, scores, labels, masks, other[0], ...
            ]
            ```

            When ``panoptic`` is True the semantic outputs are appended last.
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # Copy the level lists: they are stored on the returned model, and the
    # defaults are shared list objects that must not be mutated through it.
    backbone_levels = list(backbone_levels)
    pyramid_levels = list(pyramid_levels)

    if roi_submodels is None:
        # The ROI submodels are built under ``mask_dtype``. floatx is global
        # state, so always restore it, even if building the submodels fails.
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(
                num_classes, crop_size, mask_size,
                frames_per_batch, mask_dtype, retinanet_dtype)
        finally:
            K.set_floatx(retinanet_dtype)

    image = inputs
    # NOTE(review): image_shape is not consumed below (ROIs are cropped from
    # the upsampled pyramid feature instead). The layer call is kept so the
    # set of instantiated layers is unchanged.
    image_shape = Shape()(image)

    if retinanet_model is None:
        # NOTE(review): frames_per_batch is forwarded to ``retinanet``;
        # confirm the retinanet builder in scope accepts that keyword.
        retinanet_model = retinanet(
            inputs=image,
            backbone_dict=backbone_dict,
            num_classes=num_classes,
            backbone_levels=backbone_levels,
            pyramid_levels=pyramid_levels,
            panoptic=panoptic,
            num_anchors=anchor_params.num_anchors(),
            frames_per_batch=frames_per_batch,
            **kwargs)

    # parse outputs
    model_outputs = retinanet_model.outputs
    regression = model_outputs[0]
    classification = model_outputs[1]

    if panoptic:
        # Determine the number of semantic heads.
        # NOTE(review): this counts every layer whose name contains
        # 'semantic'; it presumes exactly one such layer per head - confirm
        # against the semantic head builder.
        n_semantic_heads = sum(
            1 for layer in retinanet_model.layers
            if 'semantic' in layer.name)

        # Split with a non-negative index: with zero semantic heads a
        # ``[-0:]`` slice would select *every* output instead of none.
        split = max(len(model_outputs) - n_semantic_heads, 0)

        # The panoptic output should not be sent to filter detections
        other = model_outputs[2:split]
        semantic = model_outputs[split:]
    else:
        other = model_outputs[2:]
        semantic = []

    features = [
        retinanet_model.get_layer(level).output for level in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        nms_threshold=nms_threshold,
        score_threshold=score_threshold,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # the first detection output holds the filtered boxes
    boxes = detections[0]

    # get the region of interest features from the first requested pyramid
    # level, passed through UpsampleLike together with the image
    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    if panoptic:
        outputs += list(semantic)

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.

    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs: List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        backbone_dict: Dictionary with the backbone layers, forwarded to
            the retinanet bbox model when one has to be built.
        num_classes: Integer, number of classes to classify.
        backbone_levels: List of backbone levels used to create the
            feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels: List of pyramid levels (layer names of the
            retinanet model) whose outputs are used as features.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model: deepcell.model_zoo.retinanet.retinanet model,
            returning regression and classification values.
            Built with ``retinanet`` when None.
        anchor_params: Struct containing anchor parameters.
            Defaults to ``AnchorParameters.default``.
        nms: Boolean, whether to use NMS.
        panoptic: Boolean, whether the retinanet model carries semantic
            heads whose outputs bypass detection filtering.
        class_specific_filter: Boolean, use class specific filtering.
        crop_size: 2-length tuple for the size of the ROI crops.
        mask_size: 2-length tuple for the size of the masks.
            Used to create default roi_submodels.
        name: Name of the model.
        roi_submodels: Submodels for processing ROIs, as
            ``(name, submodel)`` pairs.
        max_detections: Maximum number of detections, passed to
            FilterDetections.
        mask_dtype: Data type of the masks, can be different from the main one.
        **kwargs: Additional kwargs to pass to the retinanet bbox model.

    Returns:
        Model with inputs as input and as output the output of each submodel
        for each pyramid level and the detections.

        The order is as defined in submodels.
        ```
        [
            regression, classification, other[0], other[1], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], other[1], ...
        ]
        ```

        When ``panoptic`` is True the semantic outputs are appended last.
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # Copy the level lists: they are stored on the returned model, and the
    # defaults are shared list objects that must not be mutated through it.
    backbone_levels = list(backbone_levels)
    pyramid_levels = list(pyramid_levels)

    if roi_submodels is None:
        # The ROI submodels are built under ``mask_dtype``. floatx is global
        # state, so always restore it, even if building the submodels fails.
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(
                num_classes, crop_size, mask_size,
                mask_dtype, retinanet_dtype)
        finally:
            K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(
            inputs=image,
            backbone_dict=backbone_dict,
            num_classes=num_classes,
            backbone_levels=backbone_levels,
            pyramid_levels=pyramid_levels,
            panoptic=panoptic,
            num_anchors=anchor_params.num_anchors(),
            **kwargs)

    # parse outputs
    model_outputs = retinanet_model.outputs
    regression = model_outputs[0]
    classification = model_outputs[1]

    if panoptic:
        # Determine the number of semantic heads.
        # NOTE(review): this counts every layer whose name contains
        # 'semantic'; it presumes exactly one such layer per head - confirm
        # against the semantic head builder.
        n_semantic_heads = sum(
            1 for layer in retinanet_model.layers
            if 'semantic' in layer.name)

        # Split with a non-negative index: with zero semantic heads a
        # ``[-0:]`` slice would select *every* output instead of none.
        split = max(len(model_outputs) - n_semantic_heads, 0)

        # The panoptic output should not be sent to filter detections
        other = model_outputs[2:split]
        semantic = model_outputs[split:]
    else:
        other = model_outputs[2:]
        semantic = []

    features = [
        retinanet_model.get_layer(level).output for level in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # the first detection output holds the filtered boxes
    boxes = detections[0]

    # get the region of interest features
    roi_input = [image_shape, boxes, classification] + features
    rois = RoiAlign(crop_size=crop_size)(roi_input)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    if panoptic:
        outputs += list(semantic)

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model