def test_serialize_deserialize(self):
  """Checks the generator config round-trips through (de)serialization."""
  tflite_config = {
      'max_detections': 100,
      'max_classes_per_detection': 1,
      'use_regular_nms': True,
      'nms_score_threshold': 0.01,
      'nms_iou_threshold': 0.5,
  }
  init_kwargs = {
      'apply_nms': True,
      'pre_nms_top_k': 1000,
      'pre_nms_score_threshold': 0.1,
      'nms_iou_threshold': 0.5,
      'max_num_detections': 10,
      'nms_version': 'v2',
      'use_cpu_nms': False,
      'soft_nms_sigma': None,
      'tflite_post_processing_config': tflite_config,
  }
  original = detection_generator.MultilevelDetectionGenerator(**init_kwargs)
  # get_config() must echo back exactly the constructor arguments.
  self.assertEqual(original.get_config(), dict(init_kwargs))
  restored = detection_generator.MultilevelDetectionGenerator.from_config(
      original.get_config())
  # Round-tripping through from_config must preserve the full config.
  self.assertAllEqual(original.get_config(), restored.get_config())
def test_serialize_deserialize(self):
  """Validate the network can be serialized and deserialized."""
  num_classes = 3
  min_level, max_level = 3, 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchors_per_location = num_scales * len(aspect_ratios)

  backbone = resnet.ResNet(model_id=50)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level)
  head = dense_prediction_heads.RetinaNetHead(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      num_anchors_per_location=anchors_per_location)
  generator = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)
  model = retinanet_model.RetinaNetModel(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=generator,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=3)

  restored = retinanet_model.RetinaNetModel.from_config(model.get_config())
  # Validate that the config can be forced to JSON.
  _ = restored.to_json()
  # If the serialization was successful, the new config should match the old.
  self.assertAllEqual(model.get_config(), restored.get_config())
def build_retinanet(
    input_specs: tf.keras.layers.InputSpec,
    model_config: retinanet_cfg.RetinaNet,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    backbone: Optional[tf.keras.Model] = None,
    # Fixed: was mis-annotated as Optional[Regularizer]; the value is a Keras
    # model (built by decoders.factory.build_decoder and called on features).
    decoder: Optional[tf.keras.Model] = None
) -> tf.keras.Model:
  """Builds RetinaNet model.

  Args:
    input_specs: Input spec of the image tensor fed to the backbone.
    model_config: The RetinaNet model configuration.
    l2_regularizer: Optional regularizer applied to the backbone, decoder and
      head layers built here.
    backbone: Optional prebuilt backbone; when None, one is built from
      `model_config.backbone`.
    decoder: Optional prebuilt decoder; when None, one is built from
      `model_config` via the decoder factory.

  Returns:
    The assembled `RetinaNetModel`.
  """
  norm_activation_config = model_config.norm_activation
  if not backbone:
    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_activation_config,
        l2_regularizer=l2_regularizer)
  # Calling the backbone on a symbolic input initializes its weights and
  # yields features used below to build decoder/head weights.
  backbone_features = backbone(tf.keras.Input(input_specs.shape[1:]))

  if not decoder:
    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  head_config = model_config.head
  generator_config = model_config.detection_generator
  num_anchors_per_location = (
      len(model_config.anchor.aspect_ratios) * model_config.anchor.num_scales)

  head = dense_prediction_heads.RetinaNetHead(
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_classes=model_config.num_classes,
      num_anchors_per_location=num_anchors_per_location,
      num_convs=head_config.num_convs,
      num_filters=head_config.num_filters,
      attribute_heads=[
          cfg.as_dict() for cfg in (head_config.attribute_heads or [])
      ],
      share_classification_heads=head_config.share_classification_heads,
      use_separable_conv=head_config.use_separable_conv,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  # Builds decoder and head so that their trainable weights are initialized.
  if decoder:
    decoder_features = decoder(backbone_features)
    _ = head(decoder_features)

  detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
      apply_nms=generator_config.apply_nms,
      pre_nms_top_k=generator_config.pre_nms_top_k,
      pre_nms_score_threshold=generator_config.pre_nms_score_threshold,
      nms_iou_threshold=generator_config.nms_iou_threshold,
      max_num_detections=generator_config.max_num_detections,
      nms_version=generator_config.nms_version,
      use_cpu_nms=generator_config.use_cpu_nms,
      soft_nms_sigma=generator_config.soft_nms_sigma,
      tflite_post_processing_config=(
          generator_config.tflite_post_processing.as_dict()))

  model = retinanet_model.RetinaNetModel(
      backbone,
      decoder,
      head,
      detection_generator_obj,
      min_level=model_config.min_level,
      max_level=model_config.max_level,
      num_scales=model_config.anchor.num_scales,
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model
def test_build_model(self, use_separable_conv, build_anchor_boxes, is_training,
                     has_att_heads):
  """Builds RetinaNet and runs a forward pass for each parameter combo."""
  num_classes = 3
  min_level, max_level = 3, 7
  num_scales = 3
  aspect_ratios = [1.0]
  anchor_size = 3
  fpn_num_filters = 256
  head_num_convs = 4
  head_num_filters = 256
  anchors_per_location = num_scales * len(aspect_ratios)
  image_size = 384

  images = np.random.rand(2, image_size, image_size, 3)
  image_shape = np.array([[image_size, image_size], [image_size, image_size]])

  anchor_boxes = None
  if build_anchor_boxes:
    # Pre-compute multilevel anchors, then tile each level to batch size 2.
    anchor_boxes = anchor.Anchor(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        image_size=(image_size, image_size)).multilevel_boxes
    for level in anchor_boxes:
      anchor_boxes[level] = tf.tile(
          tf.expand_dims(anchor_boxes[level], axis=0), [2, 1, 1, 1])

  attribute_heads = ([dict(name='depth', type='regression', size=1)]
                     if has_att_heads else None)

  backbone = resnet.ResNet(model_id=50)
  decoder = fpn.FPN(
      input_specs=backbone.output_specs,
      min_level=min_level,
      max_level=max_level,
      num_filters=fpn_num_filters,
      use_separable_conv=use_separable_conv)
  head = dense_prediction_heads.RetinaNetHead(
      min_level=min_level,
      max_level=max_level,
      num_classes=num_classes,
      attribute_heads=attribute_heads,
      num_anchors_per_location=anchors_per_location,
      use_separable_conv=use_separable_conv,
      num_convs=head_num_convs,
      num_filters=head_num_filters)
  generator = detection_generator.MultilevelDetectionGenerator(
      max_num_detections=10)
  model = retinanet_model.RetinaNetModel(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=generator,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size)

  # Only checks that construction and a forward call succeed; outputs are
  # not inspected here.
  _ = model(images, image_shape, anchor_boxes, training=is_training)
def test_forward(self, strategy, image_size, training, has_att_heads,
                 output_intermediate_features, soft_nms_sigma):
  """Test for creation of a R50-FPN RetinaNet."""
  tf.keras.backend.set_image_data_format('channels_last')
  num_classes = 3
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [1.0]
  num_anchors_per_location = num_scales * len(aspect_ratios)
  # Batch of 2 random images and their (height, width) shapes.
  images = np.random.rand(2, image_size[0], image_size[1], 3)
  image_shape = np.array(
      [[image_size[0], image_size[1]], [image_size[0], image_size[1]]])
  with strategy.scope():
    anchor_gen = anchor.build_anchor_generator(
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3)
    anchor_boxes = anchor_gen(image_size)
    # Tile each per-level anchor tensor to match the batch size of 2.
    for l in anchor_boxes:
      anchor_boxes[l] = tf.tile(
          tf.expand_dims(anchor_boxes[l], axis=0), [2, 1, 1, 1])

    backbone = resnet.ResNet(model_id=50)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        min_level=min_level,
        max_level=max_level)
    if has_att_heads:
      attribute_heads = [dict(name='depth', type='regression', size=1)]
    else:
      attribute_heads = None
    head = dense_prediction_heads.RetinaNetHead(
        min_level=min_level,
        max_level=max_level,
        num_classes=num_classes,
        attribute_heads=attribute_heads,
        num_anchors_per_location=num_anchors_per_location)
    # v1 NMS is forced onto CPU when soft-NMS is requested.
    generator = detection_generator.MultilevelDetectionGenerator(
        max_num_detections=10,
        nms_version='v1',
        use_cpu_nms=soft_nms_sigma is not None,
        soft_nms_sigma=soft_nms_sigma)
    model = retinanet_model.RetinaNetModel(
        backbone=backbone,
        decoder=decoder,
        head=head,
        detection_generator=generator)

    model_outputs = model(
        images,
        image_shape,
        anchor_boxes,
        output_intermediate_features=output_intermediate_features,
        training=training)

  if training:
    # In training mode the model returns raw per-level head outputs.
    cls_outputs = model_outputs['cls_outputs']
    box_outputs = model_outputs['box_outputs']
    for level in range(min_level, max_level + 1):
      self.assertIn(str(level), cls_outputs)
      self.assertIn(str(level), box_outputs)
      # Channel dims: classes * anchors for scores, 4 * anchors for boxes.
      self.assertAllEqual([
          2, image_size[0] // 2**level, image_size[1] // 2**level,
          num_classes * num_anchors_per_location
      ], cls_outputs[str(level)].numpy().shape)
      self.assertAllEqual([
          2, image_size[0] // 2**level, image_size[1] // 2**level,
          4 * num_anchors_per_location
      ], box_outputs[str(level)].numpy().shape)
      if has_att_heads:
        att_outputs = model_outputs['attribute_outputs']
        for att in att_outputs.values():
          self.assertAllEqual([
              2, image_size[0] // 2**level, image_size[1] // 2**level,
              1 * num_anchors_per_location
          ], att[str(level)].numpy().shape)
  else:
    # In inference mode the model returns post-processed detections.
    self.assertIn('detection_boxes', model_outputs)
    self.assertIn('detection_scores', model_outputs)
    self.assertIn('detection_classes', model_outputs)
    self.assertIn('num_detections', model_outputs)
    self.assertAllEqual([2, 10, 4],
                        model_outputs['detection_boxes'].numpy().shape)
    self.assertAllEqual([2, 10],
                        model_outputs['detection_scores'].numpy().shape)
    self.assertAllEqual([2, 10],
                        model_outputs['detection_classes'].numpy().shape)
    self.assertAllEqual([
        2,
    ], model_outputs['num_detections'].numpy().shape)
    if has_att_heads:
      self.assertIn('detection_attributes', model_outputs)
      self.assertAllEqual(
          [2, 10, 1],
          model_outputs['detection_attributes']['depth'].numpy().shape)
  if output_intermediate_features:
    # Backbone features are exported for levels 2..5, decoder features for
    # min_level..max_level; channel counts come from each module's specs.
    for l in range(2, 6):
      self.assertIn('backbone_{}'.format(l), model_outputs)
      self.assertAllEqual([
          2, image_size[0] // 2**l, image_size[1] // 2**l,
          backbone.output_specs[str(l)].as_list()[-1]
      ], model_outputs['backbone_{}'.format(l)].numpy().shape)
    for l in range(min_level, max_level + 1):
      self.assertIn('decoder_{}'.format(l), model_outputs)
      self.assertAllEqual([
          2, image_size[0] // 2**l, image_size[1] // 2**l,
          decoder.output_specs[str(l)].as_list()[-1]
      ], model_outputs['decoder_{}'.format(l)].numpy().shape)
def testDetectionsOutputShape(self, nms_version, has_att_heads, use_cpu_nms,
                              soft_nms_sigma, use_regular_nms):
  """Checks detection output shapes across NMS configurations."""
  min_level = 4
  max_level = 6
  num_scales = 2
  max_num_detections = 10
  aspect_ratios = [1.0, 2.0]
  anchor_scale = 2.0
  output_size = [64, 64]
  num_classes = 4
  pre_nms_top_k = 5000
  pre_nms_score_threshold = 0.01
  batch_size = 1
  tflite_post_processing_config = {
      'max_detections': max_num_detections,
      'max_classes_per_detection': 1,
      'use_regular_nms': use_regular_nms,
      'nms_score_threshold': 0.01,
      'nms_iou_threshold': 0.5
  }
  kwargs = {
      'apply_nms': True,
      'pre_nms_top_k': pre_nms_top_k,
      'pre_nms_score_threshold': pre_nms_score_threshold,
      'nms_iou_threshold': 0.5,
      'max_num_detections': max_num_detections,
      'nms_version': nms_version,
      'use_cpu_nms': use_cpu_nms,
      'soft_nms_sigma': soft_nms_sigma,
      'tflite_post_processing_config': tflite_post_processing_config
  }

  input_anchor = anchor.build_anchor_generator(min_level, max_level,
                                               num_scales, aspect_ratios,
                                               anchor_scale)
  anchor_boxes = input_anchor(output_size)
  # 84 spatial locations total: 8x8 (level 4) + 4x4 (level 5) + 2x2 (level 6).
  cls_outputs_all = (np.random.rand(84, num_classes) -
                     0.5) * 3  # random 84x3 outputs.
  box_outputs_all = np.random.rand(84, 4)  # random 84 boxes.
  class_outputs = {
      '4':
          tf.reshape(
              tf.convert_to_tensor(cls_outputs_all[0:64], dtype=tf.float32),
              [1, 8, 8, num_classes]),
      '5':
          tf.reshape(
              tf.convert_to_tensor(cls_outputs_all[64:80], dtype=tf.float32),
              [1, 4, 4, num_classes]),
      '6':
          tf.reshape(
              tf.convert_to_tensor(cls_outputs_all[80:84], dtype=tf.float32),
              [1, 2, 2, num_classes]),
  }
  box_outputs = {
      '4':
          tf.reshape(
              tf.convert_to_tensor(box_outputs_all[0:64], dtype=tf.float32),
              [1, 8, 8, 4]),
      '5':
          tf.reshape(
              tf.convert_to_tensor(box_outputs_all[64:80], dtype=tf.float32),
              [1, 4, 4, 4]),
      '6':
          tf.reshape(
              tf.convert_to_tensor(box_outputs_all[80:84], dtype=tf.float32),
              [1, 2, 2, 4]),
  }
  if has_att_heads:
    att_outputs_all = np.random.rand(84, 1)  # random attributes.
    att_outputs = {
        'depth': {
            '4':
                tf.reshape(
                    tf.convert_to_tensor(
                        att_outputs_all[0:64], dtype=tf.float32),
                    [1, 8, 8, 1]),
            '5':
                tf.reshape(
                    tf.convert_to_tensor(
                        att_outputs_all[64:80], dtype=tf.float32),
                    [1, 4, 4, 1]),
            '6':
                tf.reshape(
                    tf.convert_to_tensor(
                        att_outputs_all[80:84], dtype=tf.float32),
                    [1, 2, 2, 1]),
        }
    }
  else:
    att_outputs = None
  # NOTE(review): image_info rows presumably follow the [original size],
  # [desired size], [scale], [offset] convention; the generator consumes
  # row 1 below — confirm against the generator's contract.
  image_info = tf.constant(
      [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)

  generator = detection_generator.MultilevelDetectionGenerator(**kwargs)
  results = generator(box_outputs, class_outputs, anchor_boxes,
                      image_info[:, 1, :], att_outputs)
  boxes = results['detection_boxes']
  classes = results['detection_classes']
  scores = results['detection_scores']
  valid_detections = results['num_detections']

  if nms_version == 'tflite':
    # When nms_version is `tflite`, all output tensors are empty as the actual
    # post-processing happens in the TFLite model.
    self.assertEqual(boxes.numpy().shape, ())
    self.assertEqual(scores.numpy().shape, ())
    self.assertEqual(classes.numpy().shape, ())
    self.assertEqual(valid_detections.numpy().shape, ())
  else:
    self.assertEqual(boxes.numpy().shape,
                     (batch_size, max_num_detections, 4))
    self.assertEqual(scores.numpy().shape, (
        batch_size,
        max_num_detections,
    ))
    self.assertEqual(classes.numpy().shape, (
        batch_size,
        max_num_detections,
    ))
    self.assertEqual(valid_detections.numpy().shape, (batch_size,))
    if has_att_heads:
      for att in results['detection_attributes'].values():
        self.assertEqual(att.numpy().shape,
                         (batch_size, max_num_detections, 1))