示例#1
0
def build_retinanet(
    input_specs: tf.keras.layers.InputSpec,
    model_config: retinanet_cfg.RetinaNet,
    l2_regularizer: tf.keras.regularizers.Regularizer = None
) -> tf.keras.Model:
    """Assembles a RetinaNet model from its configuration.

    The backbone, decoder, prediction head and detection generator are
    created in that order and then wired into a ``RetinaNetModel``.

    Args:
        input_specs: Input spec; its shape without the leading dimension is
            used to call the backbone once on a symbolic Keras input.
        model_config: RetinaNet configuration providing the backbone, head,
            anchor, detection-generator and norm/activation settings.
        l2_regularizer: Regularizer forwarded to the backbone, the decoder
            and the head. Defaults to ``None``.

    Returns:
        The constructed ``retinanet_model.RetinaNetModel``.
    """
    norm_act = model_config.norm_activation

    backbone = backbones.factory.build_backbone(
        input_specs=input_specs,
        backbone_config=model_config.backbone,
        norm_activation_config=norm_act,
        l2_regularizer=l2_regularizer)
    # The backbone is called once on a symbolic input before the decoder
    # reads `backbone.output_specs`.
    # NOTE(review): presumably this forces the backbone to build - confirm.
    symbolic_input = tf.keras.Input(input_specs.shape[1:])
    backbone(symbolic_input)

    decoder = decoders.factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

    head_cfg = model_config.head
    nms_cfg = model_config.detection_generator
    anchor_cfg = model_config.anchor
    anchors_per_location = (
        len(anchor_cfg.aspect_ratios) * anchor_cfg.num_scales)

    # Pyramid level bounds are shared by the head and the final model.
    level_bounds = {
        'min_level': model_config.min_level,
        'max_level': model_config.max_level,
    }

    # Attribute heads are optional in the config; a falsy value means none.
    attribute_head_dicts = [
        attribute_cfg.as_dict()
        for attribute_cfg in (head_cfg.attribute_heads or [])
    ]

    head = dense_prediction_heads.RetinaNetHead(
        num_classes=model_config.num_classes,
        num_anchors_per_location=anchors_per_location,
        num_convs=head_cfg.num_convs,
        num_filters=head_cfg.num_filters,
        attribute_heads=attribute_head_dicts,
        use_separable_conv=head_cfg.use_separable_conv,
        activation=norm_act.activation,
        use_sync_bn=norm_act.use_sync_bn,
        norm_momentum=norm_act.norm_momentum,
        norm_epsilon=norm_act.norm_epsilon,
        kernel_regularizer=l2_regularizer,
        **level_bounds)

    # Each listed config field maps onto the constructor keyword of the
    # same name.
    generator_fields = (
        'apply_nms',
        'pre_nms_top_k',
        'pre_nms_score_threshold',
        'nms_iou_threshold',
        'max_num_detections',
        'use_batched_nms',
        'use_cpu_nms',
    )
    generator_kwargs = {
        field: getattr(nms_cfg, field) for field in generator_fields
    }
    detection_generator_obj = detection_generator.MultilevelDetectionGenerator(
        **generator_kwargs)

    return retinanet_model.RetinaNetModel(
        backbone,
        decoder,
        head,
        detection_generator_obj,
        num_scales=anchor_cfg.num_scales,
        aspect_ratios=anchor_cfg.aspect_ratios,
        anchor_size=anchor_cfg.anchor_size,
        **level_bounds)
    def test_serialize_deserialize(self):
        """Checks that a RetinaNet model survives a config round trip.

        A model is built, rebuilt from its own config, converted to JSON,
        and the two configs are compared for equality.
        """
        level_bounds = {'min_level': 3, 'max_level': 7}
        scales_per_level = 3
        ratios = [1.0]
        class_count = 3
        anchors_per_location = scales_per_level * len(ratios)

        backbone = resnet.ResNet(model_id=50)
        decoder = fpn.FPN(input_specs=backbone.output_specs, **level_bounds)
        head = dense_prediction_heads.RetinaNetHead(
            num_classes=class_count,
            num_anchors_per_location=anchors_per_location,
            **level_bounds)
        generator = detection_generator.MultilevelDetectionGenerator(
            max_num_detections=10)
        original_model = retinanet_model.RetinaNetModel(
            backbone=backbone,
            decoder=decoder,
            head=head,
            detection_generator=generator)

        restored_model = retinanet_model.RetinaNetModel.from_config(
            original_model.get_config())

        # The restored model must also be convertible to JSON; the result
        # itself is not inspected.
        restored_model.to_json()

        # A successful round trip leaves the config unchanged.
        self.assertAllEqual(original_model.get_config(),
                            restored_model.get_config())
  def test_num_params(self,
                      num_classes,
                      min_level,
                      max_level,
                      num_scales,
                      aspect_ratios,
                      resnet_model_id,
                      use_separable_conv,
                      fpn_num_filters,
                      head_num_convs,
                      head_num_filters,
                      expected_num_params):
    """Checks `model.count_params()` against the expected value.

    A ResNet/FPN RetinaNet is built from the given hyper-parameters and run
    once in training mode on random 384x384 images before its parameters
    are counted.
    """
    batch_size = 2
    image_size = 384
    anchors_per_location = num_scales * len(aspect_ratios)
    level_bounds = {'min_level': min_level, 'max_level': max_level}

    images = np.random.rand(batch_size, image_size, image_size, 3)
    image_shape = np.array([[image_size, image_size]] * batch_size)

    anchor_boxes = anchor.Anchor(
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=3,
        image_size=(image_size, image_size),
        **level_bounds).multilevel_boxes
    # Prepend a batch axis to every level's anchors and repeat them once
    # per image.
    for level_key in anchor_boxes:
      with_batch_axis = tf.expand_dims(anchor_boxes[level_key], axis=0)
      anchor_boxes[level_key] = tf.tile(with_batch_axis,
                                        [batch_size, 1, 1, 1])

    backbone = resnet.ResNet(model_id=resnet_model_id)
    decoder = fpn.FPN(
        input_specs=backbone.output_specs,
        num_filters=fpn_num_filters,
        use_separable_conv=use_separable_conv,
        **level_bounds)
    head = dense_prediction_heads.RetinaNetHead(
        num_classes=num_classes,
        num_anchors_per_location=anchors_per_location,
        use_separable_conv=use_separable_conv,
        num_convs=head_num_convs,
        num_filters=head_num_filters,
        **level_bounds)
    generator = detection_generator.MultilevelDetectionGenerator(
        max_num_detections=10)
    model = retinanet_model.RetinaNetModel(
        backbone=backbone,
        decoder=decoder,
        head=head,
        detection_generator=generator)

    # Run one forward pass before counting; the outputs are not inspected.
    # NOTE(review): presumably needed so all variables exist - confirm.
    model(images, image_shape, anchor_boxes, training=True)
    self.assertEqual(expected_num_params, model.count_params())
示例#4
0
    def test_serialize_deserialize(self):
        """Checks the detection generator's config round trip.

        The generator's config must equal its constructor arguments, and a
        generator rebuilt from that config must report the same config.
        """
        init_args = dict(
            apply_nms=True,
            pre_nms_top_k=1000,
            pre_nms_score_threshold=0.1,
            nms_iou_threshold=0.5,
            max_num_detections=10,
            use_batched_nms=False,
        )
        original = detection_generator.MultilevelDetectionGenerator(
            **init_args)

        # The reported config is expected to mirror the constructor
        # arguments.
        self.assertEqual(original.get_config(), dict(init_args))

        generator_cls = detection_generator.MultilevelDetectionGenerator
        restored = generator_cls.from_config(original.get_config())

        self.assertAllEqual(original.get_config(), restored.get_config())
    def test_forward(self, strategy, image_size, training, has_att_heads):
        """Runs a forward pass of an R50-FPN RetinaNet and checks outputs.

        In training mode the raw per-level class, box and (when enabled)
        attribute outputs are shape-checked. Otherwise the detection outputs
        are checked for presence and shape.
        """
        tf.keras.backend.set_image_data_format('channels_last')
        batch_size = 2
        max_detections = 10
        num_classes = 3
        min_level = 3
        max_level = 7
        num_scales = 3
        aspect_ratios = [1.0]
        anchors_per_location = num_scales * len(aspect_ratios)
        height = image_size[0]
        width = image_size[1]

        images = np.random.rand(batch_size, height, width, 3)
        image_shape = np.array([[height, width]] * batch_size)

        with strategy.scope():
            anchor_gen = anchor.build_anchor_generator(
                min_level=min_level,
                max_level=max_level,
                num_scales=num_scales,
                aspect_ratios=aspect_ratios,
                anchor_size=3)
            anchor_boxes = anchor_gen(image_size)
            # Prepend a batch axis to every level's anchors and repeat them
            # once per image.
            for level_key in anchor_boxes:
                with_batch_axis = tf.expand_dims(anchor_boxes[level_key],
                                                 axis=0)
                anchor_boxes[level_key] = tf.tile(with_batch_axis,
                                                  [batch_size, 1, 1, 1])

            backbone = resnet.ResNet(model_id=50)
            decoder = fpn.FPN(input_specs=backbone.output_specs,
                              min_level=min_level,
                              max_level=max_level)

            attribute_heads = (
                {'depth': ('regression', 1)} if has_att_heads else None)
            head = dense_prediction_heads.RetinaNetHead(
                min_level=min_level,
                max_level=max_level,
                num_classes=num_classes,
                attribute_heads=attribute_heads,
                num_anchors_per_location=anchors_per_location)
            generator = detection_generator.MultilevelDetectionGenerator(
                max_num_detections=max_detections)
            model = retinanet_model.RetinaNetModel(
                backbone=backbone,
                decoder=decoder,
                head=head,
                detection_generator=generator)

            model_outputs = model(images,
                                  image_shape,
                                  anchor_boxes,
                                  training=training)

        if not training:
            # Inference path: check the detection outputs.
            for output_key in ('detection_boxes', 'detection_scores',
                               'detection_classes', 'detection_attributes',
                               'num_detections'):
                self.assertIn(output_key, model_outputs)

            expected_shapes = (
                ('detection_boxes', [batch_size, max_detections, 4]),
                ('detection_scores', [batch_size, max_detections]),
                ('detection_classes', [batch_size, max_detections]),
                ('num_detections', [batch_size]),
            )
            for output_key, expected_shape in expected_shapes:
                self.assertAllEqual(
                    expected_shape, model_outputs[output_key].numpy().shape)

            if has_att_heads:
                depth = model_outputs['detection_attributes']['depth']
                self.assertAllEqual([batch_size, max_detections, 1],
                                    depth.numpy().shape)
            return

        # Training path: check the raw per-level head outputs.
        cls_outputs = model_outputs['cls_outputs']
        box_outputs = model_outputs['box_outputs']
        att_outputs = model_outputs['att_outputs']
        for level in range(min_level, max_level + 1):
            level_key = str(level)
            stride = 2**level
            # Leading dimensions shared by every expected output shape at
            # this level.
            leading_dims = [batch_size, height // stride, width // stride]

            self.assertIn(level_key, cls_outputs)
            self.assertIn(level_key, box_outputs)
            self.assertAllEqual(
                leading_dims + [num_classes * anchors_per_location],
                cls_outputs[level_key].numpy().shape)
            self.assertAllEqual(
                leading_dims + [4 * anchors_per_location],
                box_outputs[level_key].numpy().shape)
            if has_att_heads:
                for att in att_outputs.values():
                    self.assertAllEqual(
                        leading_dims + [1 * anchors_per_location],
                        att[level_key].numpy().shape)
示例#6
0
    def test_build_model(self, use_separable_conv, build_anchor_boxes,
                         is_training, has_att_heads):
        """Builds an R50-FPN RetinaNet and runs one forward pass.

        Anchors are either precomputed and passed in, or `None` is passed.
        Only successful execution is checked; the outputs are discarded.
        """
        batch_size = 2
        image_size = 384
        num_classes = 3
        num_scales = 3
        aspect_ratios = [1.0]
        anchor_size = 3
        fpn_num_filters = 256
        head_num_convs = 4
        head_num_filters = 256
        level_bounds = {'min_level': 3, 'max_level': 7}
        anchors_per_location = num_scales * len(aspect_ratios)

        images = np.random.rand(batch_size, image_size, image_size, 3)
        image_shape = np.array([[image_size, image_size]] * batch_size)

        anchor_boxes = None
        if build_anchor_boxes:
            anchor_boxes = anchor.Anchor(
                num_scales=num_scales,
                aspect_ratios=aspect_ratios,
                anchor_size=anchor_size,
                image_size=(image_size, image_size),
                **level_bounds).multilevel_boxes
            # Prepend a batch axis to every level's anchors and repeat them
            # once per image.
            for level_key in anchor_boxes:
                with_batch_axis = tf.expand_dims(anchor_boxes[level_key],
                                                 axis=0)
                anchor_boxes[level_key] = tf.tile(with_batch_axis,
                                                  [batch_size, 1, 1, 1])

        attribute_heads = (
            [{'name': 'depth', 'type': 'regression', 'size': 1}]
            if has_att_heads else None)

        backbone = resnet.ResNet(model_id=50)
        decoder = fpn.FPN(input_specs=backbone.output_specs,
                          num_filters=fpn_num_filters,
                          use_separable_conv=use_separable_conv,
                          **level_bounds)
        head = dense_prediction_heads.RetinaNetHead(
            num_classes=num_classes,
            attribute_heads=attribute_heads,
            num_anchors_per_location=anchors_per_location,
            use_separable_conv=use_separable_conv,
            num_convs=head_num_convs,
            num_filters=head_num_filters,
            **level_bounds)
        generator = detection_generator.MultilevelDetectionGenerator(
            max_num_detections=10)
        model = retinanet_model.RetinaNetModel(
            backbone=backbone,
            decoder=decoder,
            head=head,
            detection_generator=generator,
            num_scales=num_scales,
            aspect_ratios=aspect_ratios,
            anchor_size=anchor_size,
            **level_bounds)

        # The outputs are intentionally ignored.
        model(images, image_shape, anchor_boxes, training=is_training)
示例#7
0
    def testDetectionsOutputShape(self, use_batched_nms):
        """Checks the output shapes of the multilevel detection generator.

        Random class and box predictions for levels 4-6 are passed to the
        generator together with generated anchors; the resulting boxes,
        scores, classes and detection counts are shape-checked.
        """
        batch_size = 1
        max_num_detections = 100
        num_classes = 4
        min_level, max_level = 4, 6
        num_scales = 2
        aspect_ratios = [1.0, 2.0]
        anchor_scale = 2.0
        output_size = [64, 64]
        generator_args = dict(
            apply_nms=True,
            pre_nms_top_k=5000,
            pre_nms_score_threshold=0.01,
            nms_iou_threshold=0.5,
            max_num_detections=max_num_detections,
            use_batched_nms=use_batched_nms,
        )

        input_anchor = anchor.build_anchor_generator(min_level, max_level,
                                                     num_scales, aspect_ratios,
                                                     anchor_scale)
        anchor_boxes = input_anchor(output_size)

        # (level key, first row, end row, spatial side): the 84 random rows
        # are split 64 / 16 / 4 and reshaped to 8x8, 4x4 and 2x2 maps.
        level_layout = (
            ('4', 0, 64, 8),
            ('5', 64, 80, 4),
            ('6', 80, 84, 2),
        )

        def split_by_level(flat_values, depth):
            """Slices the rows per level and reshapes each slice to 4-D."""
            return {
                level_key: tf.reshape(
                    tf.convert_to_tensor(flat_values[first:end],
                                         dtype=tf.float32),
                    [batch_size, side, side, depth])
                for level_key, first, end, side in level_layout
            }

        # Random 84 x num_classes scores, shifted and scaled to [-1.5, 1.5).
        cls_outputs_all = (np.random.rand(84, num_classes) - 0.5) * 3
        # Random 84 x 4 box values.
        box_outputs_all = np.random.rand(84, 4)
        class_outputs = split_by_level(cls_outputs_all, num_classes)
        box_outputs = split_by_level(box_outputs_all, 4)

        image_info = tf.constant(
            [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
        generator = detection_generator.MultilevelDetectionGenerator(
            **generator_args)
        # Only row 1 of image_info ([100, 100]) is passed to the generator.
        results = generator(box_outputs, class_outputs, anchor_boxes,
                            image_info[:, 1, :])

        per_detection_shape = (batch_size, max_num_detections)
        self.assertEqual(results['detection_boxes'].numpy().shape,
                         per_detection_shape + (4, ))
        self.assertEqual(results['detection_scores'].numpy().shape,
                         per_detection_shape)
        self.assertEqual(results['detection_classes'].numpy().shape,
                         per_detection_shape)
        self.assertEqual(results['num_detections'].numpy().shape,
                         (batch_size, ))
示例#8
0
    def testDetectionsOutputShape(self, nms_version, has_att_heads,
                                  use_cpu_nms, soft_nms_sigma):
        """Checks the detection generator's output shapes across NMS options.

        Random class, box and (optionally) attribute predictions for levels
        4-6 are passed to the generator together with generated anchors; the
        resulting boxes, scores, classes, detection counts and attributes
        are shape-checked.
        """
        batch_size = 1
        max_num_detections = 10
        num_classes = 4
        min_level, max_level = 4, 6
        num_scales = 2
        aspect_ratios = [1.0, 2.0]
        anchor_scale = 2.0
        output_size = [64, 64]
        generator_args = dict(
            apply_nms=True,
            pre_nms_top_k=5000,
            pre_nms_score_threshold=0.01,
            nms_iou_threshold=0.5,
            max_num_detections=max_num_detections,
            nms_version=nms_version,
            use_cpu_nms=use_cpu_nms,
            soft_nms_sigma=soft_nms_sigma,
        )

        input_anchor = anchor.build_anchor_generator(min_level, max_level,
                                                     num_scales, aspect_ratios,
                                                     anchor_scale)
        anchor_boxes = input_anchor(output_size)

        # (level key, first row, end row, spatial side): the 84 random rows
        # are split 64 / 16 / 4 and reshaped to 8x8, 4x4 and 2x2 maps.
        level_layout = (
            ('4', 0, 64, 8),
            ('5', 64, 80, 4),
            ('6', 80, 84, 2),
        )

        def split_by_level(flat_values, depth):
            """Slices the rows per level and reshapes each slice to 4-D."""
            return {
                level_key: tf.reshape(
                    tf.convert_to_tensor(flat_values[first:end],
                                         dtype=tf.float32),
                    [batch_size, side, side, depth])
                for level_key, first, end, side in level_layout
            }

        # Random 84 x num_classes scores, shifted and scaled to [-1.5, 1.5).
        cls_outputs_all = (np.random.rand(84, num_classes) - 0.5) * 3
        # Random 84 x 4 box values.
        box_outputs_all = np.random.rand(84, 4)
        class_outputs = split_by_level(cls_outputs_all, num_classes)
        box_outputs = split_by_level(box_outputs_all, 4)

        att_outputs = None
        if has_att_heads:
            # One random value per row for the single 'depth' attribute.
            att_outputs_all = np.random.rand(84, 1)
            att_outputs = {'depth': split_by_level(att_outputs_all, 1)}

        image_info = tf.constant(
            [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
        generator = detection_generator.MultilevelDetectionGenerator(
            **generator_args)
        # Only row 1 of image_info ([100, 100]) is passed to the generator.
        results = generator(box_outputs, class_outputs, anchor_boxes,
                            image_info[:, 1, :], att_outputs)

        per_detection_shape = (batch_size, max_num_detections)
        self.assertEqual(results['detection_boxes'].numpy().shape,
                         per_detection_shape + (4, ))
        self.assertEqual(results['detection_scores'].numpy().shape,
                         per_detection_shape)
        self.assertEqual(results['detection_classes'].numpy().shape,
                         per_detection_shape)
        self.assertEqual(results['num_detections'].numpy().shape,
                         (batch_size, ))
        if has_att_heads:
            for att in results['detection_attributes'].values():
                self.assertEqual(att.numpy().shape,
                                 per_detection_shape + (1, ))