Example #1
    def test_get_expected_feature_map_shapes_with_inception_v3(self):
        image_features = {
            'Mixed_5d': tf.random_uniform([4, 35, 35, 256], dtype=tf.float32),
            'Mixed_6e': tf.random_uniform([4, 17, 17, 576], dtype=tf.float32),
            'Mixed_7c': tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32)
        }

        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=INCEPTION_V3_LAYOUT,
            depth_multiplier=1,
            min_depth=32,
            insert_1x1_conv=True,
            image_features=image_features)

        expected_feature_map_shapes = {
            'Mixed_5d': (4, 35, 35, 256),
            'Mixed_6e': (4, 17, 17, 576),
            'Mixed_7c': (4, 8, 8, 1024),
            'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
            'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
            'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)
        }

        init_op = tf.global_variables_initializer()
        with self.test_session() as sess:
            sess.run(init_op)
            out_feature_maps = sess.run(feature_maps)
            out_feature_map_shapes = dict(
                (key, value.shape) for key, value in out_feature_maps.items())
            self.assertDictEqual(out_feature_map_shapes,
                                 expected_feature_map_shapes)
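The test references INCEPTION_V3_LAYOUT without defining it. A plausible reconstruction, inferred from the expected map names and shapes above and from the inline dict that Example #4 builds for the same backbone, is sketched below; the real constant may carry additional keys.

# Inferred sketch of the layout constant referenced above; the actual
# definition is not part of this snippet. It mirrors the inline dict
# constructed in Example #4.
INCEPTION_V3_LAYOUT = {
    # '' entries request new layers; -1 depths pass base endpoints through.
    'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
    'layer_depth': [-1, -1, -1, 512, 256, 128],
}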
Example #2
    def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
            self):
        image_features = {
            'Conv2d_11_pointwise':
                tf.random_uniform([4, 16, 16, 512], dtype=tf.float32),
            'Conv2d_13_pointwise':
                tf.random_uniform([4, 8, 8, 1024], dtype=tf.float32),
        }

        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
            depth_multiplier=1,
            min_depth=32,
            insert_1x1_conv=True,
            image_features=image_features)

        expected_feature_map_shapes = {
            'Conv2d_11_pointwise': (4, 16, 16, 512),
            'Conv2d_13_pointwise': (4, 8, 8, 1024),
            'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
            'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
            'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)
        }

        init_op = tf.global_variables_initializer()
        with self.test_session() as sess:
            sess.run(init_op)
            out_feature_maps = sess.run(feature_maps)
            out_feature_map_shapes = dict(
                (key, value.shape) for key, value in out_feature_maps.items())
            self.assertDictEqual(out_feature_map_shapes,
                                 expected_feature_map_shapes)
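EMBEDDED_SSD_MOBILENET_V1_LAYOUT is likewise referenced but not shown. The expected map names above (…3x3_s2_512, …3x3_s2_256, …2x2_s2_256), together with the inline dict Example #5 builds for the same embedded extractor, suggest the following sketch.

# Inferred sketch of the layout constant; it mirrors the inline dict that
# Example #5 constructs for the embedded 256x256 extractor.
EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
    'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', ''],
    'layer_depth': [-1, -1, 512, 256, 256],
    # A 2x2 kernel on the final 2x2 map collapses it to 1x1.
    'conv_kernel_size': [-1, -1, 3, 3, 2],
}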
Example #3
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                           '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        # list() keeps the documented contract under Python 3, where
        # dict.values() returns a view rather than a list.
        return list(feature_maps.values())
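For the conventional 300x300 SSD input (an assumption; this snippet does not fix the size), MobileNet V1's Conv2d_11_pointwise and Conv2d_13_pointwise come out at 19x19 and 10x10, and each extra stride-2 SAME convolution roughly halves the grid. A quick illustrative check of the six-level pyramid this layout produces:

import math

# Illustrative only: spatial sizes of the six maps for an assumed 300x300
# input. Base endpoints sit at strides 16 and 32; each of the four extra
# layers (depths 512, 256, 256, 128) is a stride-2 SAME convolution, so
# the size shrinks by ceil(n / 2) at every step.
sizes = [19, 10]
for _ in range(4):
    sizes.append(math.ceil(sizes[-1] / 2))
print(sizes)  # [19, 10, 5, 3, 2, 1]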
Example #4
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['Mixed_5d', 'Mixed_6e', 'Mixed_7c', '', '', ''],
            'layer_depth': [-1, -1, -1, 512, 256, 128],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV3',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v3.inception_v3_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_7c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        # list() keeps the documented contract under Python 3, where
        # dict.values() returns a view rather than a list.
        return list(feature_maps.values())
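Both extractors run the input through ops.pad_to_multiple before the backbone. That helper is not shown here; assuming the usual behavior (round height and width up to the next multiple by zero-padding on the bottom/right so stride-2 layers divide evenly), the size arithmetic reduces to:

# Assumed behavior of ops.pad_to_multiple's size arithmetic: round each
# spatial dimension up to the next multiple of `multiple`.
def padded_dim(dim, multiple):
    return ((dim + multiple - 1) // multiple) * multiple

assert padded_dim(299, 32) == 320  # e.g. an Inception-style 299 input
assert padded_dim(256, 32) == 256  # already a multiple: unchanged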
Example #5
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: if image height or width are not 256 pixels.
    """
        image_shape = preprocessed_inputs.get_shape()
        image_shape.assert_has_rank(4)
        image_height = image_shape[1].value
        image_width = image_shape[2].value

        if image_height is None or image_width is None:
            shape_assert = tf.Assert(
                tf.logical_and(
                    tf.equal(tf.shape(preprocessed_inputs)[1], 256),
                    tf.equal(tf.shape(preprocessed_inputs)[2], 256)),
                ['image size must be 256 in both height and width.'])
            with tf.control_dependencies([shape_assert]):
                preprocessed_inputs = tf.identity(preprocessed_inputs)
        elif image_height != 256 or image_width != 256:
            raise ValueError(
                'image size must be 256 in both height and width;'
                ' image dim = %d,%d' % (image_height, image_width))

        feature_map_layout = {
            'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise',
                           '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256],
            'conv_kernel_size': [-1, -1, 3, 3, 2],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(is_training=None)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        # list() keeps the documented contract under Python 3, where
        # dict.values() returns a view rather than a list.
        return list(feature_maps.values())
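With the 256x256 check enforced above, this layout reproduces exactly the pyramid the test in Example #2 expects: base maps at 16x16 and 8x8, then 4x4, 2x2, and a final 1x1 produced by the 2x2 kernel. An illustrative sanity check of that progression:

import math

# Illustrative: base endpoints at strides 16 and 32 of a 256x256 input,
# followed by three stride-2 SAME convolutions (kernels 3x3, 3x3, 2x2).
sizes = [256 // 16, 256 // 32]              # 16, 8
for _ in range(3):
    sizes.append(math.ceil(sizes[-1] / 2))  # 4, 2, 1
assert sizes == [16, 8, 4, 2, 1]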