Exemplo n.º 1
0
    def test_check_min_image_dim_static_shape(self):
        input_tensor = tf.constant(np.zeros([1, 42, 42, 3]))
        _ = shape_utils.check_min_image_dim(33, input_tensor)

        with self.assertRaisesRegexp(
                ValueError,
                'image size must be >= 64 in both height and width.'):
            _ = shape_utils.check_min_image_dim(64, input_tensor)
Exemplo n.º 2
0
    def _extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        image_features = self.mobilenet_v2(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple))

        feature_maps = self.feature_map_generator({
            'layer_15/expansion_output':
            image_features[0],
            'layer_19':
            image_features[1]
        })

        return feature_maps.values()
Exemplo n.º 3
0
    def test_check_min_image_dim_dynamic_shape(self):
        input_placeholder = tf.placeholder(tf.float32,
                                           shape=[1, None, None, 3])
        image_tensor = shape_utils.check_min_image_dim(33, input_placeholder)

        with self.test_session() as sess:
            sess.run(image_tensor,
                     feed_dict={input_placeholder: np.zeros([1, 42, 42, 3])})
            with self.assertRaises(tf.errors.InvalidArgumentError):
                sess.run(
                    image_tensor,
                    feed_dict={input_placeholder: np.zeros([1, 32, 32, 3])})
Exemplo n.º 4
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer':
            ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
Exemplo n.º 5
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    with slim.arg_scope([resnet_v1.bottleneck],
                                        use_bounded_activations=self.
                                        _use_bounded_activations):
                        _, activations = self._resnet_base_fn(
                            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                       self._pad_to_multiple),
                            num_classes=None,
                            is_training=None,
                            global_pool=False,
                            output_stride=None,
                            store_non_strided_activations=True,
                            scope=scope)

            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=self._base_feature_map_depth,
                    num_layers=self._num_layers,
                    image_features={
                        'image_features':
                        self._filter_features(activations)['block3']
                    })
        return feature_maps.values()
Exemplo n.º 6
0
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        preprocessed_inputs = shape_utils.check_min_image_dim(
            min_dim=33, image_tensor=preprocessed_inputs)

        with slim.arg_scope(
                mobilenet_v1.mobilenet_v1_arg_scope(
                    is_training=self._train_batch_norm,
                    weight_decay=self._weight_decay)):
            with tf.variable_scope('MobilenetV1',
                                   reuse=self._reuse_weights) as scope:
                params = {}
                if self._skip_last_stride:
                    params['conv_defs'] = _get_mobilenet_conv_no_last_stride_defs(
                        conv_depth_ratio_in_percentage=self._conv_depth_ratio_in_percentage)
                _, activations = mobilenet_v1.mobilenet_v1_base(
                    preprocessed_inputs,
                    final_endpoint='Conv2d_11_pointwise',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope,
                    **params)
        return activations['Conv2d_11_pointwise'], activations
Exemplo n.º 7
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
Exemplo n.º 8
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV1',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v1.mobilenet_v1_arg_scope(
                        is_training=None, regularize_depthwise=True)):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v1.mobilenet_v1_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Conv2d_13_pointwise',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=0,
                    num_layers=6,
                    image_features={
                        'image_features': image_features['Conv2d_11_pointwise']
                    })
        return feature_maps.values()
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        store_non_strided_activations=True,
                        scope=scope)
                    image_features = self._filter_features(image_features)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope(self._fpn_scope_name,
                                       reuse=self._reuse_weights):
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in ['block2', 'block3', 'block4']],
                        depth=256)
                    last_feature_map = fpn_features['top_down_block4']
                    coarse_features = {}
                    for i in range(5, 7):
                        last_feature_map = slim.conv2d(
                            last_feature_map,
                            num_outputs=256,
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_block{}'.format(i))
                        coarse_features['bottom_up_block{}'.format(
                            i)] = last_feature_map
        return [
            fpn_features['top_down_block2'], fpn_features['top_down_block3'],
            fpn_features['top_down_block4'],
            coarse_features['bottom_up_block5'],
            coarse_features['bottom_up_block6']
        ]
Exemplo n.º 10
0
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                    mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                 slim.arg_scope(
                     [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=_CONV_DEFS if self._use_depthwise else None,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            depth_fn = lambda d: max(int(d * self._depth_multiplier), self.
                                     _min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'layer_4', 'layer_7', 'layer_14', 'layer_19'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 19))
                        feature_maps.append(last_feature_map)
        return feature_maps