Example #1
    def GetAttentionPrelogit(
            self,
            images,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False,
            use_batch_norm=True):
        """Constructs attention model on resnet_v1_50.

        Args:
            images: A tensor of size [batch, height, width, channels].
            weight_decay: Coefficient for the weight decay regularizer.
            attention_nonlinear: Type of non-linearity on top of the attention
                function.
            attention_type: Type of the attention structure.
            kernel: Convolutional kernel to use in attention layers (e.g.,
                [3, 3]).
            training_resnet: Whether or not the ResNet blocks from the model
                are in training mode.
            training_attention: Whether or not the attention part of the model
                is in training mode.
            reuse: Whether or not the layer and its variables should be reused.
            use_batch_norm: Whether or not to use batch normalization.

        Returns:
            prelogits: A tensor of size [batch, 1, 1, channels].
            attention_prob: Attention score after the non-linearity.
            attention_score: Attention score before the non-linearity.
            feature_map: Features extracted from the model, which are not
                l2-normalized.
            end_points: Set of activations for external use.
        """
        # Construct Resnet50 features.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
            _, end_points = self.GetResnet50Subnetwork(
                images, is_training=training_resnet, reuse=reuse)

        feature_map = end_points[self._target_layer_type]

        # Construct attention subnetwork on top of features.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
            with slim.arg_scope([slim.batch_norm], is_training=training_attention):
                (prelogits, attention_prob, attention_score,
                 end_points) = self._GetAttentionSubnetwork(
                    feature_map,
                    end_points,
                    attention_nonlinear=attention_nonlinear,
                    attention_type=attention_type,
                    kernel=kernel,
                    reuse=reuse)

        return prelogits, attention_prob, attention_score, feature_map, end_points
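A minimal usage sketch for the method above, assuming a TF1/slim graph-mode environment; the class name `DelfV1` and the 321x321 input size are illustrative assumptions, not confirmed by this listing:

    import tensorflow.compat.v1 as tf

    tf.disable_eager_execution()

    model = DelfV1()  # hypothetical: whatever class defines GetAttentionPrelogit
    images = tf.placeholder(tf.float32, shape=[None, 321, 321, 3])

    # Typical fine-tuning setup: keep the ResNet backbone frozen and put only
    # the attention subnetwork into training mode.
    prelogits, attention_prob, attention_score, feature_map, _ = (
        model.GetAttentionPrelogit(
            images,
            weight_decay=0.0001,
            training_resnet=False,
            training_attention=True))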
Example #2
    def _GetAttentionModel(
            self,
            images,
            num_classes,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False):
        """Constructs attention model on resnet_v1_50.

        Args:
            images: A tensor of size [batch, height, width, channels].
            num_classes: The number of output classes.
            weight_decay: Coefficient for the weight decay regularizer.
            attention_nonlinear: Type of non-linearity on top of the attention
                function.
            attention_type: Type of the attention structure.
            kernel: Convolutional kernel to use in attention layers (e.g.,
                [3, 3]).
            training_resnet: Whether or not the ResNet blocks from the model
                are in training mode.
            training_attention: Whether or not the attention part of the model
                is in training mode.
            reuse: Whether or not the layer and its variables should be reused.

        Returns:
            logits: A tensor of size [batch, num_classes].
            attention_prob: Attention score after the non-linearity.
            attention_score: Attention score before the non-linearity.
            feature_map: Features extracted from the model, which are not
                l2-normalized.
        """

        attention_feat, attention_prob, attention_score, feature_map, _ = (
            self.GetAttentionPrelogit(
                images,
                weight_decay,
                attention_nonlinear=attention_nonlinear,
                attention_type=attention_type,
                kernel=kernel,
                training_resnet=training_resnet,
                training_attention=training_attention,
                reuse=reuse))
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=weight_decay, batch_norm_scale=True)):
            with slim.arg_scope([slim.batch_norm], is_training=training_attention):
                with tf.variable_scope(
                        _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
                    logits = slim.conv2d(
                        attention_feat,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='logits')
                    logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
        return logits, attention_prob, attention_score, feature_map
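This head turns the pooled [batch, 1, 1, channels] prelogits into [batch, 1, 1, num_classes] with a 1x1 convolution, then squeezes the two spatial dimensions away. Continuing the hypothetical sketch from Example #1 (calling the private method directly, with an assumed `labels` placeholder and loss choice, purely for illustration):

    labels = tf.placeholder(tf.int64, shape=[None])

    logits, attention_prob, attention_score, feature_map = (
        model._GetAttentionModel(
            images,
            num_classes=1000,  # illustrative class count
            training_resnet=False,
            training_attention=True))  # logits: [batch, num_classes]

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)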
Example #3
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
            preprocessed_inputs: A [batch, height, width, channels] float
                tensor representing a batch of images.

        Returns:
            feature_maps: A list of tensors where the ith tensor has shape
                [batch, height_i, width_i, depth_i].

        Raises:
            ValueError: If the depth multiplier is not supported.
        """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    with slim.arg_scope(
                            [resnet_v1.bottleneck],
                            use_bounded_activations=self._use_bounded_activations):
                        _, activations = self._resnet_base_fn(
                            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                       self._pad_to_multiple),
                            num_classes=None,
                            is_training=None,
                            global_pool=False,
                            output_stride=None,
                            store_non_strided_activations=True,
                            scope=scope)

            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
                    base_feature_map_depth=self._base_feature_map_depth,
                    num_layers=self._num_layers,
                    image_features={
                        'image_features':
                        self._filter_features(activations)['block3']
                    })
        # Wrap in list() so the docstring's promise of a list holds under
        # Python 3, where dict.values() is a view.
        return list(feature_maps.values())
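In the TF Object Detection API this method is normally driven by the SSD meta-architecture rather than called directly, but a hypothetical standalone call might look as follows (`extractor` stands in for a concrete PPN-style feature extractor whose constructor arguments are not shown in this listing):

    images = tf.placeholder(tf.float32, shape=[1, 320, 320, 3])
    preprocessed = extractor.preprocess(images)  # the extractor's input hook
    feature_maps = extractor.extract_features(preprocessed)
    # feature_maps: a list of [batch, h_i, w_i, d_i] tensors, produced by
    # pooling the single 'block3' activation into a pyramid of maps.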
Example #4
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
            preprocessed_inputs: A [batch, height, width, channels] float
                tensor representing a batch of images.

        Returns:
            feature_maps: A list of tensors where the ith tensor has shape
                [batch, height_i, width_i, depth_i].

        Raises:
            ValueError: If the depth multiplier is not supported.
        """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        store_non_strided_activations=True,
                        scope=scope)
                    image_features = self._filter_features(image_features)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope(self._fpn_scope_name,
                                       reuse=self._reuse_weights):
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in ['block2', 'block3', 'block4']],
                        depth=256)
                    last_feature_map = fpn_features['top_down_block4']
                    coarse_features = {}
                    for i in range(5, 7):
                        last_feature_map = slim.conv2d(
                            last_feature_map,
                            num_outputs=256,
                            kernel_size=[3, 3],
                            stride=2,
                            padding='SAME',
                            scope='bottom_up_block{}'.format(i))
                        coarse_features[
                            'bottom_up_block{}'.format(i)] = last_feature_map
        return [
            fpn_features['top_down_block2'], fpn_features['top_down_block3'],
            fpn_features['top_down_block4'],
            coarse_features['bottom_up_block5'],
            coarse_features['bottom_up_block6']
        ]
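The FPN variant above returns a five-level pyramid: top-down features for block2 through block4 plus two extra stride-2 convolutions for the coarser levels. A small self-contained helper sketching the expected geometry, assuming the standard ResNet block strides (8, 16, 32) and SAME padding; these numbers are an illustrative assumption, not asserted by the listing:

    import math

    def pyramid_shapes(height, width, depth=256):
        """Illustrative FPN map sizes under the stride assumptions above."""
        strides = {
            'top_down_block2': 8,
            'top_down_block3': 16,
            'top_down_block4': 32,
            'bottom_up_block5': 64,
            'bottom_up_block6': 128,
        }
        return {name: (math.ceil(height / s), math.ceil(width / s), depth)
                for name, s in strides.items()}

    # pyramid_shapes(640, 640)['bottom_up_block6'] == (5, 5, 256); the
    # check_min_image_dim(129, ...) guard keeps even this stride-128 map
    # from degenerating (ceil(129 / 128) == 2).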