def GetAttentionPrelogit(
    self,
    images,
    weight_decay=0.0001,
    attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
    attention_type=_SUPPORTED_ATTENTION_TYPES[0],
    kernel=1,
    training_resnet=False,
    training_attention=False,
    reuse=False,
    use_batch_norm=True):
  """Constructs attention model on resnet_v1_50.

  Args:
    images: A tensor of size [batch, height, width, channels].
    weight_decay: The parameters for weight_decay regularizer.
    attention_nonlinear: Type of non-linearity on top of the attention
      function.
    attention_type: Type of the attention structure.
    kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
    training_resnet: Whether or not the Resnet blocks from the model are in
      training mode.
    training_attention: Whether or not the attention part of the model is in
      training mode.
    reuse: Whether or not the layer and its variables should be reused.
    use_batch_norm: Whether or not to use batch normalization.

  Returns:
    prelogits: A tensor of size [batch, 1, 1, channels].
    attention_prob: Attention score after the non-linearity.
    attention_score: Attention score before the non-linearity.
    feature_map: Features extracted from the model, which are not
      l2-normalized.
    end_points: Set of activations for external use.
  """
  # Construct Resnet50 features.
  with slim.arg_scope(
      resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
    _, end_points = self.GetResnet50Subnetwork(
        images, is_training=training_resnet, reuse=reuse)

  feature_map = end_points[self._target_layer_type]

  # Construct attention subnetwork on top of features.
  with slim.arg_scope(
      resnet_v1.resnet_arg_scope(
          weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
    with slim.arg_scope([slim.batch_norm], is_training=training_attention):
      (prelogits, attention_prob, attention_score,
       end_points) = self._GetAttentionSubnetwork(
           feature_map,
           end_points,
           attention_nonlinear=attention_nonlinear,
           attention_type=attention_type,
           kernel=kernel,
           reuse=reuse)

  return prelogits, attention_prob, attention_score, feature_map, end_points
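# Hedged usage sketch (not part of the original source): assuming `model` is
# an instance of the enclosing class and `images` is a float tensor of shape
# [batch, height, width, 3], the prelogits and attention maps could be
# obtained as below. Only the attention layers are placed in training mode
# here; the Resnet backbone stays frozen.
#
#   prelogits, attention_prob, attention_score, feature_map, _ = (
#       model.GetAttentionPrelogit(
#           images,
#           weight_decay=0.0001,
#           training_resnet=False,
#           training_attention=True))
#   # prelogits: [batch, 1, 1, channels]; attention_prob / attention_score
#   # are the per-location attention maps after / before the non-linearity.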
def _GetAttentionModel(
    self,
    images,
    num_classes,
    weight_decay=0.0001,
    attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
    attention_type=_SUPPORTED_ATTENTION_TYPES[0],
    kernel=1,
    training_resnet=False,
    training_attention=False,
    reuse=False):
  """Constructs attention model on resnet_v1_50.

  Args:
    images: A tensor of size [batch, height, width, channels].
    num_classes: The number of output classes.
    weight_decay: The parameters for weight_decay regularizer.
    attention_nonlinear: Type of non-linearity on top of the attention
      function.
    attention_type: Type of the attention structure.
    kernel: Convolutional kernel to use in attention layers (e.g., [3, 3]).
    training_resnet: Whether or not the Resnet blocks from the model are in
      training mode.
    training_attention: Whether or not the attention part of the model is in
      training mode.
    reuse: Whether or not the layer and its variables should be reused.

  Returns:
    logits: A tensor of size [batch, num_classes].
    attention_prob: Attention score after the non-linearity.
    attention_score: Attention score before the non-linearity.
    feature_map: Features extracted from the model, which are not
      l2-normalized.
  """
  attention_feat, attention_prob, attention_score, feature_map, _ = (
      self.GetAttentionPrelogit(
          images,
          weight_decay,
          attention_nonlinear=attention_nonlinear,
          attention_type=attention_type,
          kernel=kernel,
          training_resnet=training_resnet,
          training_attention=training_attention,
          reuse=reuse))
  with slim.arg_scope(
      resnet_v1.resnet_arg_scope(
          weight_decay=weight_decay, batch_norm_scale=True)):
    with slim.arg_scope([slim.batch_norm], is_training=training_attention):
      with tf.variable_scope(
          _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
        logits = slim.conv2d(
            attention_feat,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope='logits')
        logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
  return logits, attention_prob, attention_score, feature_map
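# Hedged sketch (not in the original source): _GetAttentionModel is private,
# so the call below is purely illustrative of how the logits feed a
# classification loss. `model`, `images`, and `labels` are assumed to exist;
# num_classes=1000 is an arbitrary choice.
#
#   logits, attention_prob, _, _ = model._GetAttentionModel(
#       images,
#       num_classes=1000,
#       training_resnet=False,
#       training_attention=True)
#   loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)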
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i].

  Raises:
    ValueError: depth multiplier is not supported.
  """
  if self._depth_multiplier != 1.0:
    raise ValueError('Depth multiplier not supported.')

  preprocessed_inputs = shape_utils.check_min_image_dim(
      129, preprocessed_inputs)

  with tf.variable_scope(
      self._resnet_scope_name, reuse=self._reuse_weights) as scope:
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        with slim.arg_scope(
            [resnet_v1.bottleneck],
            use_bounded_activations=self._use_bounded_activations):
          _, activations = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
          base_feature_map_depth=self._base_feature_map_depth,
          num_layers=self._num_layers,
          image_features={
              'image_features': self._filter_features(activations)['block3']
          })
  # Materialize as a list: the docstring promises a list, and in Python 3
  # dict.values() returns a view, not a list.
  return list(feature_maps.values())
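# Illustrative sketch (assumptions: `extractor` is an instance of this
# pooling-pyramid feature extractor built with depth_multiplier=1.0, and the
# inputs are already preprocessed); not part of the original source. Inputs
# must be at least 129x129 to pass the check_min_image_dim call above.
#
#   images = tf.placeholder(tf.float32, shape=[None, 300, 300, 3])
#   feature_maps = extractor.extract_features(images)
#   # Each entry has shape [batch, height_i, width_i, depth_i], with spatial
#   # resolution halving between successive maps of the pyramid.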
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i].

  Raises:
    ValueError: depth multiplier is not supported.
  """
  if self._depth_multiplier != 1.0:
    raise ValueError('Depth multiplier not supported.')

  preprocessed_inputs = shape_utils.check_min_image_dim(
      129, preprocessed_inputs)

  with tf.variable_scope(
      self._resnet_scope_name, reuse=self._reuse_weights) as scope:
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=None,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            scope=scope)
        image_features = self._filter_features(image_features)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope(self._fpn_scope_name, reuse=self._reuse_weights):
        fpn_features = feature_map_generators.fpn_top_down_feature_maps(
            [(key, image_features[key])
             for key in ['block2', 'block3', 'block4']],
            depth=256)
        last_feature_map = fpn_features['top_down_block4']
        coarse_features = {}
        for i in range(5, 7):
          last_feature_map = slim.conv2d(
              last_feature_map,
              num_outputs=256,
              kernel_size=[3, 3],
              stride=2,
              padding='SAME',
              scope='bottom_up_block{}'.format(i))
          coarse_features['bottom_up_block{}'.format(i)] = last_feature_map
  return [
      fpn_features['top_down_block2'], fpn_features['top_down_block3'],
      fpn_features['top_down_block4'], coarse_features['bottom_up_block5'],
      coarse_features['bottom_up_block6']
  ]
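# Hedged sketch of the resulting pyramid (assuming a 640x640 input and the
# usual resnet_v1 activation strides of 8/16/32 for block2/block3/block4 when
# store_non_strided_activations=True); not part of the original source:
#
#   top_down_block2:  stride   8 -> 80x80x256
#   top_down_block3:  stride  16 -> 40x40x256
#   top_down_block4:  stride  32 -> 20x20x256
#   bottom_up_block5: stride  64 -> 10x10x256
#   bottom_up_block6: stride 128 ->  5x5x256
#
# The two bottom-up blocks extend the FPN output with coarser scales so the
# detector sees five feature maps in total, finest first.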