# Imports assume the TF1.x tensorflow/models research code (slim and the
# object_detection API) is on PYTHONPATH; tf.contrib.slim is used below,
# so TensorFlow 1.x is required.
import functools
from collections import OrderedDict

import tensorflow as tf
from tensorflow.contrib import slim

from nets.mobilenet import mobilenet
from nets.mobilenet import mobilenet_v2
from object_detection.models import feature_map_generators
from object_detection.utils import context_manager
from object_detection.utils import ops
from object_detection.utils import shape_utils


def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  # The first two SSD feature maps come straight from MobileNetV2 endpoints
  # (layer_depth == -1); the remaining ones are extra convolutions with the
  # listed depths. Slicing by `self._num_layers` lets callers request fewer
  # scales.
  feature_map_layout = {
      'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '',
                     ''][:self._num_layers],
      'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
      'use_depthwise': self._use_depthwise,
      'use_explicit_padding': self._use_explicit_padding,
  }

  with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
        slim.arg_scope(
            [mobilenet.depth_multiplier], min_depth=self._min_depth):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v2.mobilenet_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='layer_19',
            depth_multiplier=self._depth_multiplier,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)
    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)
  # Wrap in list() so the return type matches the docstring on Python 3.
  return list(feature_maps.values())
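# A minimal, hedged sketch of the base-network call above, runnable on its
# own against the slim MobileNetV2 implementation. It only demonstrates that
# `mobilenet_base` exposes the endpoint dict keyed by names such as
# 'layer_15/expansion_output' and 'layer_19', which `feature_map_layout`
# references; the SSD hyperparameter scopes and the extractor class are
# omitted, and the 320x320 input size is an arbitrary choice.
def _endpoint_demo():
  images = tf.placeholder(tf.float32, [1, 320, 320, 3])
  with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    _, endpoints = mobilenet_v2.mobilenet_base(
        images, final_endpoint='layer_19')
  # Both layers referenced by `feature_map_layout` are present:
  print(endpoints['layer_15/expansion_output'].shape)  # (1, 20, 20, 576)
  print(endpoints['layer_19'].shape)                   # (1, 10, 10, 1280)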
def segmentation_head(input_tensor, net, is_training, weight_decay, bn_decay):
  with tf.contrib.slim.arg_scope(
      mobilenet_v2.training_scope(is_training=is_training,
                                  weight_decay=weight_decay)):
    with tf.contrib.slim.arg_scope(
        [tf.contrib.slim.conv2d],
        normalizer_params={
            'scale': True,
            'center': True,
            'epsilon': 1e-5,
            'decay': bn_decay,
            'fused': False
        }):
      # Disabled ASPP image-level pooling branch:
      # feature_map_size = tf.shape(net)
      # branch_1 = tf.reduce_mean(net, [1, 2], name='image_level_global_pool',
      #                           keepdims=True)
      # branch_1 = tf.contrib.slim.conv2d(branch_1, 256, [1, 1],
      #                                   scope='image_level_conv_1x1',
      #                                   activation_fn=tf.nn.relu6)
      # branch_1 = tf.image.resize_bilinear(
      #     branch_1, (feature_map_size[1], feature_map_size[2]),
      #     align_corners=True)

      branch_2 = tf.contrib.slim.conv2d(net, 256, [1, 1], scope='aspp0',
                                        activation_fn=tf.nn.relu6)

      # Disabled concat projection that merged the two ASPP branches:
      # out = tf.concat([branch_1, branch_2], axis=-1)
      # concat_project = tf.contrib.slim.conv2d(
      #     out, 256, [1, 1], scope='concat_projection',
      #     activation_fn=tf.nn.relu6)

      # Project to a single-channel logit map, then upsample to input size.
      final_conv = tf.contrib.slim.conv2d(
          branch_2, 1, [1, 1],
          scope='final_layer',
          normalizer_fn=None,
          activation_fn=None,
          biases_initializer=tf.contrib.slim.initializers.xavier_initializer())
      out = tf.image.resize_bilinear(
          final_conv, (input_tensor.shape[1], input_tensor.shape[2]),
          align_corners=True)

      return out, {
          'branch_2': branch_2,
          'final_conv': final_conv,
          'resize': out
      }
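# Hedged usage sketch for `segmentation_head`: build the MobileNetV2 base up
# to its last bottleneck ('layer_18'), then attach the head. The input size
# and the weight_decay/bn_decay values are illustrative placeholders, and
# `_segmentation_demo` is a hypothetical driver, not part of the original
# code.
def _segmentation_demo():
  images = tf.placeholder(tf.float32, [1, 224, 224, 3])
  with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    _, endpoints = mobilenet_v2.mobilenet_base(images)
  mask_logits, intermediates = segmentation_head(
      images, endpoints['layer_18'], is_training=False,
      weight_decay=4e-5, bn_decay=0.997)
  print(mask_logits.shape)  # (1, 224, 224, 1): one logit per input pixel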
def encode(self, input_tensor, name):
  """Encode the input tensor with a MobileNetV2 backbone.

  :param input_tensor: input image tensor, [batch, height, width, channels]
  :param name: name for the encoder (currently unused)
  :return: an OrderedDict of intermediate MobileNetV2 feature maps and their
    static shapes, keyed by endpoint name
  """
  with slim.arg_scope(
      mobilenet_v2.training_scope(is_training=self._is_training)):
    logits, endpoints = mobilenet_v2.mobilenet_base(
        input_tensor=input_tensor, is_training=self._is_training)

    ret = OrderedDict()

    ret['layer_7'] = dict()
    # asymetric_7 = self.conv2d(inputdata=endpoints["layer_7"],
    #                           out_channel=32, kernel_size=[3, 1],
    #                           use_bias=False, name='asymetric_7')
    ret['layer_7']['data'] = endpoints["layer_7"]
    ret['layer_7']['shape'] = endpoints["layer_7"].get_shape().as_list()

    ret['layer_14'] = dict()
    # asymetric_14 = self.conv2d(inputdata=endpoints["layer_14"],
    #                            out_channel=96, kernel_size=[3, 1],
    #                            use_bias=False, name='asymetric_14')
    ret['layer_14']['data'] = endpoints["layer_14"]
    ret['layer_14']['shape'] = endpoints["layer_14"].get_shape().as_list()

    ret['layer_18'] = dict()
    # asymetric_19 = self.conv2d(inputdata=endpoints["layer_19"],
    #                            out_channel=1280, kernel_size=[3, 1],
    #                            use_bias=False, name='asymetric_19')
    ret['layer_18']['data'] = endpoints["layer_18"]
    ret['layer_18']['shape'] = endpoints["layer_18"].get_shape().as_list()

  return ret
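# Hedged sketch of how `encode` can be driven. The owning encoder class is
# not shown in this file, so `_DemoEncoder` is a hypothetical stand-in that
# only supplies the `_is_training` flag the method reads; the input size is
# illustrative.
class _DemoEncoder(object):

  def __init__(self, is_training):
    self._is_training = is_training

  encode = encode  # reuse the function above as a bound method


def _encode_demo():
  images = tf.placeholder(tf.float32, [1, 256, 512, 3])
  ret = _DemoEncoder(is_training=False).encode(images, name='encode')
  for layer_name, info in ret.items():
    print(layer_name, info['shape'])  # e.g. layer_7 -> [1, 32, 64, 32]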
def mobilenet_backbone(input_tensor, depth_multiplier, output_stride,
                       is_training, weight_decay, bn_decay):
  with tf.contrib.slim.arg_scope(
      mobilenet_v2.training_scope(is_training=is_training,
                                  weight_decay=weight_decay)):
    with tf.contrib.slim.arg_scope(
        [tf.contrib.slim.conv2d],
        normalizer_params={
            'scale': True,
            'center': True,
            'epsilon': 1e-3,
            'decay': bn_decay,
            'fused': False
        }):
      logits, endpoints = mobilenet_v2.mobilenet(
          input_tensor=input_tensor,
          num_classes=2,
          depth_multiplier=depth_multiplier,
          output_stride=output_stride,
          final_endpoint='layer_18')
  net = endpoints['layer_18']
  return net, endpoints
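# Hedged usage sketch for `mobilenet_backbone`. With output_stride=16 the
# later strides become atrous convolutions, so 'layer_18' stays at 1/16 of
# the input resolution, the usual setting for a DeepLab-style trunk. The
# decay values are illustrative and `_backbone_demo` is a hypothetical
# driver.
def _backbone_demo():
  images = tf.placeholder(tf.float32, [1, 512, 512, 3])
  net, endpoints = mobilenet_backbone(
      images, depth_multiplier=1.0, output_stride=16,
      is_training=False, weight_decay=4e-5, bn_decay=0.997)
  print(net.shape)  # expected (1, 32, 32, 320): 'layer_18' held at stride 16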
def style_prediction_mobilenet(style_input_,
                               activation_names,
                               activation_depths,
                               mobilenet_end_point='layer_19',
                               mobilenet_trainable=True,
                               style_params_trainable=False,
                               style_prediction_bottleneck=100,
                               reuse=None):
  """Maps style images to the style embeddings using MobileNetV2.

  Args:
    style_input_: Tensor. Batch of style input images.
    activation_names: list of strings. Scope names of the activations of the
      transformer network which are used to apply style normalization.
    activation_depths: list of ints. Depths of the activations of the
      transformer network which are used to apply style normalization.
    mobilenet_end_point: string. Specifies the endpoint to construct the
      MobileNetV2 network up to. This network is part of the style prediction
      network.
    mobilenet_trainable: bool. Should the MobileNetV2 parameters be marked
      as trainable?
    style_params_trainable: bool. Should the mapping from bottleneck to beta
      and gamma parameters be marked as trainable?
    style_prediction_bottleneck: int. Specifies the bottleneck size in the
      number of parameters of the style embedding.
    reuse: bool. Whether to reuse model parameters. Defaults to False.

  Returns:
    Tensor for the output of the style prediction network, Tensor for the
      bottleneck of style parameters of the style prediction network.
  """
  # Enter both scopes; joining them with a bare `and` would silently discard
  # the name scope.
  with tf.name_scope('style_prediction_mobilenet'), tf.variable_scope(
      tf.get_variable_scope(), reuse=reuse):
    with slim.arg_scope(
        mobilenet_v2.training_scope(is_training=mobilenet_trainable)):
      _, end_points = mobilenet.mobilenet_base(
          style_input_,
          conv_defs=mobilenet_v2.V2_DEF,
          final_endpoint=mobilenet_end_point,
          scope='MobilenetV2')

    feat_convlayer = end_points[mobilenet_end_point]
    with tf.name_scope('bottleneck'):
      # (batch_size, 1, 1, depth).
      bottleneck_feat = tf.reduce_mean(
          feat_convlayer, axis=[1, 2], keepdims=True)

    if style_prediction_bottleneck > 0:
      with tf.variable_scope('mobilenet_conv'):
        with slim.arg_scope(
            [slim.conv2d],
            activation_fn=None,
            normalizer_fn=None,
            trainable=mobilenet_trainable):
          # (batch_size, 1, 1, style_prediction_bottleneck).
          bottleneck_feat = slim.conv2d(bottleneck_feat,
                                        style_prediction_bottleneck, [1, 1])

    style_params = {}
    with tf.variable_scope('style_params'):
      for i in range(len(activation_depths)):
        with tf.variable_scope(activation_names[i], reuse=reuse):
          with slim.arg_scope(
              [slim.conv2d],
              activation_fn=None,
              normalizer_fn=None,
              trainable=style_params_trainable):
            # Computing beta parameter of the style normalization for the
            # activation_names[i] layer of the style transformer network.
            # (batch_size, 1, 1, activation_depths[i])
            beta = slim.conv2d(bottleneck_feat, activation_depths[i], [1, 1])
            # (batch_size, activation_depths[i])
            beta = tf.squeeze(beta, [1, 2], name='SpatialSqueeze')
            style_params['{}/beta'.format(activation_names[i])] = beta

            # Computing gamma parameter of the style normalization for the
            # activation_names[i] layer of the style transformer network.
            # (batch_size, 1, 1, activation_depths[i])
            gamma = slim.conv2d(bottleneck_feat, activation_depths[i], [1, 1])
            # (batch_size, activation_depths[i])
            gamma = tf.squeeze(gamma, [1, 2], name='SpatialSqueeze')
            style_params['{}/gamma'.format(activation_names[i])] = gamma

  return style_params, bottleneck_feat
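# Hedged usage sketch for `style_prediction_mobilenet`. The activation names
# and depths below are placeholders; in the real style-transfer setup they
# enumerate the transformer-network layers that receive conditional instance
# normalization. `_style_params_demo` is a hypothetical driver.
def _style_params_demo():
  style_images = tf.placeholder(tf.float32, [4, 256, 256, 3])
  activation_names = ['residual/residual1/conv1', 'residual/residual1/conv2']
  activation_depths = [128, 128]
  style_params, bottleneck = style_prediction_mobilenet(
      style_images, activation_names, activation_depths,
      mobilenet_trainable=False, style_params_trainable=True)
  print(bottleneck.shape)  # (4, 1, 1, 100): the style embedding
  print(style_params['residual/residual1/conv1/beta'].shape)  # (4, 128)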
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape
      [batch, height_i, width_i, depth_i]
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  with tf.variable_scope('MobilenetV2', reuse=self._reuse_weights) as scope:
    with slim.arg_scope(
        mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
        slim.arg_scope(
            [mobilenet.depth_multiplier], min_depth=self._min_depth):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        _, image_features = mobilenet_v2.mobilenet_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='layer_19',
            depth_multiplier=self._depth_multiplier,
            conv_defs=self._conv_defs,
            use_explicit_padding=self._use_explicit_padding,
            scope=scope)

    depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('fpn', reuse=self._reuse_weights):
        feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
        base_fpn_max_level = min(self._fpn_max_level, 5)
        feature_block_list = []
        for level in range(self._fpn_min_level, base_fpn_max_level + 1):
          feature_block_list.append(feature_blocks[level - 2])
        fpn_features = feature_map_generators.fpn_top_down_feature_maps(
            [(key, image_features[key]) for key in feature_block_list],
            depth=depth_fn(self._additional_layer_depth),
            use_depthwise=self._use_depthwise,
            use_explicit_padding=self._use_explicit_padding)
        feature_maps = []
        for level in range(self._fpn_min_level, base_fpn_max_level + 1):
          feature_maps.append(
              fpn_features['top_down_{}'.format(feature_blocks[level - 2])])
        last_feature_map = fpn_features['top_down_{}'.format(
            feature_blocks[base_fpn_max_level - 2])]
        # Construct coarse features by repeated stride-2 convolutions on the
        # coarsest FPN output.
        padding = 'VALID' if self._use_explicit_padding else 'SAME'
        kernel_size = 3
        for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
          if self._use_depthwise:
            conv_op = functools.partial(
                slim.separable_conv2d, depth_multiplier=1)
          else:
            conv_op = slim.conv2d
          if self._use_explicit_padding:
            last_feature_map = ops.fixed_padding(last_feature_map, kernel_size)
          last_feature_map = conv_op(
              last_feature_map,
              num_outputs=depth_fn(self._additional_layer_depth),
              kernel_size=[kernel_size, kernel_size],
              stride=2,
              padding=padding,
              scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
          feature_maps.append(last_feature_map)
  return feature_maps
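# Hedged sketch of the level-to-endpoint bookkeeping used above, pulled out
# as plain Python so the indexing is easy to check: FPN level L maps to
# feature_blocks[L - 2], i.e. level 2 -> 'layer_4' (stride 4) through
# level 5 -> 'layer_19' (stride 32); any level past 5 is synthesized by the
# stride-2 convolutions in the final loop. `_fpn_level_demo` and its default
# arguments are illustrative.
def _fpn_level_demo(fpn_min_level=3, fpn_max_level=7):
  feature_blocks = ['layer_4', 'layer_7', 'layer_14', 'layer_19']
  base_fpn_max_level = min(fpn_max_level, 5)
  used = [feature_blocks[level - 2]
          for level in range(fpn_min_level, base_fpn_max_level + 1)]
  extra = list(range(base_fpn_max_level + 1, fpn_max_level + 1))
  print(used)   # ['layer_7', 'layer_14', 'layer_19']
  print(extra)  # [6, 7]: coarse maps added with stride-2 convs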