Example #1
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['layer_15/expansion_output', 'layer_19', '', '', '',
                           ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_depthwise': self._use_depthwise,
            'use_explicit_padding': self._use_explicit_padding,
        }

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
                with slim.arg_scope(self._conv_hyperparams_fn()):
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)

        return list(feature_maps.values())
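The feature_map_layout above drives the SSD multi-resolution head. Below is a minimal standalone sketch of that machinery, hedged: it assumes TF 1.x with the research/slim nets.mobilenet package and the TF Object Detection API's feature_map_generators importable, and the input size, depths, and layer list are illustrative rather than taken from the extractor's configuration.

import tensorflow as tf
from nets.mobilenet import mobilenet_v2
from object_detection.models import feature_map_generators

slim = tf.contrib.slim

# Build the MobileNetV2 base and collect its named endpoints.
images = tf.placeholder(tf.float32, [1, 320, 320, 3])
with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    _, image_features = mobilenet_v2.mobilenet_base(
        images, final_endpoint='layer_19')

# Same layout structure as in extract_features, shortened to four levels.
feature_map_layout = {
    'from_layer': ['layer_15/expansion_output', 'layer_19', '', ''],
    'layer_depth': [-1, -1, 512, 256],
    'use_depthwise': False,
    'use_explicit_padding': False,
}
feature_maps = feature_map_generators.multi_resolution_feature_maps(
    feature_map_layout=feature_map_layout,
    depth_multiplier=1.0,
    min_depth=16,
    insert_1x1_conv=True,
    image_features=image_features)
print(list(feature_maps.keys()))  # one feature map per entry in 'from_layer'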
Example #2
def segmentation_head(input_tensor, net, is_training, weight_decay, bn_decay):
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training,
                                        weight_decay=weight_decay)):
        with tf.contrib.slim.arg_scope(
            [tf.contrib.slim.conv2d],
                normalizer_params={
                    'scale': True,
                    'center': True,
                    'epsilon': 1e-5,
                    'decay': bn_decay,
                    'fused': False
                }):
            # feature_map_size = tf.shape(net)
            # branch_1 = tf.reduce_mean(net, [1, 2], name='image_level_global_pool', keepdims=True)
            # branch_1 = tf.contrib.slim.conv2d(branch_1, 256, [1, 1], scope="image_level_conv_1x1",
            #                                   activation_fn=tf.nn.relu6)
            # branch_1 = tf.image.resize_bilinear(branch_1, (feature_map_size[1], feature_map_size[2]),
            #                                     align_corners=True)

            branch_2 = tf.contrib.slim.conv2d(net,
                                              256, [1, 1],
                                              scope='aspp0',
                                              activation_fn=tf.nn.relu6)

            # out = tf.concat([branch_1, branch_2], axis=-1)
            # concat_project = tf.contrib.slim.conv2d(out, 256, [1, 1], scope='concat_projection',
            #                                         activation_fn=tf.nn.relu6)

            final_conv = tf.contrib.slim.conv2d(
                branch_2,
                1, [1, 1],
                scope='final_layer',
                normalizer_fn=None,
                activation_fn=None,
                biases_initializer=tf.contrib.slim.initializers.xavier_initializer())
            out = tf.image.resize_bilinear(
                final_conv, (input_tensor.shape[1], input_tensor.shape[2]),
                align_corners=True)

            return out, {
                'branch_2': branch_2,
                'final_conv': final_conv,
                'resize': out
            }
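A possible call site for segmentation_head, shown as a hedged sketch: the MobileNetV2 base is built first with the same research/slim package the function relies on, and the input size and hyperparameter values are illustrative.

import tensorflow as tf
from nets.mobilenet import mobilenet_v2

# Fully defined static shape so the final resize_bilinear can read it.
images = tf.placeholder(tf.float32, [1, 224, 224, 3])
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    net, _ = mobilenet_v2.mobilenet_base(images, final_endpoint='layer_18')

mask_logits, head_endpoints = segmentation_head(
    images, net, is_training=False, weight_decay=4e-5, bn_decay=0.997)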
Example #3
    def encode(self, input_tensor, name):
        """
        根据vgg16框架对输入的tensor进行编码
        :param input_tensor:
        :param name:
        :param flags:
        :return: 输出vgg16编码特征
        """

        # print(self._phase)
        # model_path = '/logs/'
        # input_tensor = tf.Print(input_tensor, [tf.reduce_sum(tf.to_int32(tf.is_nan(input_tensor))) > 0],
        #                         message="input_tensor")

        with slim.arg_scope(mobilenet_v2.training_scope(is_training=self._is_training)):
            logits, endpoints = mobilenet_v2.mobilenet_base(
                input_tensor=input_tensor,
                is_training=self._is_training)

            ret = OrderedDict()

            ret['layer_7'] = dict()
            # asymetric_7 = self.conv2d(inputdata=endpoints["layer_7"], out_channel=32,
            #                           kernel_size=[3, 1], use_bias=False, name='asymetric_7')
            ret['layer_7']['data'] = endpoints["layer_7"]
            ret['layer_7']['shape'] = endpoints["layer_7"].get_shape().as_list()

            ret['layer_14'] = dict()
            # asymetric_14 = self.conv2d(inputdata=endpoints["layer_14"], out_channel=96,
            #                            kernel_size=[3, 1], use_bias=False, name='asymetric_14')
            ret['layer_14']['data'] = endpoints["layer_14"]
            ret['layer_14']['shape'] = endpoints["layer_14"].get_shape().as_list()

            ret['layer_18'] = dict()
            # asymetric_19 = self.conv2d(inputdata=endpoints["layer_19"], out_channel=1280,
            #                            kernel_size=[3, 1], use_bias=False, name='asymetric_19')
            ret['layer_18']['data'] = endpoints["layer_18"]
            ret['layer_18']['shape'] = endpoints["layer_18"].get_shape().as_list()

        return ret
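The same endpoint extraction can be sketched outside the class; this hedged version assumes TF 1.x and the research/slim nets.mobilenet package, with an illustrative input size.

import tensorflow as tf
from collections import OrderedDict
from nets.mobilenet import mobilenet_v2

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [1, 256, 512, 3])
with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    _, endpoints = mobilenet_v2.mobilenet_base(input_tensor=images)

# Collect the same three endpoints the encode() method exposes.
ret = OrderedDict()
for name in ('layer_7', 'layer_14', 'layer_18'):
    ret[name] = {'data': endpoints[name],
                 'shape': endpoints[name].get_shape().as_list()}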
Example #4
def mobilenet_backbone(input_tensor, depth_multiplier, output_stride,
                       is_training, weight_decay, bn_decay):
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training,
                                        weight_decay=weight_decay)):
        with tf.contrib.slim.arg_scope(
            [tf.contrib.slim.conv2d],
                normalizer_params={
                    'scale': True,
                    'center': True,
                    'epsilon': 1e-3,
                    'decay': bn_decay,
                    'fused': False
                }):
            logits, endpoints = mobilenet_v2.mobilenet(
                input_tensor=input_tensor,
                num_classes=2,
                depth_multiplier=depth_multiplier,
                output_stride=output_stride,
                final_endpoint='layer_18')

    net = endpoints['layer_18']
    return net, endpoints
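A possible call site (hedged sketch; the input size and hyperparameter values are illustrative):

import tensorflow as tf

images = tf.placeholder(tf.float32, [1, 513, 513, 3])
net, endpoints = mobilenet_backbone(
    images, depth_multiplier=1.0, output_stride=16,
    is_training=False, weight_decay=4e-5, bn_decay=0.997)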
Example #5
def style_prediction_mobilenet(style_input_,
                               activation_names,
                               activation_depths,
                               mobilenet_end_point='layer_19',
                               mobilenet_trainable=True,
                               style_params_trainable=False,
                               style_prediction_bottleneck=100,
                               reuse=None):
    """Maps style images to the style embeddings using MobileNetV2.

  Args:
    style_input_: Tensor. Batch of style input images.
    activation_names: string. Scope names of the activations of the transformer
        network which are used to apply style normalization.
    activation_depths: Shapes of the activations of the transformer network
        which are used to apply style normalization.
    mobilenet_end_point: string. Specifies the endpoint to construct the
        MobileNetV2 network up to. This network is part of the style prediction
        network.
    mobilenet_trainable: bool. Should the MobileNetV2 parameters be marked
        as trainable?
    style_params_trainable: bool. Should the mapping from bottleneck to
        beta and gamma parameters be marked as trainable?
    style_prediction_bottleneck: int. Specifies the bottleneck size in the
        number of parameters of the style embedding.
    reuse: bool. Whether to reuse model parameters. Defaults to None.

  Returns:
    Tensor for the output of the style prediction network, Tensor for the
        bottleneck of style parameters of the style prediction network.
  """
    with tf.name_scope('style_prediction_mobilenet'), tf.variable_scope(
            tf.get_variable_scope(), reuse=reuse):
        with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=mobilenet_trainable)):
            _, end_points = mobilenet.mobilenet_base(
                style_input_,
                conv_defs=mobilenet_v2.V2_DEF,
                final_endpoint=mobilenet_end_point,
                scope='MobilenetV2')

        feat_convlayer = end_points[mobilenet_end_point]
        with tf.name_scope('bottleneck'):
            # (batch_size, 1, 1, depth).
            bottleneck_feat = tf.reduce_mean(feat_convlayer,
                                             axis=[1, 2],
                                             keep_dims=True)

        if style_prediction_bottleneck > 0:
            with tf.variable_scope('mobilenet_conv'):
                with slim.arg_scope([slim.conv2d],
                                    activation_fn=None,
                                    normalizer_fn=None,
                                    trainable=mobilenet_trainable):
                    # (batch_size, 1, 1, style_prediction_bottleneck).
                    bottleneck_feat = slim.conv2d(bottleneck_feat,
                                                  style_prediction_bottleneck,
                                                  [1, 1])

        style_params = {}
        with tf.variable_scope('style_params'):
            for i in range(len(activation_depths)):
                with tf.variable_scope(activation_names[i], reuse=reuse):
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        trainable=style_params_trainable):
                        # Computing beta parameter of the style normalization for the
                        # activation_names[i] layer of the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        beta = slim.conv2d(bottleneck_feat,
                                           activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        beta = tf.squeeze(beta, [1, 2], name='SpatialSqueeze')
                        style_params['{}/beta'.format(
                            activation_names[i])] = beta

                        # Computing gamma parameter of the style normalization for the
                        # activation_names[i] layer of the style transformer network.
                        # (batch_size, 1, 1, activation_depths[i])
                        gamma = slim.conv2d(bottleneck_feat,
                                            activation_depths[i], [1, 1])
                        # (batch_size, activation_depths[i])
                        gamma = tf.squeeze(gamma, [1, 2],
                                           name='SpatialSqueeze')
                        style_params['{}/gamma'.format(
                            activation_names[i])] = gamma

    return style_params, bottleneck_feat
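A hedged usage sketch follows; the activation names and depths are illustrative placeholders, not the real transformer-network layer names, and the batch size is arbitrary.

import tensorflow as tf

style_images = tf.placeholder(tf.float32, [4, 256, 256, 3])
style_params, bottleneck = style_prediction_mobilenet(
    style_images,
    activation_names=['conv1', 'conv2'],  # placeholder scope names
    activation_depths=[32, 64],           # placeholder channel counts
    mobilenet_trainable=False,
    style_params_trainable=False)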
Example #6
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        with tf.variable_scope('MobilenetV2',
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(
                mobilenet_v2.training_scope(is_training=None, bn_decay=0.9997)), \
                slim.arg_scope(
                    [mobilenet.depth_multiplier], min_depth=self._min_depth):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = mobilenet_v2.mobilenet_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='layer_19',
                        depth_multiplier=self._depth_multiplier,
                        conv_defs=self._conv_defs,
                        use_explicit_padding=self._use_explicit_padding,
                        scope=scope)
            depth_fn = lambda d: max(
                int(d * self._depth_multiplier), self._min_depth)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('fpn', reuse=self._reuse_weights):
                    feature_blocks = [
                        'layer_4', 'layer_7', 'layer_14', 'layer_19'
                    ]
                    base_fpn_max_level = min(self._fpn_max_level, 5)
                    feature_block_list = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_block_list.append(feature_blocks[level - 2])
                    fpn_features = feature_map_generators.fpn_top_down_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=depth_fn(self._additional_layer_depth),
                        use_depthwise=self._use_depthwise,
                        use_explicit_padding=self._use_explicit_padding)
                    feature_maps = []
                    for level in range(self._fpn_min_level,
                                       base_fpn_max_level + 1):
                        feature_maps.append(fpn_features['top_down_{}'.format(
                            feature_blocks[level - 2])])
                    last_feature_map = fpn_features['top_down_{}'.format(
                        feature_blocks[base_fpn_max_level - 2])]
                    # Construct coarse features
                    padding = 'VALID' if self._use_explicit_padding else 'SAME'
                    kernel_size = 3
                    for i in range(base_fpn_max_level + 1,
                                   self._fpn_max_level + 1):
                        if self._use_depthwise:
                            conv_op = functools.partial(slim.separable_conv2d,
                                                        depth_multiplier=1)
                        else:
                            conv_op = slim.conv2d
                        if self._use_explicit_padding:
                            last_feature_map = ops.fixed_padding(
                                last_feature_map, kernel_size)
                        last_feature_map = conv_op(
                            last_feature_map,
                            num_outputs=depth_fn(self._additional_layer_depth),
                            kernel_size=[kernel_size, kernel_size],
                            stride=2,
                            padding=padding,
                            scope='bottom_up_Conv2d_{}'.format(
                                i - base_fpn_max_level + 19))
                        feature_maps.append(last_feature_map)
        return feature_maps
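The FPN top-down path used above can also be exercised on its own; this hedged sketch assumes TF 1.x, the research/slim nets.mobilenet package, and the TF Object Detection API's feature_map_generators, with an illustrative input size and depth.

import tensorflow as tf
from nets.mobilenet import mobilenet_v2
from object_detection.models import feature_map_generators

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [1, 320, 320, 3])
with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    _, image_features = mobilenet_v2.mobilenet_base(
        images, final_endpoint='layer_19')

fpn_features = feature_map_generators.fpn_top_down_feature_maps(
    [(name, image_features[name])
     for name in ('layer_4', 'layer_7', 'layer_14', 'layer_19')],
    depth=128)
print(list(fpn_features.keys()))  # one 'top_down_<layer>' entry per input level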