Example #1
def inception_v2_ssd(img):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)
    c1 = end_point['Mixed_3c']
    c2 = end_point['Mixed_4e']
    c3 = end_point['Mixed_5c']
    return c1, c2, c3
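The snippets on this page assume a TF 1.x graph built with tf.contrib.slim and the slim "nets" package. A minimal driver for the function above might look like the sketch below (the 512x512 input size and the import path are assumptions, not part of the snippet):

import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets import inception_v2  # assumed location of the slim Inception V2 definition

img = tf.placeholder(tf.float32, shape=[None, 512, 512, 3])
c1, c2, c3 = inception_v2_ssd(img)
# For a 512x512 input: Mixed_3c is a stride-8 map (64x64), Mixed_4e stride-16 (32x32),
# Mixed_5c stride-32 (16x16).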
Example #2
def inception_v2_ssd(img):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)
        vbs = slim.get_variables_to_restore()

    c1 = end_point['Mixed_3c']
    c2 = end_point['Mixed_4e']
    c3 = end_point['Mixed_5c']
    return c1, c2, c3, vbs
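The vbs list collected inside the arg_scope is what you would hand to a Saver to restore ImageNet weights before fine-tuning. A sketch, assuming a local TF-Slim checkpoint named inception_v2.ckpt:

import tensorflow as tf

img = tf.placeholder(tf.float32, shape=[None, 512, 512, 3])
c1, c2, c3, vbs = inception_v2_ssd(img)
saver = tf.train.Saver(var_list=vbs)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, 'inception_v2.ckpt')  # hypothetical checkpoint path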
Example #3
def inception_v2_ssd(img, cfg):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)

        Mixed_3c = end_point['Mixed_3c']
        Mixed_4e = end_point['Mixed_4e']
        cell_11 = end_point['Mixed_5c']
        vbs = slim.get_trainable_variables()
        cell_11 = tf.image.resize_bilinear(cell_11,size=[32,32])
        cell_11 = tf.concat([cell_11,Mixed_4e],axis=3)

        cell_7 = tf.image.resize_bilinear(Mixed_4e,size=[64,64])
        cell_7 = tf.concat([cell_7, Mixed_3c], axis=3)

    cell_11 = slim.conv2d(cell_11, 1024, kernel_size=1, activation_fn=slim.nn.relu)

    cell_7 = slim.conv2d(cell_7, 512, kernel_size=3, activation_fn=slim.nn.relu)
    cell_7 = slim.conv2d(cell_7, 256, kernel_size=1, activation_fn=slim.nn.relu)

    cv6 = slim.conv2d(cell_11, 1024, kernel_size=3, rate=6, activation_fn=slim.nn.relu, scope='conv6')
    cv7 = slim.conv2d(cv6, 1024, kernel_size=1, activation_fn=slim.nn.relu, scope='conv7')

    s = utils.normalize_to_target(cell_7, target_norm_value=12.0, dim=1)

    cv8 = slim.conv2d(cv7, 256, kernel_size=1, stride=1, scope='conv8_0')
    cv8 = slim.conv2d(cv8, 512, kernel_size=3, stride=2, scope='conv8_1')

    cv9 = slim.conv2d(cv8, 128, kernel_size=1, stride=1, scope='conv9_0')
    cv9 = slim.conv2d(cv9, 256, kernel_size=3, stride=2, scope='conv9_1')

    cv10 = slim.conv2d(cv9, 128, kernel_size=1, stride=1, scope='conv10_0')
    cv10 = slim.conv2d(cv10, 256, kernel_size=3, stride=2, scope='conv10_1')

    cv11 = slim.conv2d(cv10, 128, kernel_size=1, stride=1, scope='conv11_0')
    cv11 = slim.conv2d(cv11, 256, kernel_size=3, stride=2, scope='conv11_1')
    source = [s, cv7, cv8, cv9, cv10, cv11]
    conf = []
    loc = []
    for cv, num in zip(source, cfg.Config['aspect_num']):
        loc.append(slim.conv2d(cv, num * 4, kernel_size=3, stride=1, activation_fn=None))
        conf.append(
            slim.conv2d(cv, num * cfg.Config['num_classes'], kernel_size=3, stride=1, activation_fn=None))
    loc = tf.concat([tf.reshape(o, shape=(cfg.batch_size, -1, 4)) for o in loc], axis=1)
    conf = tf.concat([tf.reshape(o, shape=(cfg.batch_size, -1, cfg.Config['num_classes'])) for o in conf],
                         axis=1)

    return loc, conf, vbs
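The final reshape-and-concat turns the per-map predictions into single (batch, num_boxes, ...) tensors. A rough check of num_boxes with illustrative values (512x512 input, six aspect ratios on every source map) rather than a real cfg:

feature_sizes = [64, 32, 16, 8, 4, 2]   # spatial sizes of s, cv7, cv8, cv9, cv10, cv11
aspect_num = [6, 6, 6, 6, 6, 6]         # hypothetical cfg.Config['aspect_num']
num_boxes = sum(s * s * n for s, n in zip(feature_sizes, aspect_num))
print(num_boxes)   # loc is (batch, num_boxes, 4); conf is (batch, num_boxes, num_classes)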
Example #4
 def get_feature_map(self):
     input_image = self._processor()
     bn_params = {
         'is_training': False,
         'scale': False,
         'decay': 0.9997,
         'epsilon': 0.001
     }
     with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                         normalizer_fn=slim.batch_norm,
                         normalizer_params=bn_params):
         _, activations = inception_v2.inception_v2_base(
             input_image, final_endpoint='Mixed_4e')
     feature_map = activations['Mixed_4e']
     return feature_map
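The same feature map can be built without the wrapping class. A standalone sketch with frozen batch norm mirroring the bn_params above (the 300x300 input is illustrative):

import tensorflow as tf
import tensorflow.contrib.slim as slim
from nets import inception_v2  # assumed import path

images = tf.placeholder(tf.float32, [1, 300, 300, 3])
with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                    normalizer_fn=slim.batch_norm,
                    normalizer_params={'is_training': False, 'scale': False,
                                       'decay': 0.9997, 'epsilon': 0.001}):
    _, activations = inception_v2.inception_v2_base(images, final_endpoint='Mixed_4e')
feature_map = activations['Mixed_4e']   # stride-16 map, 19x19 for a 300x300 input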
Example #5
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        with tf.control_dependencies([shape_assert]):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                with _batch_norm_arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                        batch_norm_scale=True,
                        train_batch_norm=self._train_batch_norm):
                    _, activations = inception_v2.inception_v2_base(
                        preprocessed_inputs,
                        final_endpoint='Mixed_4e',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

        return tf.concat(
            [slim.avg_pool2d(activations['Mixed_3c'], [2, 2],
                             stride=2,
                             scope='ex_pool3c',
                             padding='SAME'),
             activations['Mixed_4e']],
            3,
            name='ex_concat'), 0
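The 2x2/stride-2 average pooling is what lets the stride-8 Mixed_3c map be concatenated with the stride-16 Mixed_4e map. A small sanity check with a hypothetical 600x600 input:

import math

h = 600                           # hypothetical input height (width behaves the same)
mixed_3c = math.ceil(h / 8)       # 75: Mixed_3c is a stride-8 endpoint
mixed_4e = math.ceil(h / 16)      # 38: Mixed_4e is a stride-16 endpoint
pooled = math.ceil(mixed_3c / 2)  # 38: after the 2x2 avg pool with stride 2, SAME padding
assert pooled == mixed_4e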
Example #6
def inception_v2_ssd(img, cfg):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        logits, end_point = inception_v2.inception_v2_base(img)

        Mixed_3c = end_point['Mixed_3c']
        Mixed_4e = end_point['Mixed_4e']
        Mixed_5c = end_point['Mixed_5c']
        vbs = slim.get_trainable_variables()
        #vbs = None
        cell_11 = tf.image.resize_bilinear(Mixed_5c,size=[int(32*(cfg.image_size/512)),int(32*(cfg.image_size/512))])
        cell_11 = tf.concat([cell_11,Mixed_4e],axis=3)

        cell_7 = tf.image.resize_bilinear(Mixed_4e,size=[int(64*(cfg.image_size/512)),int(64*(cfg.image_size/512))])
        cell_7 = tf.concat([cell_7,Mixed_3c],axis=3)


    mask_fp = get_mask_fp(Mixed_3c ,Mixed_4e,Mixed_5c)

    cell_11 = slim.conv2d(cell_11,1024,kernel_size=1,activation_fn=slim.nn.relu)

    cell_7 = slim.conv2d(cell_7, 512, kernel_size=3, activation_fn=slim.nn.relu)
    cell_7 = slim.conv2d(cell_7, 256, kernel_size=1, activation_fn=slim.nn.relu)

    cv6 = slim.conv2d(cell_11, 1024, kernel_size=3, rate=6, activation_fn=slim.nn.relu, scope='conv6')
    cv7 = slim.conv2d(cv6, 1024, kernel_size=1, activation_fn=slim.nn.relu, scope='conv7')

    s = utils.normalize_to_target(cell_7, target_norm_value=cfg.norm_value, dim=1)

    cv8 = inception(cv7, out_put=512, name='cv8', stride=2)
    cv9 = inception(cv8, out_put=256, name='cv9', stride=2)
    cv10 = inception(cv9, out_put=256, name='cv10', stride=2)
    cv11 = inception(cv10, out_put=256, name='cv11', stride=2)

    source = [s, cv7, cv8, cv9, cv10, cv11]
    conf = []
    loc = []
    for cv, num in zip(source, cfg.Config['aspect_num']):

        loc.append(slim.conv2d(cv, num * 4, kernel_size=3, stride=1, activation_fn=None))

        conf.append(
                slim.conv2d(cv, num * cfg.Config['num_classes'], kernel_size=3, stride=1, activation_fn=None))

    loc = tf.concat([tf.reshape(o, shape=(cfg.batch_size, -1, 4)) for o in loc], axis=1)
    conf = tf.concat([tf.reshape(o, shape=(cfg.batch_size, -1, cfg.Config['num_classes'])) for o in conf],
                         axis=1)

    return loc, conf, mask_fp, vbs
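The resize targets scale with cfg.image_size; for the default 512x512 case they evaluate to the spatial sizes of the endpoints being merged (an illustrative check, not part of the snippet):

image_size = 512                        # hypothetical cfg.image_size
print(int(32 * (image_size / 512)))     # 32: matches the 32x32 Mixed_4e grid
print(int(64 * (image_size / 512)))     # 64: matches the 64x64 Mixed_3c grid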
Example #7
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        feature_map_layout = {
            'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
        }

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('InceptionV2',
                                       reuse=self._reuse_weights) as scope:
                    _, image_features = inception_v2.inception_v2_base(
                        preprocessed_inputs,
                        final_endpoint='Mixed_5c',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)
                    feature_maps = feature_map_generators.multi_resolution_feature_maps(
                        feature_map_layout=feature_map_layout,
                        depth_multiplier=self._depth_multiplier,
                        min_depth=self._min_depth,
                        insert_1x1_conv=True,
                        image_features=image_features)
        ret = []
        for endpoint_indicator, feature_map in zip(self._endpoints_indicator,
                                                   feature_maps.values()):
            if endpoint_indicator == '1':
                ret.append(feature_map)
        return ret
Example #8
  def extract_features(self, inputs):
    """Extracts features from inputs.

    This function adds 4 additional feature maps on top of 'Mixed_4c' and
    'Mixed_5c' in the base Inception v2 network. For example, if `inputs`
    has shape [1, 300, 300, 3], the generated feature maps have the following
    shapes:
    [
      (1, 19, 19, 576),   # Mixed_4c
      (1, 10, 10, 1024),  # Mixed_5c
      (1, 5, 5, 512),     
      (1, 3, 3, 256),     
      (1, 2, 2, 256),     
      (1, 1, 1, 128)      
    ]

    Args:
      inputs: a tensor of shape [batch_size, height, width, channels],
        holding the input images.

    Returns: 
      a list of 6 float tensors of shape [batch_size, height, width, channels],
        holding feature map tensors to be fed to box predictor.
    """
    feature_map_specs_dict = {
        'layer_name': ['Mixed_4c', 'Mixed_5c', None, None, None, None],
        'layer_depth': [None, None, 512, 256, 256, 128]
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
        _, end_points = inception_v2.inception_v2_base(
          inputs,
          final_endpoint='Mixed_5c',
          min_depth=16,
          depth_multiplier=self._depth_multiplier,
          scope=scope)
        feature_maps = feature_map_generators.ssd_feature_maps(
            feature_map_tensor_dict=end_points,
            feature_map_specs_dict=feature_map_specs_dict,
            depth_multiplier=1, # for depthwise conv in separable_conv2d
            use_depthwise=self._use_depthwise, 
            insert_1x1_conv=True)
        feature_map_list = list(feature_maps.values())
        return feature_map_list
Example #9
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '',
                           ''][:self._num_layers],
            'layer_depth': [-1, -1, 512, 256, 256, 128][:self._num_layers],
            'use_explicit_padding':
            self._use_explicit_padding,
            'use_depthwise':
            self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(preprocessed_inputs,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
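In these feature_map_layout dictionaries, a named endpoint paired with a layer_depth of -1 means "reuse that base-network tensor as-is", while an empty string paired with an explicit depth means "append a new convolutional map on top of the previous one". The [:self._num_layers] slicing simply truncates both lists; for a hypothetical num_layers of 4:

num_layers = 4                                                    # hypothetical value
from_layer = ['Mixed_4c', 'Mixed_5c', '', '', '', ''][:num_layers]
layer_depth = [-1, -1, 512, 256, 256, 128][:num_layers]
print(from_layer)    # ['Mixed_4c', 'Mixed_5c', '', '']
print(layer_depth)   # [-1, -1, 512, 256]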
Example #10
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
    }

    with tf.control_dependencies([shape_assert]):
      with slim.arg_scope(self._conv_hyperparams):
        with tf.variable_scope('InceptionV2',
                               reuse=self._reuse_weights) as scope:
          _, image_features = inception_v2.inception_v2_base(
              ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
              final_endpoint='Mixed_5c',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          feature_maps = feature_map_generators.multi_resolution_feature_maps(
              feature_map_layout=feature_map_layout,
              depth_multiplier=self._depth_multiplier,
              min_depth=self._min_depth,
              insert_1x1_conv=True,
              image_features=image_features)

    return feature_maps.values()
Example #11
  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                   batch_norm_scale=True,
                                   train_batch_norm=self._train_batch_norm):
          _, activations = inception_v2.inception_v2_base(
              preprocessed_inputs,
              final_endpoint='Mixed_4e',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)

    return activations['Mixed_4e'], activations
Example #12
def model(img):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        with slim.arg_scope([slim.batch_norm], is_training=True):
            with slim.arg_scope([slim.conv2d], trainable=True):
                logits, end_point = inception_v2.inception_v2_base(img)

    c1 = end_point['Mixed_3c']
    c2 = end_point['Mixed_4e']
    c3 = end_point['Mixed_5c']
    vbs = slim.get_variables_to_restore()

    c3 = slim.conv2d(c3, 256, 1, 1, activation_fn=None)

    c2 = slim.conv2d(c2, 256, 1, 1,
                     activation_fn=None) + tf.image.resize_bilinear(
                         c3, size=tf.shape(c2)[1:3])

    c1 = slim.conv2d(c1, 256, 1, 1,
                     activation_fn=None) + tf.image.resize_bilinear(
                         c2, size=tf.shape(c1)[1:3])

    return c1, c2, c3, vbs
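A usage sketch for the top-down pyramid above (the 512x512 input size is an assumption; each level ends up with 256 channels after the 1x1 projections):

import tensorflow as tf

img = tf.placeholder(tf.float32, [None, 512, 512, 3])
c1, c2, c3, vbs = model(img)
# c1: stride-8 level (64x64x256), c2: stride-16 (32x32x256), c3: stride-32 (16x16x256)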
Example #13
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        33, preprocessed_inputs)

    feature_map_layout = {
        'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
        'layer_depth': [-1, -1, 512, 256, 256, 128],
        'use_explicit_padding': self._use_explicit_padding,
        'use_depthwise': self._use_depthwise,
    }

    with slim.arg_scope(self._conv_hyperparams_fn()):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        _, image_features = inception_v2.inception_v2_base(
            ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
            final_endpoint='Mixed_5c',
            min_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
        feature_maps = feature_map_generators.multi_resolution_feature_maps(
            feature_map_layout=feature_map_layout,
            depth_multiplier=self._depth_multiplier,
            min_depth=self._min_depth,
            insert_1x1_conv=True,
            image_features=image_features)

    return feature_maps.values()
Example #14
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.
    
        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.
    
        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        # Make sure that input is in correct format with rank 4.
        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        with tf.control_dependencies([shape_assert]):
            with slim.arg_scope(self._conv_hyperparams):
                with tf.variable_scope('InceptionV2',
                                       reuse=self._reuse_weights) as scope:
                    _, image_features = inception_v2.inception_v2_base(
                        ops.pad_to_multiple(preprocessed_inputs,
                                            self._pad_to_multiple),
                        final_endpoint='Mixed_5c',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        use_separable_conv=False,
                        scope=scope)

                    # 2. STDN version + combine mode
                    image_features = combine_and_scale_transfer_module_v1(
                        image_features, combine_mode=0)

        # return a list of feature maps
        return image_features.values()
Example #15
  def _extract_first_stage_features(self, inputs):
    """Extracts first stage features for RPN proposal prediction and
    for ROI pooling.

    Args:
      inputs: float tensor of shape [batch_size, height, width, depth].

    Returns:
      shared_feature_map: float tensor of shape 
        [batch_size, height_out, width_out, depth_out].
    """
    with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope:
      with slim.arg_scope([slim.conv2d, slim.separable_conv2d], 
          # is_training
          normalizer_fn=slim.batch_norm,
          normalizer_params=self._batch_norm_params):
        _, end_points = inception_v2.inception_v2_base(
            inputs,
            final_endpoint='Mixed_4e',
            min_depth=16,
            depth_multiplier=self._depth_multiplier,
            scope=scope)
        return end_points['Mixed_4e']
Example #16
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
        trunc_normal = lambda stddev: tf.truncated_normal_initializer(
            0.0, stddev)

        #add convolution autoencoder
        encoder_1_conv = slim.conv2d(preprocessed_inputs,
                                     depth(64), [3, 3],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c1_conv')
        #encoder_1_pool = slim.max_pool2d(encoder_1_conv, [2, 2], stride=2,
        #                                    scope='encoder_c1_pool')
        #encoder_1_dropout1 = slim.dropout(encoder_1_pool, 0.7, scope='encoder_c1_dropout1')
        encoder_2_conv = slim.conv2d(encoder_1_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c2_conv')
        encoder_3_conv = slim.conv2d(encoder_2_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c3_conv')
        #decoder
        decoder_3_deconv = slim.conv2d_transpose(
            encoder_3_conv,
            depth(128), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c3_deconv')
        decoder_2_deconv = slim.conv2d_transpose(
            decoder_3_deconv,
            depth(128), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c2_deconv')
        decoder_1_deconv = slim.conv2d_transpose(
            decoder_2_deconv,
            depth(1), [3, 3],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c1_deconv')
        #visualize decoder output
        #vis_decoder = tf.slice(decoder_1_deconv,(0,0,0,0),(1,-1,-1,-1))
        #print("vis_decoder:",vis_decoder)
        #vis_decoder = tf.reshape(vis_decoder,(256,256,1))
        #print("vis_decoder2:",vis_decoder)
        #vis_decoder = tf.transpose(vis_decoder,(2,0,3,1))
        #vis_decoder = tf.reshape(vis_decoder,(1,256,256,1))
        #image_matrix = vis_decoder.eval()
        #print("image_matrix:",image_matrix)

        #tf.summary.image('decoder_visualized',tf.expand_dims(vis_decoder, 0)

        with tf.control_dependencies([shape_assert]):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                with _batch_norm_arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                        batch_norm_scale=True,
                        train_batch_norm=self._train_batch_norm):
                    _, activations = inception_v2.inception_v2_base(
                        decoder_1_deconv,
                        final_endpoint='Mixed_4e',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

        return activations['Mixed_4e'], activations
Example #17
    def _extract_proposal_features(self, preprocessed_inputs, scope):
        """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

        preprocessed_inputs.get_shape().assert_has_rank(4)
        shape_assert = tf.Assert(
            tf.logical_and(
                tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
            ['image size must at least be 33 in both height and width.'])

        depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
        trunc_normal = lambda stddev: tf.truncated_normal_initializer(
            0.0, stddev)
        # Add a multi-branch encoder with different dilation rates.

        # Block 1
        encoder_1_conv = slim.conv2d(preprocessed_inputs,
                                     depth(32), [3, 3],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c1_conv')
        encoder_2_conv = slim.conv2d(encoder_1_conv,
                                     depth(64), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c2_conv')
        encoder_3_conv = slim.conv2d(encoder_2_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c3_conv')
        # Block 2
        encoder_1_conv_dilated_1 = slim.conv2d(
            preprocessed_inputs,
            depth(32), [3, 3],
            rate=3,
            weights_initializer=trunc_normal(0.09),
            scope='encoder_1_conv_dilated_1')
        encoder_2_conv_dilated_1 = slim.conv2d(
            encoder_1_conv_dilated_1,
            depth(64), [5, 5],
            rate=3,
            weights_initializer=trunc_normal(0.09),
            scope='encoder_2_conv_dilated_1')
        encoder_3_conv_dilated_1 = slim.conv2d(
            encoder_2_conv_dilated_1,
            depth(128), [5, 5],
            rate=3,
            weights_initializer=trunc_normal(0.09),
            scope='encoder_3_conv_dilated_1')
        # Block 3
        encoder_1_conv_dilated_2 = slim.conv2d(
            preprocessed_inputs,
            depth(32), [3, 3],
            rate=5,
            weights_initializer=trunc_normal(0.09),
            scope='encoder_1_conv_dilated_2')
        encoder_2_conv_dilated_2 = slim.conv2d(
            encoder_1_conv_dilated_2,
            depth(64), [5, 5],
            rate=5,
            weights_initializer=trunc_normal(0.09),
            scope='encoder_2_conv_dilated_2')
        encoder_3_conv_dilated_2 = slim.conv2d(
            encoder_2_conv_dilated_2,
            depth(128), [5, 5],
            rate=5,
            weights_initializer=trunc_normal(0.09),
            scope='encoder_3_conv_dilated_2')

        encoder_concat = tf.concat(
            [encoder_3_conv, encoder_3_conv_dilated_1, encoder_3_conv_dilated_2],
            axis=3)

        #decoder
        decoder_3_deconv = slim.conv2d_transpose(
            encoder_concat,
            depth(64), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c3_deconv')
        decoder_2_deconv = slim.conv2d_transpose(
            decoder_3_deconv,
            depth(32), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c2_deconv')
        decoder_1_deconv = slim.conv2d_transpose(
            decoder_2_deconv,
            depth(1), [3, 3],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c1_deconv')

        with tf.control_dependencies([shape_assert]):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                with _batch_norm_arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                        batch_norm_scale=True,
                        train_batch_norm=self._train_batch_norm):
                    _, activations = inception_v2.inception_v2_base(
                        decoder_1_deconv,
                        final_endpoint='Mixed_4e',
                        min_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)

        return activations['Mixed_4e'], activations
Example #18
def network_fn(inputs):
    # return transformer_factory.transform(inputs, BATCH_PER_GPU, NUM_STN, (224, 224), NUM_CLASSES, FLAGS.weight_decay, True)
    end_points = {}
    # with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=True):
    # with slim.arg_scope(inception_v3_arg_scope(weight_decay=FLAGS.weight_decay)):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
        with slim.arg_scope(inception_v3_arg_scope(weight_decay=weight_decay)):
            with tf.variable_scope("loc") as scope:
                with tf.variable_scope("net") as scope2:
                    # _, _end_points = inception_resnet_v2.inception_resnet_v2(inputs, num_classes=2, is_training=True, scope = scope2)
                    loc_net, _ = inception_v2.inception_v2_base(inputs,
                                                                scope=scope2)
                # loc_net = _end_points['Conv2d_7b_1x1']
                loc_net = slim.conv2d(loc_net, 128, [1, 1], scope='Loc_1x1')
                default_kernel_size = [14, 14]
                # kernel_size = _reduced_kernel_size_for_small_input(loc_net, default_kernel_size)
                loc_net = slim.conv2d(loc_net,
                                      128,
                                      loc_net.get_shape()[1:3],
                                      padding='VALID',
                                      activation_fn=tf.nn.tanh,
                                      scope='Loc_fc1')
                loc_net = slim.flatten(loc_net)
                iv = 4.
                initial = np.array([iv, 0, iv, 0] * NUM_STN, dtype=np.float32)
                b_fc_loc = tf.get_variable(
                    "Loc_fc_b",
                    shape=[4 * NUM_STN],
                    initializer=init_ops.constant_initializer(initial),
                    dtype=dtypes.float32)
                W_fc_loc = tf.get_variable(
                    "Loc_fc_W",
                    shape=[128, 4 * NUM_STN],
                    initializer=init_ops.constant_initializer(
                        np.zeros((128, 4 * NUM_STN))),
                    dtype=dtypes.float32)
                theta = tf.nn.tanh(tf.matmul(loc_net, W_fc_loc) + b_fc_loc)
            _finals = []
            for i in range(NUM_STN):
                scope_name = "stn%d" % i
                with tf.variable_scope(scope_name) as scope1:
                    # take the i-th group of 4 transform parameters
                    _theta = tf.slice(theta, [0, 4 * i], [-1, 4])
                    # loc_net = slim.conv2d(loc_net, 6, [1,1], activation_fn=tf.nn.tanh, scope='Loc_fc', biases_initializer = init_ops.constant_initializer([4.0,0.0,0.0,0.0,4.0,0.0]*128,dtype=dtypes.float32))
                    # loc_net = slim.conv2d(loc_net, 6, [1,1], activation_fn=tf.nn.tanh, scope='Loc_fc', biases_initializer = init_ops.constant_initializer([4.0],dtype=dtypes.float32))
                    # loc_net = slim.flatten(loc_net)
                    stn_output_size = (STN_OUT_SIZE, STN_OUT_SIZE)
                    x = transformer(inputs, _theta, stn_output_size)
                    x.set_shape([
                        BATCH_PER_GPU, stn_output_size[0], stn_output_size[1],
                        3
                    ])
                    # x.set_shape(tf.shape(inputs))
                    # tf.reshape(x, tf.shape(inputs))
                    end_points['x'] = x
                    # with tf.variable_scope("net") as scope2:
                    #  return inception_resnet_v2.inception_resnet_v2(x, num_classes=NUM_CLASSES, is_training=True, scope = scope2)
                    with tf.variable_scope("net") as scope2:
                        net, _ = inception_v2.inception_v2_base(x,
                                                                scope=scope2)
                    kernel_size = _reduced_kernel_size_for_small_input(
                        net, [7, 7])
                    net = slim.avg_pool2d(net,
                                          kernel_size,
                                          padding='VALID',
                                          scope='AvgPool_1a')
                    net = slim.dropout(net, keep_prob=0.7, scope='Dropout_1b')
                    _finals.append(net)
            with tf.variable_scope('Logits'):
                net = tf.concat(axis=3, values=_finals)
                logits = slim.conv2d(net,
                                     NUM_CLASSES, [1, 1],
                                     activation_fn=None,
                                     normalizer_fn=None,
                                     scope='Conv2d_1c_1x1')
                logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                predictions = slim.softmax(logits, scope='Predictions')
                end_points['Predictions'] = predictions

                logits_a = slim.conv2d(net,
                                       NUM_ATTRIBS, [1, 1],
                                       activation_fn=None,
                                       normalizer_fn=None,
                                       scope='Conv2d_1c_1x1_a')
                logits_a = tf.squeeze(logits_a, [1, 2],
                                      name='SpatialSqueeze_a')
                predictions_a = slim.sigmoid(logits_a, scope='Predictions_a')
                end_points['Predictions_a'] = predictions_a
                return logits, logits_a, end_points
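The per-STN slice above picks the i-th group of 4 transform parameters out of theta. A small NumPy illustration of that indexing (NUM_STN = 3 is hypothetical):

import numpy as np

theta = np.arange(12, dtype=np.float32).reshape(1, 12)   # batch of 1, 4 params per STN
for i in range(3):
    print(theta[:, 4 * i:4 * (i + 1)])                   # parameters for the i-th STN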
Example #19
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
        trunc_normal = lambda stddev: tf.truncated_normal_initializer(
            0.0, stddev)

        #add convolution autoencoder
        encoder_1_conv = slim.conv2d(preprocessed_inputs,
                                     depth(64), [3, 3],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c1_conv')
        #encoder_1_pool = slim.max_pool2d(encoder_1_conv, [2, 2], stride=2,
        #                                    scope='encoder_c1_pool')
        #encoder_1_dropout1 = slim.dropout(encoder_1_pool, 0.7, scope='encoder_c1_dropout1')
        encoder_2_conv = slim.conv2d(encoder_1_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c2_conv')
        encoder_3_conv = slim.conv2d(encoder_2_conv,
                                     depth(128), [5, 5],
                                     weights_initializer=trunc_normal(0.09),
                                     scope='encoder_c3_conv')
        #decoder
        decoder_3_deconv = slim.conv2d_transpose(
            encoder_3_conv,
            depth(128), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c3_deconv')
        decoder_2_deconv = slim.conv2d_transpose(
            decoder_3_deconv,
            depth(128), [5, 5],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c2_deconv')
        decoder_1_deconv = slim.conv2d_transpose(
            decoder_2_deconv,
            depth(1), [3, 3],
            weights_initializer=trunc_normal(0.09),
            scope='decoder_c1_deconv')

        feature_map_layout = {
            'from_layer': ['Mixed_4c', 'Mixed_5c', '', '', '', ''],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_explicit_padding': self._use_explicit_padding,
            'use_depthwise': self._use_depthwise,
        }

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('InceptionV2',
                                   reuse=self._reuse_weights) as scope:
                _, image_features = inception_v2.inception_v2_base(
                    ops.pad_to_multiple(decoder_1_deconv,
                                        self._pad_to_multiple),
                    final_endpoint='Mixed_5c',
                    min_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
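Several of the extract_features variants above return feature_maps.values() directly. In Python 3 that is a view object, so downstream code that indexes feature maps by position usually wants the list(...) form shown in Example #8:

from collections import OrderedDict

feature_maps = OrderedDict([('Mixed_4c', 'fm0'), ('Mixed_5c', 'fm1')])   # stand-in values
feature_map_list = list(feature_maps.values())
print(feature_map_list[0])   # 'fm0'; feature_maps.values()[0] would raise in Python 3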