def inference(image_batch, keep_probability,
              phase_train=True, bottleneck_layer_size=512,
              weight_decay=0.0):
    """Build ``LResnet50E_IR`` inside the 'LResnetE_IR' variable scope.

    Installs the slim arg scopes (conv / fully-connected defaults, 3x3 conv
    kernels, batch-norm defaults) and delegates to ``LResnet50E_IR``.

    Args:
      image_batch: forwarded to ``LResnet50E_IR`` as ``images``.
      keep_probability: forwarded unchanged.
      phase_train: used as the batch-norm ``is_training`` flag and forwarded.
      bottleneck_layer_size: forwarded unchanged.
      weight_decay: scale of the L2 regularizer on conv / fc weights.

    Returns:
      Whatever ``LResnet50E_IR`` returns.
    """
    with tf.variable_scope('LResnetE_IR'):
        # Conv / fully-connected layers get no bias, no activation and no
        # normalizer unless an individual layer overrides these defaults.
        conv_fc_defaults = {
            'weights_initializer': tf.contrib.layers.xavier_initializer(),
            'weights_regularizer': slim.l2_regularizer(weight_decay),
            'biases_initializer': None,
            'activation_fn': None,
            'normalizer_fn': None,
        }
        # Batch-norm defaults; prelu is applied as the batch-norm activation
        # and batch-norm variables are added to TRAINABLE_VARIABLES.
        bn_defaults = {
            'decay': 0.995,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': phase_train,
            'activation_fn': prelu,
            'updates_collections': None,
            'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
        }
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            **conv_fc_defaults), \
                slim.arg_scope([slim.conv2d], kernel_size=3), \
                slim.arg_scope([slim.batch_norm], **bn_defaults):
            return LResnet50E_IR(images=image_batch,
                                 keep_probability=keep_probability,
                                 phase_train=phase_train,
                                 bottleneck_layer_size=bottleneck_layer_size,
                                 reuse=None)
Пример #2
0
def get_network_byname(net_name,
                       inputs,
                       num_classes=None,
                       is_training=True,
                       global_pool=True,
                       output_stride=None,
                       spatial_squeeze=True):
    """Build the backbone selected by ``net_name`` and return its outputs.

    Args:
      net_name: name of the network. Only 'resnet_v1_50' and 'resnet_v1_101'
        are built by this function; the other accepted names are rejected
        with NotImplementedError.
      inputs: forwarded to the slim ResNet constructor.
      num_classes: forwarded to the slim ResNet constructor.
      is_training: forwarded to the slim ResNet constructor.
      global_pool: forwarded to the slim ResNet constructor.
      output_stride: forwarded to the slim ResNet constructor.
      spatial_squeeze: forwarded to the slim ResNet constructor.

    Returns:
      The ``(logits, end_points)`` pair returned by the selected constructor.

    Raises:
      ValueError: if ``net_name`` is not one of the accepted names.
      NotImplementedError: if ``net_name`` is accepted but no builder exists
        here (this case used to fall through and return None).
    """
    if net_name not in ['resnet_v1_50', 'mobilenet_224', 'inception_resnet', 'vgg16', 'resnet_v1_101']:
        raise ValueError('''not include network: {}, net_name must in [resnet_v1_50, mobilenet_224, 
                            inception_resnet, vgg16, resnet_v1_101]
                         '''.format(net_name))

    # Fail loudly instead of returning None for accepted-but-unbuilt names;
    # callers unpack the result into (logits, end_points).
    if net_name not in ('resnet_v1_50', 'resnet_v1_101'):
        raise NotImplementedError(
            'network {} is accepted but not implemented in '
            'get_network_byname'.format(net_name))

    # The slim constructors carry the same name as the accepted net_name.
    build_fn = getattr(resnet_v1, net_name)
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=cfgs.WEIGHT_DECAY[net_name])):
        logits, end_points = build_fn(inputs=inputs,
                                      num_classes=num_classes,
                                      is_training=is_training,
                                      global_pool=global_pool,
                                      output_stride=output_stride,
                                      spatial_squeeze=spatial_squeeze)
    return logits, end_points
Пример #3
0
def mobilenet_v1_arg_scope(is_training=True,
                           stddev=0.09):
  """Return the slim arg scope used for the MobileNet v1 layers.

  Args:
    is_training: used as the ``trainable`` flag of conv2d and
      separable_conv2d layers.
    stddev: standard deviation of the truncated-normal weight initializer.

  Returns:
    The scope yielded by the innermost ``slim.arg_scope``.
  """
  # Batch norm is always configured with is_training=False and
  # trainable=False here, independent of the is_training argument.
  bn_kwargs = dict(is_training=False,
                   center=True,
                   scale=True,
                   decay=0.9997,
                   epsilon=0.001,
                   trainable=False)

  initializer = tf.truncated_normal_initializer(stddev=stddev)
  l2 = tf.contrib.layers.l2_regularizer(cfg.MOBILENET.WEIGHT_DECAY)
  # Depthwise (separable) weights are regularized only when configured.
  depthwise_l2 = l2 if cfg.MOBILENET.REGU_DEPTH else None

  with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                      trainable=is_training,
                      weights_initializer=initializer,
                      activation_fn=tf.nn.relu6,
                      normalizer_fn=slim.batch_norm,
                      padding='SAME'), \
      slim.arg_scope([slim.batch_norm], **bn_kwargs), \
      slim.arg_scope([slim.conv2d], weights_regularizer=l2), \
      slim.arg_scope([slim.separable_conv2d],
                     weights_regularizer=depthwise_l2) as scope:
    return scope
def inference(image_batch, keep_probability,
              phase_train=True, bottleneck_layer_size=512,
              weight_decay=0.0):
    """Build ``resface20`` inside the 'Resface' variable scope.

    Conv and fully-connected layers default to Xavier initialization, L2
    weight regularization, prelu activation and batch normalization; conv
    layers additionally default to a kernel size of 3.

    Args:
      image_batch: forwarded to ``resface20`` as ``images``.
      keep_probability: forwarded unchanged.
      phase_train: used as the batch-norm ``is_training`` flag and forwarded.
      bottleneck_layer_size: forwarded unchanged.
      weight_decay: scale of the L2 regularizer on conv / fc weights.

    Returns:
      Whatever ``resface20`` returns.
    """
    bn_kwargs = dict(
        decay=0.995,
        epsilon=0.001,
        scale=True,
        is_training=phase_train,
        updates_collections=None,
        variables_collections=[tf.GraphKeys.TRAINABLE_VARIABLES])

    with tf.variable_scope('Resface'):
        conv_fc_defaults = dict(
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            weights_regularizer=slim.l2_regularizer(weight_decay),
            activation_fn=prelu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=bn_kwargs)
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            **conv_fc_defaults), \
                slim.arg_scope([slim.conv2d], kernel_size=3):
            return resface20(images=image_batch,
                             keep_probability=keep_probability,
                             phase_train=phase_train,
                             bottleneck_layer_size=bottleneck_layer_size,
                             reuse=None)
Пример #5
0
def resnet_arg_scope(is_training=True,
                     weight_decay=cfg.TRAIN.WEIGHT_DECAY,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
  """Return the arg scope used for the ResNet conv and batch-norm layers.

  Args:
    is_training: used as the ``trainable`` flag of conv2d layers.
    weight_decay: scale of the L2 regularizer on conv weights.
    batch_norm_decay: batch-norm ``decay``.
    batch_norm_epsilon: batch-norm ``epsilon``.
    batch_norm_scale: batch-norm ``scale`` flag.

  Returns:
    The scope yielded by the innermost ``arg_scope``.
  """
  # NOTE 'is_training' here does not work because inside resnet it gets reset:
  # https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py#L187
  bn_kwargs = dict(
      is_training=False,
      decay=batch_norm_decay,
      epsilon=batch_norm_epsilon,
      scale=batch_norm_scale,
      trainable=cfg.RESNET.BN_TRAIN,
      updates_collections=ops.GraphKeys.UPDATE_OPS)

  conv_kwargs = dict(
      weights_regularizer=regularizers.l2_regularizer(weight_decay),
      weights_initializer=initializers.variance_scaling_initializer(),
      trainable=is_training,
      activation_fn=nn_ops.relu,
      normalizer_fn=layers.batch_norm,
      normalizer_params=bn_kwargs)

  with arg_scope([slim.conv2d], **conv_kwargs), \
      arg_scope([layers.batch_norm], **bn_kwargs) as scope:
    return scope
Пример #6
0
  def conv_tower_fn(self, images, is_training=True, reuse=None):
    """Computes convolutional features using the InceptionV3 base network.

    Args:
      images: A tensor of shape [batch_size, height, width, channels].
      is_training: passed as ``is_training`` to batch norm and dropout.
      reuse: if truthy, the variables of the 'conv_tower_fn/INCE' scope are
        reused.

    Returns:
      The tensor produced by ``inception_v3_base`` at the endpoint named by
      ``self._mparams['conv_tower_fn'].final_endpoint``.
    """
    tower_params = self._mparams['conv_tower_fn']
    logging.debug('Using final_endpoint=%s', tower_params.final_endpoint)
    with tf.variable_scope('conv_tower_fn/INCE'):
      if reuse:
        tf.get_variable_scope().reuse_variables()
      with slim.arg_scope(inception.inception_v3_arg_scope()), \
          slim.arg_scope([slim.batch_norm, slim.dropout],
                         is_training=is_training):
        features, _ = inception.inception_v3_base(
            images, final_endpoint=tower_params.final_endpoint)
    return features
Пример #7
0
  def _image_to_head(self, is_training, reuse=None):
    """Run the first 12 MobileNet conv definitions on ``self._image``.

    The first ``cfg.MOBILENET.FIXED_LAYERS`` layers are built with
    ``is_training=False``; the remaining ones (up to layer 12) use the
    caller's ``is_training``.

    Args:
      is_training: training flag for the non-fixed layers.
      reuse: forwarded to ``mobilenet_v1_base``.

    Returns:
      The resulting feature tensor, also appended to ``self._act_summaries``
      and stored in ``self._layers['head']``.
    """
    # Base bottleneck
    assert (0 <= cfg.MOBILENET.FIXED_LAYERS <= 12)
    features = self._image
    num_fixed = cfg.MOBILENET.FIXED_LAYERS

    # (slice of _CONV_DEFS, index of first layer, training flag)
    segments = []
    if num_fixed > 0:
      segments.append((slice(None, num_fixed), 0, False))
    if num_fixed < 12:
      segments.append((slice(num_fixed, 12), num_fixed, is_training))

    for defs_slice, first_layer, training in segments:
      with slim.arg_scope(mobilenet_v1_arg_scope(is_training=training)):
        features = mobilenet_v1_base(features,
                                     _CONV_DEFS[defs_slice],
                                     starting_layer=first_layer,
                                     depth_multiplier=self._depth_multiplier,
                                     reuse=reuse,
                                     scope=self._scope)

    self._act_summaries.append(features)
    self._layers['head'] = features

    return features
Пример #8
0
    def encoder(self, images, is_training):
        """Encode ``images`` into two ``latent_variable_dim``-wide outputs.

        Four stages, each a stride-2 4x4 convolution followed by three
        repetitions of ``conv2d_block``, feed two fully-connected heads that
        use neither activation nor normalizer.

        Args:
          images: input to the first convolution.
          is_training: batch-norm ``is_training`` flag.

        Returns:
          Tuple ``(fc1, fc2)``: the outputs of the 'Fc_1' and 'Fc_2' heads.
        """
        act = leaky_relu  # tf.nn.relu
        l2_scale = 0.0
        # (output depth, strided-conv scope, repeated-block scope) per stage
        stages = (
            (32, 'Conv2d_1a', 'Conv2d_1b'),
            (64, 'Conv2d_2a', 'Conv2d_2b'),
            (128, 'Conv2d_3a', 'Conv2d_3b'),
            (256, 'Conv2d_4a', 'Conv2d_4b'),
        )
        with tf.variable_scope('encoder'), \
                slim.arg_scope([slim.batch_norm], is_training=is_training), \
                slim.arg_scope([slim.conv2d, slim.fully_connected],
                               weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                               weights_regularizer=slim.l2_regularizer(l2_scale),
                               normalizer_fn=slim.batch_norm,
                               normalizer_params=self.batch_norm_params):
            hidden = images
            for depth, down_scope, block_scope in stages:
                hidden = slim.conv2d(hidden, depth, [4, 4], 2,
                                     activation_fn=act, scope=down_scope)
                hidden = slim.repeat(hidden, 3, conv2d_block, 0.1, depth, [4, 4], 1,
                                     activation_fn=act, scope=block_scope)

            flat = slim.flatten(hidden)
            fc1 = slim.fully_connected(flat, self.latent_variable_dim,
                                       activation_fn=None, normalizer_fn=None,
                                       scope='Fc_1')
            fc2 = slim.fully_connected(flat, self.latent_variable_dim,
                                       activation_fn=None, normalizer_fn=None,
                                       scope='Fc_2')
        return fc1, fc2
Пример #9
0
 def decoder(self, latent_var, is_training):
     """Decode ``latent_var`` into a 3-channel output.

     A linear layer of width 4096 is reshaped to [-1, 4, 4, 256] and then
     passed through four stages of nearest-neighbour upsampling (to 8, 16,
     32 and 64 pixels per side), each followed by a 3x3 convolution and
     three repetitions of ``conv2d_block``. A final 3x3 convolution without
     activation produces the result.

     Args:
       latent_var: input to the first fully-connected layer.
       is_training: batch-norm ``is_training`` flag.

     Returns:
       The output of the final 'Conv2d_4c' convolution.
     """
     act = leaky_relu  # tf.nn.relu
     l2_scale = 0.0
     # (side length, depth, upsample name, conv scope, repeated-block scope)
     stages = (
         (8, 128, 'Upsample_1', 'Conv2d_1a', 'Conv2d_1b'),
         (16, 64, 'Upsample_2', 'Conv2d_2a', 'Conv2d_2b'),
         (32, 32, 'Upsample_3', 'Conv2d_3a', 'Conv2d_3b'),
         (64, 3, 'Upsample_4', 'Conv2d_4a', 'Conv2d_4b'),
     )
     with tf.variable_scope('decoder'), \
             slim.arg_scope([slim.batch_norm], is_training=is_training), \
             slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                            weights_regularizer=slim.l2_regularizer(l2_scale),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=self.batch_norm_params):
         out = slim.fully_connected(latent_var, 4096, activation_fn=None,
                                    normalizer_fn=None, scope='Fc_1')
         out = tf.reshape(out, [-1, 4, 4, 256], name='Reshape')

         for side, depth, up_name, conv_scope, block_scope in stages:
             out = tf.image.resize_nearest_neighbor(out, size=(side, side),
                                                    name=up_name)
             out = slim.conv2d(out, depth, [3, 3], 1,
                               activation_fn=act, scope=conv_scope)
             out = slim.repeat(out, 3, conv2d_block, 0.1, depth, [3, 3], 1,
                               activation_fn=act, scope=block_scope)

         out = slim.conv2d(out, 3, [3, 3], 1, activation_fn=None,
                           scope='Conv2d_4c')

     return out
Пример #10
0
  def _image_to_head(self, is_training, reuse=None):
    """Run the ResNet base and all blocks except the last one.

    The base and the first ``cfg.RESNET.FIXED_BLOCKS`` blocks are built with
    ``is_training=False``; the remaining blocks (the slice stops before the
    last entry of ``self._blocks``) use the caller's ``is_training``.

    Args:
      is_training: training flag for the non-fixed blocks.
      reuse: forwarded to ``resnet_v1.resnet_v1``.

    Returns:
      The resulting feature tensor, also appended to ``self._act_summaries``
      and stored in ``self._layers['head']``.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    num_fixed = cfg.RESNET.FIXED_BLOCKS

    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
      features = self._build_base()

    # (slice of self._blocks, training flag)
    segments = []
    if num_fixed > 0:
      segments.append((slice(0, num_fixed), False))
    if num_fixed < 3:
      segments.append((slice(num_fixed, -1), is_training))

    for block_slice, training in segments:
      with slim.arg_scope(resnet_arg_scope(is_training=training)):
        features, _ = resnet_v1.resnet_v1(features,
                                          self._blocks[block_slice],
                                          global_pool=False,
                                          include_root_block=False,
                                          reuse=reuse,
                                          scope=self._scope)

    self._act_summaries.append(features)
    self._layers['head'] = features

    return features
Пример #11
0
 def content_extractor(self, images, reuse=False):
     """Extract content features from ``images`` with four conv + BN layers.

     Args:
       images: (batch, 32, 32, 3) or (batch, 32, 32, 1); single-channel
         input is converted to RGB first.
       reuse: passed to the 'content_extractor' variable scope.

     Returns:
       The output of 'bn4' (tanh activation). In 'pretrain' mode, instead
       the flattened output of an extra 1x1 convolution with 10 outputs.
     """
     if images.get_shape()[3] == 1:
         # For mnist dataset, replicate the gray scale image 3 times.
         images = tf.image.grayscale_to_rgb(images)

     conv_defaults = dict(padding='SAME', activation_fn=None, stride=2,
                          weights_initializer=tf.contrib.layers.xavier_initializer())
     # (depth, conv scope, batch-norm scope) for the stride-2 3x3 layers
     strided_layers = (
         (64, 'conv1', 'bn1'),      # (batch_size, 16, 16, 64)
         (128, 'conv2', 'bn2'),     # (batch_size, 8, 8, 128)
         (256, 'conv3', 'bn3'),     # (batch_size, 4, 4, 256)
     )

     with tf.variable_scope('content_extractor', reuse=reuse), \
             slim.arg_scope([slim.conv2d], **conv_defaults), \
             slim.arg_scope([slim.batch_norm], decay=0.95, center=True, scale=True,
                            activation_fn=tf.nn.relu,
                            is_training=self.mode in ('train', 'pretrain')):
         feats = images
         for depth, conv_scope, bn_scope in strided_layers:
             feats = slim.conv2d(feats, depth, [3, 3], scope=conv_scope)
             feats = slim.batch_norm(feats, scope=bn_scope)

         feats = slim.conv2d(feats, 128, [4, 4], padding='VALID', scope='conv4')   # (batch_size, 1, 1, 128)
         feats = slim.batch_norm(feats, activation_fn=tf.nn.tanh, scope='bn4')
         if self.mode == 'pretrain':
             feats = slim.conv2d(feats, 10, [1, 1], padding='VALID', scope='out')
             feats = slim.flatten(feats)
         return feats
Пример #12
0
 def factory_fn(image, reuse):
     """Build the network for ``image`` with batch norm / dropout in inference mode.

     Args:
       image: forwarded to ``_create_network``.
       reuse: set as the ``reuse`` default of conv2d, fully_connected,
         batch_norm and layer_norm, and forwarded to ``_create_network``.

     Returns:
       The ``(features, logits)`` pair produced by ``_create_network``.
     """
     reusable_layers = [slim.conv2d, slim.fully_connected,
                        slim.batch_norm, slim.layer_norm]
     with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=False), \
             slim.arg_scope(reusable_layers, reuse=reuse):
         features, logits = _create_network(
             image, reuse=reuse, weight_decay=weight_decay)
         return features, logits
Пример #13
0
  def _build_network(self, sess, is_training=True):
    """Assemble the MobileNet-based detection network.

    Builds the first 12 conv layers on ``self._image``, the anchors, the
    region proposal network and crop pooling, then the remaining conv layers
    on the pooled regions, and finally the region classification.

    Args:
      sess: not used by this method.
      is_training: training flag for the non-fixed layers and the heads.

    Returns:
      Tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
      NotImplementedError: if ``cfg.POOLING_MODE`` is not 'crop'.
    """
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      make_normal = tf.truncated_normal_initializer
    else:
      make_normal = tf.random_normal_initializer
    initializer = make_normal(mean=0.0, stddev=0.01)
    initializer_bbox = make_normal(mean=0.0, stddev=0.001)

    # Base bottleneck
    assert (0 <= cfg.MOBILENET.FIXED_LAYERS <= 12)
    features = self._image
    num_fixed = cfg.MOBILENET.FIXED_LAYERS

    # (slice of _CONV_DEFS, index of first layer, training flag)
    segments = []
    if num_fixed > 0:
      segments.append((slice(None, num_fixed), 0, False))
    if num_fixed < 12:
      segments.append((slice(num_fixed, 12), num_fixed, is_training))

    for defs_slice, first_layer, training in segments:
      with slim.arg_scope(mobilenet_v1_arg_scope(is_training=training)):
        features = mobilenet_v1_base(features,
                                     _CONV_DEFS[defs_slice],
                                     starting_layer=first_layer,
                                     depth_multiplier=self._depth_multiplier,
                                     scope=self._scope)

    self._act_summaries.append(features)
    self._layers['head'] = features

    with tf.variable_scope(self._scope, 'MobilenetV1'):
      # build the anchors for the image
      self._anchor_component()
      # region proposal network
      rois = self._region_proposal(features, is_training, initializer)
      # region of interest pooling
      if cfg.POOLING_MODE != 'crop':
        raise NotImplementedError
      pool5 = self._crop_pool_layer(features, rois, "pool5")

    with slim.arg_scope(mobilenet_v1_arg_scope(is_training=is_training)):
      fc7 = mobilenet_v1_base(pool5,
                              _CONV_DEFS[12:],
                              starting_layer=12,
                              depth_multiplier=self._depth_multiplier,
                              scope=self._scope)

    with tf.variable_scope(self._scope, 'MobilenetV1'):
      # average pooling done by reduce_mean
      fc7 = tf.reduce_mean(fc7, axis=[1, 2])
      # region classification
      cls_prob, bbox_pred = self._region_classification(
          fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
Пример #14
0
  def construct_embedding(self):
    """Builds a conv -> spatial softmax -> FC adaptation network.

    Starts from ``self._pretrained_output``, optionally adds 3x3 conv layers
    (each followed by dropout), applies a spatial softmax, then the
    configured fully-connected layers and a final linear embedding layer.

    Returns:
      The embedding tensor, L2-normalized along dimension 1 when
      ``self._embedding_l2`` is set.
    """
    training = self._is_training
    bn_params = {'is_training': training}

    def _layer_defaults():
      # Built per arg scope so each scope gets its own regularizer objects.
      return dict(
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm,
          normalizer_params=bn_params,
          weights_regularizer=slim.regularizers.l2_regularizer(
              self._l2_reg_weight),
          biases_regularizer=slim.regularizers.l2_regularizer(
              self._l2_reg_weight))

    with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
      self._adaptation_scope = vs.name
      with slim.arg_scope([slim.layers.conv2d], **_layer_defaults()), \
          slim.arg_scope([slim.layers.fully_connected], **_layer_defaults()):

        # Input to embedder is pre-trained inception output.
        embedding = self._pretrained_output

        # Optionally add more conv layers.
        for depth in self._additional_conv_sizes:
          embedding = slim.layers.conv2d(
              embedding, depth, kernel_size=[3, 3], stride=[1, 1])
          embedding = slim.dropout(embedding,
                                   keep_prob=self._conv_hidden_keep_prob,
                                   is_training=training)

        # Take the spatial soft arg-max of the last convolutional layer.
        # This is a form of spatial attention over the activations.
        # See more here: http://arxiv.org/abs/1509.06113.
        embedding = tf.contrib.layers.spatial_softmax(embedding)
        self.spatial_features = embedding

        # Add fully connected layers; dropout only when it would drop units.
        embedding = slim.layers.flatten(embedding)
        for width in self._fc_hidden_sizes:
          embedding = slim.layers.fully_connected(embedding, width)
          if self._fc_hidden_keep_prob < 1.0:
            embedding = slim.dropout(embedding,
                                     keep_prob=self._fc_hidden_keep_prob,
                                     is_training=training)

        # Connect last FC layer to embedding.
        embedding = slim.layers.fully_connected(
            embedding, self._embedding_size, activation_fn=None)

        # Optionally L2 normalize the embedding.
        if self._embedding_l2:
          embedding = tf.nn.l2_normalize(embedding, dim=1)

        return embedding
 def factory_fn(image, reuse, l2_normalize):
     """Build the network for ``image`` under the enclosing training settings.

     Args:
       image: forwarded to ``_create_network``.
       reuse: set as the ``reuse`` default of conv2d, fully_connected,
         batch_norm and layer_norm, and forwarded to ``_create_network``.
       l2_normalize: forwarded to ``_create_network``.

     Returns:
       The ``(features, logits)`` pair produced by ``_create_network``.
     """
     reusable_layers = [slim.conv2d, slim.fully_connected,
                        slim.batch_norm, slim.layer_norm]
     with slim.arg_scope([slim.batch_norm, slim.dropout],
                         is_training=is_training), \
             slim.arg_scope(reusable_layers, reuse=reuse):
         features, logits = _create_network(
             image, num_classes, l2_normalize=l2_normalize,
             reuse=reuse, create_summaries=is_training,
             weight_decay=weight_decay)
         return features, logits
Пример #16
0
def image_embedding(images,
                    model_fn=resnet_v1_152,
                    trainable=True,
                    is_training=True,
                    weight_decay=0.0001,
                    batch_norm_decay=0.997,
                    batch_norm_epsilon=1e-5,
                    batch_norm_scale=True,
                    add_summaries=False,
                    reuse=False):
  """Extract image features with ``model_fn`` (no classification layer).

  Args:
    images: input passed to ``model_fn``.
    model_fn: network constructor; its ``__name__`` names the variable scope.
    trainable: whether conv and batch-norm variables are trainable; weight
      regularization is only created when this is true.
    is_training: the network runs in training mode only when both this and
      ``trainable`` are true.
    weight_decay: scale of the L2 weight regularizer.
    batch_norm_decay: batch-norm ``decay``.
    batch_norm_epsilon: batch-norm ``epsilon``.
    batch_norm_scale: batch-norm ``scale`` flag.
    add_summaries: if true, add an activation summary for every end point.
    reuse: passed to the variable scope and to ``model_fn``.

  Returns:
    The first output of ``model_fn``.
  """
  is_resnet_training = trainable and is_training

  bn_params = {
      "is_training": is_resnet_training,
      "trainable": trainable,
      "decay": batch_norm_decay,
      "epsilon": batch_norm_epsilon,
      "scale": batch_norm_scale,
  }

  regularizer = (
      tf.contrib.layers.l2_regularizer(weight_decay) if trainable else None)

  with tf.variable_scope(model_fn.__name__, [images], reuse=reuse) as scope, \
      slim.arg_scope([slim.conv2d],
                     weights_regularizer=regularizer,
                     trainable=trainable), \
      slim.arg_scope([slim.conv2d],
                     weights_initializer=slim.variance_scaling_initializer(),
                     activation_fn=tf.nn.relu,
                     normalizer_fn=slim.batch_norm,
                     normalizer_params=bn_params), \
      slim.arg_scope([slim.batch_norm],
                     is_training=is_resnet_training,
                     trainable=trainable), \
      slim.arg_scope([slim.max_pool2d], padding="SAME"):
    net, end_points = model_fn(
        images, num_classes=None, global_pool=False,
        is_training=is_resnet_training,
        reuse=reuse, scope=scope)

  if add_summaries:
    for activation in end_points.values():
      tf.contrib.layers.summaries.summarize_activation(activation)

  return net
Пример #17
0
def model(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model on top of slim's implementation of resnet_v1_50.

    The 'pool5' .. 'pool2' end points are fused from coarse to fine: each
    step concatenates the upsampled previous result with the next end point
    and applies a 1x1 and a 3x3 convolution.

    Args:
      images: input images; mean_image_subtraction is applied first.
      weight_decay: L2 regularization scale for the backbone and fusion convs.
      is_training: forwarded to the backbone and to the fusion batch norm.

    Returns:
      (F_score, F_geometry): a 1-channel sigmoid score map, and the
      concatenation of a 4-channel sigmoid map scaled by FLAGS.text_scale
      with a 1-channel angle map.
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        _, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    # Pass the end-point tensors themselves; the previous
    # `values=[end_points.values]` handed over the bound method object.
    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Feature maps ordered from coarsest to finest.
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    # The coarsest map is used as-is.
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    # Upsample so the result can be concatenated with the
                    # next, finer feature map.
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # The regression outputs go through a sigmoid first to limit
            # their range; the same is done for the angle map.
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
Пример #18
0
def build_inceptionv3_graph(images, endpoint, is_training, checkpoint,
                            reuse=False):
  """Builds an InceptionV3 model graph.

  Args:
    images: A 4-D float32 `Tensor` of batch images.
    endpoint: String, name of the InceptionV3 endpoint.
    is_training: Boolean, whether or not to build a training or inference graph.
    checkpoint: String, path to the pretrained model checkpoint.
    reuse: Boolean, whether or not we are reusing the embedder.
  Returns:
    inception_output: `Tensor` holding the InceptionV3 output.
    inception_variables: List of variables to restore, excluding any whose
      name contains 'global_step'.
    init_fn: Function `(scaffold, sess)` that restores those variables from
      `checkpoint`; None unless `is_training` is true and `reuse` is false.
  """
  init_fn = None
  with slim.arg_scope(inception.inception_v3_arg_scope()):
    _, endpoints = inception.inception_v3(
        images, num_classes=1001, is_training=is_training)
    inception_output = endpoints[endpoint]
    inception_variables = [
        var for var in slim.get_variables_to_restore()
        if 'global_step' not in var.name]
    if is_training and not reuse:
      init_saver = tf.train.Saver(inception_variables)

      def init_fn(scaffold, sess):
        del scaffold
        init_saver.restore(sess, checkpoint)

    return inception_output, inception_variables, init_fn
Пример #19
0
    def generator(self, inputs, reuse=False):
        """Generate a single-channel image from ``inputs`` with transposed convs.

        Args:
          inputs: (batch, 1, 1, 128)
          reuse: passed to the 'generator' variable scope.

        Returns:
          The tanh output of 'conv_transpose4', (batch_size, 32, 32, 1).
        """
        deconv_defaults = dict(padding='SAME', activation_fn=None, stride=2,
                               weights_initializer=tf.contrib.layers.xavier_initializer())
        with tf.variable_scope('generator', reuse=reuse), \
                slim.arg_scope([slim.conv2d_transpose], **deconv_defaults), \
                slim.arg_scope([slim.batch_norm], decay=0.95, center=True, scale=True,
                               activation_fn=tf.nn.relu,
                               is_training=(self.mode == 'train')):
            out = slim.conv2d_transpose(inputs, 512, [4, 4], padding='VALID',
                                        scope='conv_transpose1')   # (batch_size, 4, 4, 512)
            out = slim.batch_norm(out, scope='bn1')
            out = slim.conv2d_transpose(out, 256, [3, 3],
                                        scope='conv_transpose2')   # (batch_size, 8, 8, 256)
            out = slim.batch_norm(out, scope='bn2')
            out = slim.conv2d_transpose(out, 128, [3, 3],
                                        scope='conv_transpose3')   # (batch_size, 16, 16, 128)
            out = slim.batch_norm(out, scope='bn3')
            out = slim.conv2d_transpose(out, 1, [3, 3], activation_fn=tf.nn.tanh,
                                        scope='conv_transpose4')   # (batch_size, 32, 32, 1)
            return out
Пример #20
0
def inference(images, train=True, resnet_stride=8):
    """Run the ResNet feature extractor and upscale its output to logits.

    Args:
      images: input batch; its dynamic shape, with the last dimension
        replaced by FLAGS.out_channels, is the output shape of the upscaling.
      train: passed positionally to ``resnet_v1.resnet_arg_scope``.
      resnet_stride: requested ``output_stride`` of the ResNet and stride of
        the transposed convolution that undoes it.

    Returns:
      The output of the 'upscale' transposed convolution.
    """
    # tf.unpack / tf.pack were renamed to tf.unstack / tf.stack and the old
    # names were removed in TensorFlow 1.0; use whichever this build provides.
    unstack = getattr(tf, 'unstack', None) or tf.unpack
    stack = getattr(tf, 'stack', None) or tf.pack

    with slim.arg_scope(resnet_v1.resnet_arg_scope(train)):
        # num_classes=None: return the features before the logit layer.
        # global_pool=False: keep the spatial dimensions (dense prediction).
        # output_stride: requested ratio of input to output resolution.
        net, end_points = resnet_v1_slim(images,
                                num_classes = None,
                                global_pool = False,
                                output_stride = resnet_stride)
        # replace resnet_v1_slim above with resnet_v1.resnet_v1_50/101/...
        # to use standard architectures.

    resnet_depth = utils.last_dimension(net.get_shape(), min_rank=4)

    # Output shape: the input shape with the channel count swapped for
    # FLAGS.out_channels.
    shape = unstack(tf.shape(images))
    print(shape.__class__)
    shape.pop()
    shape.append(tf.constant(FLAGS.out_channels, dtype=tf.int32))
    print(len(shape))
    # conv2d_transpose filters are laid out as
    # [height, width, output_channels, input_channels].
    filters = tf.Variable(
                    tf.truncated_normal(
                        [resnet_stride*2+1, resnet_stride*2+1, FLAGS.out_channels, resnet_depth],
                        dtype=tf.float32,
                        stddev=0.01),
                    name='filters')
    logits = tf.nn.conv2d_transpose(net, filters, stack(shape),
                    [1,resnet_stride,resnet_stride,1], padding='SAME', name='upscale')
    return logits
Пример #21
0
def conv_net_kelz(inputs):
  """Builds the ConvNet from Kelz 2016.

  Three 3x3 convolutions (each followed by batch normalization), two
  max-pool + dropout stages that pool only along axis 2, and a final
  512-unit fully connected layer applied after the last two axes have been
  merged.

  Args:
    inputs: rank-4 input tensor; axes 0 and 1 are treated as batch and time
      and are preserved by the flattening step.

  Returns:
    The output of the `dropout5` layer; its last dimension is 512.
  """

  def conv_bn(tensor, depth, name):
    # 3x3 convolution normalized with batch norm.
    return slim.conv2d(
        tensor, depth, [3, 3], scope=name, normalizer_fn=slim.batch_norm)

  def pool_then_drop(tensor, pool_name, dropout_name):
    # Halve axis 2 only, then apply dropout with the value 0.25 that the
    # original passes as slim.dropout's second argument.
    pooled = slim.max_pool2d(tensor, [1, 2], stride=[1, 2], scope=pool_name)
    return slim.dropout(pooled, 0.25, scope=dropout_name)

  initializer = tf.contrib.layers.variance_scaling_initializer(
      factor=2.0, mode='FAN_AVG', uniform=True)
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=initializer):
    features = conv_bn(inputs, 32, 'conv1')

    features = conv_bn(features, 32, 'conv2')
    features = pool_then_drop(features, 'pool2', 'dropout2')

    features = conv_bn(features, 64, 'conv3')
    features = pool_then_drop(features, 'pool3', 'dropout3')

    # Merge the last two axes; batch and time sizes are taken from the dynamic
    # shape, the merged width from the static shape.
    dynamic_shape = tf.shape(features)
    merged_width = features.shape[2].value * features.shape[3].value
    features = tf.reshape(
        features, (dynamic_shape[0], dynamic_shape[1], merged_width),
        'flatten4')

    features = slim.fully_connected(features, 512, scope='fc5')
    features = slim.dropout(features, 0.5, scope='dropout5')

    return features
Пример #22
0
    def create_network(self, name):
        """Build a fully connected residual network from states to actions.

        The network consists of a 1024-unit input layer, three residual
        blocks (128 -> 256 -> 128 with an additive skip connection) and a
        tanh output layer of width `self.action_dim` that is scaled by
        `self.bound`.

        Args:
            name: variable-scope name under which the network is created.

        Returns:
            Tuple `(inputs, outputs, scope_name)`: the float placeholder of
            shape [None, self.state_dim], the scaled output tensor, and the
            name of the variable scope.
        """
        with tf.variable_scope(name) as scope:
            inputs = tf.placeholder(fl32, [None, self.state_dim], 'inputs')

            with slim.arg_scope(
                [slim.fully_connected],
                activation_fn=relu,
                weights_initializer=uniform,
                weights_regularizer=None
            ):

                net = slim.fully_connected(inputs, 1024)

                # Three identical residual blocks.  Layers are created in the
                # same order as when written out by hand, so the automatically
                # assigned layer names are unchanged.
                for _ in range(3):
                    res = net = slim.fully_connected(net, 128)
                    net = slim.fully_connected(net, 256)
                    # No activation before the skip connection is added.
                    net = slim.fully_connected(net, 128, activation_fn=None)
                    net = relu(net + res)

                outputs = slim.fully_connected(
                    net, self.action_dim, activation_fn=tanh)
                # tf.mul was removed in TensorFlow 1.0; the overloaded
                # operator performs the same multiplication on all versions.
                outputs = outputs * self.bound

        return (inputs, outputs, scope.name)
Пример #23
0
    def create_model(self,
                    images,
                    num_classes,
                    weight_decay=0.00004,
                    scope='Flowers',
                    reuse=None,
                    is_training=True):
        """Creates a base part of the Model (no gradients, no loss, no summaries).

        Args:
            images: A tensor of size [batch_size, height, width, channels].
            num_classes: The number of predicted classes.
            weight_decay: Weight decay forwarded to
                `inception_v3.inception_v3_arg_scope`.
            scope: Optional variable_scope.
            reuse: Whether or not the network or its variables should be reused. To
                be able to reuse 'scope' must be given.
            is_training: Whether is training or not.

        Returns:
            A tuple `(logits, endpoints)` exactly as returned by
            `inception_v3.inception_v3`.
        """
        # `[images]` belongs in the `values` argument; passed positionally it
        # would land in the `default_name` slot of tf.variable_scope.
        with tf.variable_scope(scope, values=[images], reuse=reuse):
            arg_scope = inception_v3.inception_v3_arg_scope(
                weight_decay=weight_decay)
            with slim.arg_scope(arg_scope):
                logits, endpoints = inception_v3.inception_v3(
                    inputs=images,
                    num_classes=num_classes,
                    is_training=is_training)
                return logits, endpoints
Пример #24
0
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
  """Return an arg scope with shared defaults for conv and dense layers.

  `slim.conv2d` and `slim.conv2d_transpose` default to 'SAME' padding;
  together with `slim.fully_connected` they share an L2 weight regularizer,
  a truncated-normal weight initializer (stddev 0.001) and the given
  activation and normalizer functions.

  Args:
    weight_decay: scale passed to `slim.l2_regularizer`.
    activation_fn: default activation function for the covered layers.
    normalizer_fn: default normalizer function for the covered layers.

  Returns:
    The scope object yielded by the innermost `slim.arg_scope`.
  """

  def layer_defaults():
    # Build fresh regularizer/initializer objects on every call, so the two
    # scopes do not share instances.
    return dict(
        weights_regularizer=slim.l2_regularizer(weight_decay),
        weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn)

  with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                      padding='SAME',
                      **layer_defaults()):
    with slim.arg_scope([slim.fully_connected],
                        **layer_defaults()) as combined_scope:
      return combined_scope
Пример #25
0
    def build_feature_pyramid(self):
        """Build the feature pyramid levels P2, P3, P4, P5 and P6.

        Reference: https://github.com/CharlesShang/FastMaskRCNN

        P5 is a 1x1 convolution of C5 and P6 is a 2x2, stride-2 max pool of
        P5.  Each of P4, P3 and P2 is obtained by resizing the next coarser
        level to the size of the matching C map with nearest-neighbour
        interpolation, adding a 1x1-convolved copy of that C map, and applying
        a 3x3 convolution to the sum.

        Returns:
            Dict mapping 'P2' ... 'P6' to the multi-scale feature maps.
        """
        pyramid = {}
        regularizer = slim.l2_regularizer(self.rpn_weight_decay)
        with tf.variable_scope('feature_pyramid'):
            with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer):
                top = slim.conv2d(self.feature_maps_dict['C5'],
                                  num_outputs=256,
                                  kernel_size=[1, 1],
                                  stride=1,
                                  scope='build_P5')
                pyramid['P5'] = top

                # P6 is a down-sampled copy of P5.
                pyramid['P6'] = slim.max_pool2d(
                    top, kernel_size=[2, 2], stride=2, scope='build_P6')

                # Top-down pathway: P4, then P3, then P2.
                for level in (4, 3, 2):
                    coarser = pyramid['P%d' % (level + 1)]
                    lateral = self.feature_maps_dict['C%d' % level]

                    target = tf.shape(lateral)
                    upsampled = tf.image.resize_nearest_neighbor(
                        coarser, [target[1], target[2]],
                        name='build_P%d/up_sample_nearest_neighbor' % level)

                    lateral = slim.conv2d(
                        lateral, num_outputs=256, kernel_size=[1, 1], stride=1,
                        scope='build_P%d/reduce_dimension' % level)
                    merged = upsampled + lateral
                    merged = slim.conv2d(
                        merged, 256, kernel_size=[3, 3], stride=1,
                        padding='SAME',
                        scope='build_P%d/avoid_aliasing' % level)
                    pyramid['P%d' % level] = merged

        return pyramid
Пример #26
0
    def build_graph(self, image, label):
        """Build a small conv net classifier and return its total cost.

        The network has four 3x3 convolutions with 32 filters, two 2x2 max
        pools, a 512-unit dense layer with dropout and a 10-way linear output.
        Moving summaries are registered for accuracy and cross-entropy, and a
        parameter summary for all variables matching '.*/weights'.

        Args:
            image: input batch; a trailing channel axis is appended here.
            label: integer class labels used for the sparse cross-entropy
                and the top-1 accuracy.

        Returns:
            Mean cross-entropy plus the regularization cost collected by
            `regularize_cost_from_collection()`.
        """
        layers = slim.layers

        # Append the channel axis, then apply the affine map x -> 2x - 1
        # (presumably [0, 1] pixels to [-1, 1] -- confirm against the data
        # pipeline).
        image = tf.expand_dims(image, 3)
        image = image * 2 - 1

        is_training = get_current_tower_context().is_training
        # Only the fully connected layers receive the L2 regularizer.
        with slim.arg_scope([layers.fully_connected],
                            weights_regularizer=slim.l2_regularizer(1e-5)):
            net = layers.conv2d(image, 32, [3, 3], scope='conv0')
            net = layers.max_pool2d(net, [2, 2], scope='pool0')
            net = layers.conv2d(net, 32, [3, 3], padding='SAME', scope='conv1')
            net = layers.conv2d(net, 32, [3, 3], scope='conv2')
            net = layers.max_pool2d(net, [2, 2], scope='pool1')
            net = layers.conv2d(net, 32, [3, 3], scope='conv3')
            net = layers.flatten(net, scope='flatten')
            net = layers.fully_connected(net, 512, scope='fc0')
            net = layers.dropout(net, is_training=is_training)
            logits = layers.fully_connected(
                net, 10, activation_fn=None, scope='fc1')

        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_example_loss, name='cross_entropy_loss')

        correct = tf.to_float(tf.nn.in_top_k(logits, label, 1))
        accuracy = tf.reduce_mean(correct, name='accuracy')

        summary.add_moving_summary(accuracy)
        summary.add_moving_summary(cost)
        # slim uses different variable names
        summary.add_param_summary(('.*/weights', ['histogram', 'rms']))
        return cost + regularize_cost_from_collection()
Пример #27
0
    def create_network(self, name):
        """Build a network mapping a (state, action) pair to a single value.

        States and actions are concatenated along axis 1 and passed through
        two ReLU layers (400 and 300 units) followed by a linear layer with a
        single output.

        Args:
            name: variable-scope name under which the network is created.

        Returns:
            Tuple `(inputs, actions, out, scope_name)`: the two float
            placeholders of shape [None, self.state_dim] and
            [None, self.action_dim], the output tensor, and the name of the
            variable scope.
        """
        with tf.variable_scope(name) as scope:
            inputs = tf.placeholder(fl32, [None, self.state_dim], 'inputs')
            actions = tf.placeholder(fl32, [None, self.action_dim], 'actions')

            fc_defaults = dict(
                activation_fn=relu,
                weights_initializer=uniform,
                weights_regularizer=None)
            with slim.arg_scope([slim.fully_connected], **fc_defaults):
                # Note the (axis, values) argument order used by this call.
                joined = tf.concat(1, [inputs, actions])
                hidden = slim.fully_connected(joined, 400)
                hidden = slim.fully_connected(hidden, 300)
                # A disabled earlier variant fed only the state into the first
                # layer and mixed the actions in at the second layer through
                # separate weight matrices.
                out = slim.fully_connected(hidden, 1, activation_fn=None)

        return (inputs, actions, out, scope.name)
def build_arch(input, is_train, num_classes):
    """Assemble a capsule network: conv1 -> primary capsules -> class capsules.

    A 9x9 VALID convolution with 256 filters is followed by a second 9x9,
    stride-2 VALID convolution whose output is reshaped into 8-dimensional
    capsules and squashed.  `vec_transform` produces 16-dimensional
    prediction vectors per class and `dynamic_routing` combines them into one
    16-dimensional capsule per class.  Shapes are asserted after every stage
    against `cfg.batch_size` and the spatial size derived from `input`.

    Args:
        input: input tensor; its static dimension 1 is taken as the (square)
            spatial size.
        is_train: passed as `trainable` to the convolution layers.
        num_classes: number of output capsules.

    Returns:
        Tuple `(output, output_len)`: the class capsules of shape
        [cfg.batch_size, num_classes, 16] and their norms along the last axis.
    """
    batch = cfg.batch_size
    side = int(input.get_shape()[1])

    with slim.arg_scope([slim.conv2d], trainable=is_train):
        with tf.variable_scope('conv1') as scope:
            capsules = slim.conv2d(input, num_outputs=256, kernel_size=[9, 9],
                                   stride=1, padding='VALID', scope=scope)
            # A 9x9 VALID convolution with stride 1 removes 8 pixels per axis.
            side -= 8
            assert capsules.get_shape() == [batch, side, side, 256]
            tf.logging.info('conv1 output shape: {}'.format(capsules.get_shape()))

        with tf.variable_scope('primary_caps_layer') as scope:
            capsules = slim.conv2d(capsules, num_outputs=32*8, kernel_size=[9, 9],
                                   stride=2, padding='VALID', scope=scope)
            capsules = tf.reshape(capsules, [batch, -1, 8])
            capsules = squash(capsules)
            # Spatial size after the 9x9, stride-2 VALID convolution.
            side = (side - 8) // 2
            assert capsules.get_shape() == [batch, side*side*32, 8]
            tf.logging.info('primary capsule output shape: {}'.format(capsules.get_shape()))

        with tf.variable_scope('digit_caps_layer'):
            with tf.variable_scope('u'):
                predictions = vec_transform(capsules, num_classes, 16)
                assert predictions.get_shape() == [batch, num_classes, side*side*32, 16]
                tf.logging.info('digit_caps_layer u_hats shape: {}'.format(predictions.get_shape()))

            with tf.variable_scope('routing'):
                capsules = dynamic_routing(predictions)
                assert capsules.get_shape() == [batch, num_classes, 16]
                tf.logging.info('the output capsule has shape: {}'.format(capsules.get_shape()))

        lengths = tf.norm(capsules, axis=-1)

    return capsules, lengths
Пример #29
0
def build_single_inceptionv1(train_tfdata, is_train, dropout_keep_prob):
    """Run Inception v1 and also expose a pooled 'Mixed_5c' feature.

    Args:
        train_tfdata: input tensor handed to `inception.inception_v1`.
        is_train: forwarded as `is_training`.
        dropout_keep_prob: keep probability, used both inside the network and
            for the dropout applied to the pooled feature here.

    Returns:
        Tuple `(identity, feature)`: the first output of
        `inception.inception_v1`, and the 'Mixed_5c' end point after 7x7
        average pooling, dropout and removal of axes 1 and 2.
    """
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        identity, end_points = inception.inception_v1(
            train_tfdata,
            dropout_keep_prob=dropout_keep_prob,
            is_training=is_train)
        # The scope is called 'MaxPool_0a_7x7' although the op is an average
        # pool; the name is kept as is.
        pooled = slim.avg_pool2d(
            end_points['Mixed_5c'], [7, 7], stride=1, scope='MaxPool_0a_7x7')
        dropped = slim.dropout(pooled, dropout_keep_prob, scope='Dropout_0b')
        feature = tf.squeeze(dropped, [1, 2])
    return identity, feature
	def network_det(self, inputs, reuse=False):
		"""Build a five-convolution network with a two-way linear output.

		Five convolutions interleaved with three max pools feed a 4096-unit
		and a 512-unit fully connected layer, followed by a linear layer with
		two outputs.

		Args:
			inputs: input tensor for the first convolution.
			reuse: when true, the current variable scope is switched to
				reuse mode before any layer is created.

		Returns:
			The output of the final linear layer `fc_det2` (two units).
		"""
		if reuse:
			tf.get_variable_scope().reuse_variables()

		layer_defaults = dict(
			activation_fn=tf.nn.relu,
			weights_initializer=tf.truncated_normal_initializer(0.0, 0.01))
		with slim.arg_scope([slim.conv2d, slim.fully_connected], **layer_defaults):
			net = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1')
			net = slim.max_pool2d(net, [3, 3], 2, padding='VALID', scope='max1')

			net = slim.conv2d(net, 256, [5, 5], 1, scope='conv2')
			net = slim.max_pool2d(net, [3, 3], 2, padding='VALID', scope='max2')

			net = slim.conv2d(net, 384, [3, 3], 1, scope='conv3')
			net = slim.conv2d(net, 384, [3, 3], 1, scope='conv4')
			net = slim.conv2d(net, 256, [3, 3], 1, scope='conv5')
			net = slim.max_pool2d(net, [3, 3], 2, padding='VALID', scope='pool5')

			# Flatten everything but the batch axis using the static shape.
			flat_dim = int(np.prod(net.get_shape()[1:]))
			flat = tf.reshape(net, [-1, flat_dim])
			hidden = slim.fully_connected(flat, 4096, scope='fc6')

			hidden = slim.fully_connected(hidden, 512, scope='fc_det1')
			out_detection = slim.fully_connected(
				hidden, 2, scope='fc_det2', activation_fn=None)

		return out_detection
Пример #31
0
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates a reduced Inception Resnet V1 style embedding network.

    The stem is a stride-2 convolution, a stride-3 max pool and another
    stride-2 convolution.  It is followed by one `block35`, `reduction_a`,
    one `block8` repeat, a final `block8` without activation, global average
    pooling, dropout and a linear bottleneck layer.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of units of the final linear layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      net: the output of the 'Bottleneck' layer.
      end_points: dict of intermediate activations keyed by layer name.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training), \
                slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                               stride=1, padding='SAME'):
            # Stem.
            features = slim.conv2d(inputs, 128, 3, stride=2, padding='VALID',
                                   scope='Conv2d_1a_3x3')
            end_points['Conv2d_1a_3x3'] = features

            features = slim.max_pool2d(features, 3, stride=3, padding='VALID',
                                       scope='MaxPool_3b_3x3')
            # The pooled tensor is stored under the 'Conv2d_3b_1x1' key.
            end_points['Conv2d_3b_1x1'] = features

            features = slim.conv2d(features, 32, 3, stride=2, padding='VALID',
                                   scope='Conv2d_4b_3x3')
            end_points['Conv2d_4b_3x3'] = features

            # 1 x Inception-resnet-A
            features = slim.repeat(features, 1, block35, scale=0.27)
            end_points['Mixed_5a'] = features

            # Reduction-A
            with tf.variable_scope('Mixed_6a'):
                features = reduction_a(features, 96, 48, 96, 64)
            end_points['Mixed_6a'] = features

            # 1 x Inception-Resnet-C, then one more block without activation.
            features = slim.repeat(features, 1, block8, scale=0.20)
            end_points['Mixed_8a'] = features

            features = block8(features, activation_fn=None)
            end_points['Mixed_8b'] = features

            with tf.variable_scope('Logits'):
                end_points['PrePool'] = features
                # Average over the whole spatial extent.
                # TODO(original author): change this to a max pool.
                # pylint: disable=no-member
                spatial_extent = features.get_shape()[1:3]
                features = slim.avg_pool2d(features, spatial_extent,
                                           padding='VALID',
                                           scope='AvgPool_1a_8x8')
                features = slim.flatten(features)

                features = slim.dropout(features, dropout_keep_prob,
                                        is_training=is_training,
                                        scope='Dropout')

                end_points['PreLogitsFlatten'] = features

            features = slim.fully_connected(features, bottleneck_layer_size,
                                            activation_fn=None,
                                            scope='Bottleneck',
                                            reuse=False)

    return features, end_points
Пример #32
0
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Generator for v1 ResNet feature extractors.

    This is a feature-extraction variant of the ResNet v1 generator: the
    global pooling / logits / softmax head is disabled, so the function
    always returns the output of the last ResNet block.  The root block
    consists of a 3x3 stride-1 convolution with 32 filters ('conv0'), a 7x7
    stride-2 convolution with 64 filters ('conv1') and a 3x3 stride-2 max
    pool, i.e. it reduces the resolution by a factor of 4.

    For dense prediction tasks it is advisable to use inputs with spatial
    dimensions that are multiples of 32 plus 1, e.g. [321, 321].  In this
    case the feature maps at the ResNet output will have spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, cf.
    http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each element
        is a resnet_utils.Block object describing the units in the block.
      num_classes: Unused; kept for signature compatibility because the
        logits layer is disabled.
      is_training: whether is training or not (applied to slim.batch_norm).
      global_pool: Unused; kept for signature compatibility.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the requested
        ratio of input to output spatial resolution.
      include_root_block: If True, include the initial convolutions followed by
        max-pooling, if False excludes them.
      spatial_squeeze: Unused; kept for signature compatibility.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: The output of the last ResNet block.
      end_points: A dictionary from components of the network to the corresponding
        activation.  In addition to the collected outputs it contains the
        aliases 'pool3' (block1), 'pool4' (block2) and 'pool5' (`net`); the
        root block outputs are collected under 'pool0', 'pool1' and 'pool2'.

    Raises:
      ValueError: If include_root_block is set and output_stride is not a
        multiple of 4.
      KeyError: If the block outputs are found neither under
        'resnet_v1_50/...' nor under 'Detection/resnet_v1_50/...'.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # The root block already accounts for a stride of 4.
                        # Floor division keeps the stride an integer even
                        # when true division is in effect; it is exact
                        # because of the check above.
                        output_stride //= 4
                    net = resnet_utils.conv2d_same(net,
                                                   32,
                                                   3,
                                                   stride=1,
                                                   rate=1,
                                                   scope="conv0")
                    net = slim.utils.collect_named_outputs(
                        end_points_collection, "pool0", net)
                    net = resnet_utils.conv2d_same(net,
                                                   64,
                                                   7,
                                                   stride=2,
                                                   rate=1,
                                                   scope='conv1')
                    net = slim.utils.collect_named_outputs(
                        end_points_collection, "pool1", net)
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')

                    net = slim.utils.collect_named_outputs(
                        end_points_collection, 'pool2', net)

                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)

                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                # Alias the block outputs.  The network may have been built
                # inside an enclosing 'Detection' scope, in which case the
                # collected names carry that prefix.  Only a missing key
                # triggers the fallback; other errors propagate.
                try:
                    end_points['pool3'] = end_points['resnet_v1_50/block1']
                    end_points['pool4'] = end_points['resnet_v1_50/block2']
                except KeyError:
                    end_points['pool3'] = end_points[
                        'Detection/resnet_v1_50/block1']
                    end_points['pool4'] = end_points[
                        'Detection/resnet_v1_50/block2']
                end_points['pool5'] = net
                return net, end_points
Пример #33
0
    def interface_vgg16(self, inputs, reuse=None, is_training=True):
        """Build a VGG-16 encoder with two skip-connected decoder heads.

        VGG-16 provides the skip features net1..net5.  Two further stride-2
        stages ('pool5'/'conv6' and 'pool6'/'conv7') produce net6 and net7.
        Each head starts from net7, applies six stride-2 transposed
        convolutions, adds the matching skip feature (net6 down to net1)
        after each one, and ends in a linear 3x3 convolution.

        Args:
            inputs: input tensor handed to `vgg_16`.
            reuse: forwarded to `vgg_16` and to the 'cloud_net' variable scope.
            is_training: forwarded to `vgg_16` and `self.fcn_arg_scope`.

        Returns:
            Tuple `(alpha_logits, reflectance_logits)` with
            `self.alpha_channel` and `self.reflectance_channel` output
            channels respectively.
        """
        with slim.arg_scope(vgg_arg_scope()):
            _, vgg_end_points = vgg_16(inputs,
                                       is_training=is_training,
                                       reuse=reuse,
                                       spatial_squeeze=False,
                                       num_classes=None)

        endpoints = {
            'net1': vgg_end_points['vgg_16/conv1/conv1_2'],
            'net2': vgg_end_points['vgg_16/conv2/conv2_2'],
            'net3': vgg_end_points['vgg_16/conv3/conv3_3'],
            'net4': vgg_end_points['vgg_16/conv4/conv4_3'],
            'net5': vgg_end_points['vgg_16/conv5/conv5_3'],
        }

        # (output depth, layer scope, skip feature) for each decoder step,
        # listed from the coarsest to the finest resolution.
        decoder_steps = (
            (512, 'conv_trans1', 'net6'),
            (512, 'conv_trans2', 'net5'),
            (512, 'conv_trans3', 'net4'),
            (256, 'conv_trans4', 'net3'),
            (128, 'conv_trans5', 'net2'),
            (64, 'conv_trans6', 'net1'),
        )

        def decode(head_scope, out_channels):
            # One decoder head: upsample from net7 with additive skips, then
            # predict `out_channels` maps without an activation.
            with tf.variable_scope(head_scope):
                nets = endpoints['net7']
                for depth, layer_scope, skip_key in decoder_steps:
                    nets = slim.conv2d_transpose(
                        nets, depth, [3, 3], stride=2,
                        scope=layer_scope) + endpoints[skip_key]
                return slim.conv2d(nets,
                                   out_channels, [3, 3],
                                   scope='pred',
                                   activation_fn=None)

        with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
            with tf.variable_scope('cloud_net',
                                   'cloud_net', [inputs],
                                   reuse=reuse):
                # The scope name (including its spelling) is part of the
                # variable names and is therefore left untouched.
                with tf.variable_scope('feature_exatraction'):
                    nets = vgg_end_points['vgg_16/conv5/conv5_3']
                    nets = slim.conv2d(nets, 512, [3, 3], stride=2,
                                       scope='pool5')
                    nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3],
                                       scope='conv6')
                    endpoints['net6'] = nets
                    nets = slim.conv2d(nets, 512, [3, 3], stride=2,
                                       scope='pool6')
                    nets = slim.conv2d(nets, 512, [3, 3], scope='conv7')
                    endpoints['net7'] = nets

                alpha_logits = decode('alpha_prediction', self.alpha_channel)
                reflectance_logits = decode('reflectance_prediction',
                                            self.reflectance_channel)
        return alpha_logits, reflectance_logits
Пример #34
0
def inference(input_tensor, regularizer=None):
    """Build a residual classifier and return its final-layer activations.

    A 7x7 convolution and a 2x2 max pool are followed by four `res_block`
    stages (depths CONV_DEEP, 2x, 4x and 8x), a 2x2 average pool and a fully
    connected layer with NUM_LABELS outputs and a ReLU.

    Args:
        input_tensor: input tensor; the flattening step uses the static shape
            of the pooled features, including the batch dimension.
        regularizer: accepted but not used by this function.

    Returns:
        The ReLU-activated output of the fully connected layer.
    """
    # (depth, layer count, half flag, scope name) for each residual stage.
    stages = (
        (CONV_DEEP, 6, False, "layer4-9-conv"),
        (CONV_DEEP * 2, 8, True, "layer10-15-conv"),
        (CONV_DEEP * 4, 12, True, "layer16-27-conv"),
        (CONV_DEEP * 8, 6, True, "layer28-33-conv"),
    )

    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        stride=1,
                        padding='SAME'):
        with tf.variable_scope("layer1-initconv"):
            data = slim.conv2d(input_tensor, CONV_DEEP, [7, 7])
            data = slim.max_pool2d(data, [2, 2], stride=2)

            with tf.variable_scope("resnet_layer"):
                for depth, layer_count, halve, stage_name in stages:
                    data = res_block(input_tensor=data,
                                     kshape=[CONV_SIZE, CONV_SIZE],
                                     deph=depth,
                                     layer=layer_count,
                                     half=halve,
                                     name=stage_name)

                data = slim.avg_pool2d(data, [2, 2], stride=2)

                # Static shape of the pooled features; it determines the input
                # width of the fully connected layer.
                dims = data.get_shape().as_list()
                nodes = dims[1] * dims[2] * dims[3]
                flat = tf.reshape(data, [dims[0], nodes])

                # Final fully connected layer.
                with tf.variable_scope('layer34-fc'):
                    fc_weights = tf.get_variable(
                        "weight", [nodes, NUM_LABELS],
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.1))
                    fc_biases = tf.get_variable(
                        "bias", [NUM_LABELS],
                        initializer=tf.constant_initializer(0.1))
                    fc = tf.nn.relu(tf.matmul(flat, fc_weights) + fc_biases)

    return fc
Пример #35
0
def _build_network(images,
                   num_outputs,
                   alpha,
                   keep_prob=1.0,
                   is_training=True,
                   scope='yolo'):
    """Build the convolutional network and return its final layer output.

    The network is a fixed stack of slim convolutions and max-pools followed
    by three fully connected layers. Every conv / fully connected layer uses
    ``leaky_relu(alpha)`` as activation, except the last layer, which has no
    activation.

    Args:
        images: Input batch. Axes 1 and 2 are zero-padded and convolved, so
            the tensor is treated as NHWC.
        num_outputs: Number of units of the last fully connected layer.
        alpha: Argument handed to ``leaky_relu`` to build the activation.
        keep_prob: Keep probability of the dropout layer before the output.
        is_training: Whether the dropout layer is active.
        scope: Name of the variable scope wrapping the whole network.

    Returns:
        The output tensor of the ``fc_36`` layer.
    """
    with tf.variable_scope(scope):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                # slim expects a list (or dict) of collection names. A bare
                # string is iterated character by character, which would
                # register the variables under 'V', 'a', 'r', ... instead of
                # the 'Variables' collection.
                variables_collections=['Variables']):
            # Explicit 3-pixel padding + VALID conv instead of SAME padding.
            net = tf.pad(images,
                         np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                         name='pad_1')
            # The first two convolutions are frozen (trainable=False).
            net = slim.conv2d(net,
                              64,
                              7,
                              2,
                              padding='VALID',
                              scope='conv_2',
                              trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
            net = slim.conv2d(net, 192, 3, scope='conv_4', trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
            net = slim.conv2d(net, 128, 1, scope='conv_6')
            net = slim.conv2d(net, 256, 3, scope='conv_7')
            net = slim.conv2d(net, 256, 1, scope='conv_8')
            net = slim.conv2d(net, 512, 3, scope='conv_9')
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
            net = slim.conv2d(net, 256, 1, scope='conv_11')
            net = slim.conv2d(net, 512, 3, scope='conv_12')
            net = slim.conv2d(net, 256, 1, scope='conv_13')
            net = slim.conv2d(net, 512, 3, scope='conv_14')
            net = slim.conv2d(net, 256, 1, scope='conv_15')
            net = slim.conv2d(net, 512, 3, scope='conv_16')
            net = slim.conv2d(net, 256, 1, scope='conv_17')
            net = slim.conv2d(net, 512, 3, scope='conv_18')
            net = slim.conv2d(net, 512, 1, scope='conv_19')
            net = slim.conv2d(net, 1024, 3, scope='conv_20')
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
            net = slim.conv2d(net, 512, 1, scope='conv_22')
            net = slim.conv2d(net, 1024, 3, scope='conv_23')
            net = slim.conv2d(net, 512, 1, scope='conv_24')
            net = slim.conv2d(net, 1024, 3, scope='conv_25')
            net = slim.conv2d(net, 1024, 3, scope='conv_26')
            # Explicit 1-pixel padding + VALID conv for the stride-2 layer.
            net = tf.pad(net,
                         np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                         name='pad_27')
            net = slim.conv2d(net,
                              1024,
                              3,
                              2,
                              padding='VALID',
                              scope='conv_28')
            net = slim.conv2d(net, 1024, 3, scope='conv_29')
            net = slim.conv2d(net, 1024, 3, scope='conv_30')
            # Move channels in front of the spatial axes before flattening,
            # which fixes the element order seen by the dense layers.
            net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
            net = slim.flatten(net, scope='flat_32')
            net = slim.fully_connected(net, 512, scope='fc_33')
            net = slim.fully_connected(net, 4096, scope='fc_34')
            net = slim.dropout(net,
                               keep_prob=keep_prob,
                               is_training=is_training,
                               scope='dropout_35')
            net = slim.fully_connected(net,
                                       num_outputs,
                                       activation_fn=None,
                                       scope='fc_36')
            # The original comment read "net ~ batch * 7 * 7 * 30";
            # presumably num_outputs == 7 * 7 * 30 -- confirm with the caller.
        return net
Пример #36
0
 def generator(self, inputs, content_extractor_layers, reuse=False):
     """Decode a feature tensor into an image using encoder skip features.

     Args:
         inputs: Feature tensor; the original comments describe it as
             (batch, 1, 1, 128).
         content_extractor_layers: Mapping that provides the 'conv4_1',
             'conv3_1' and 'conv2_1' activations; each is concatenated
             with the decoder features on axis 3.
         reuse: Forwarded to the 'generator' variable scope.

     Returns:
         Output of the last transposed convolution (3 channels, tanh).
     """
     bn_is_training = self.mode == 'train'
     with tf.variable_scope('generator', reuse=reuse):
         # Layer defaults: transposed convs upsample by 2, plain convs
         # keep the resolution, batch norm carries the ReLU.
         with slim.arg_scope([slim.conv2d_transpose],
                             padding='SAME',
                             activation_fn=None,
                             stride=2,
                             weights_initializer=tf.contrib.layers.
                             xavier_initializer()), \
                 slim.arg_scope([slim.batch_norm],
                                decay=0.95,
                                center=True,
                                scale=True,
                                activation_fn=tf.nn.relu,
                                is_training=bn_is_training), \
                 slim.arg_scope([slim.conv2d],
                                padding='SAME',
                                activation_fn=None,
                                stride=1,
                                weights_initializer=tf.contrib.layers.
                                xavier_initializer()):
             # Shape notes below are carried over from the original
             # comments and assume a 1x1 spatial input.

             # Stage 1 -> (batch_size, 4, 4, 512)
             x = slim.conv2d_transpose(inputs, 512, [4, 4],
                                       padding='VALID',
                                       scope='conv_transpose1_1')
             x = slim.batch_norm(x, scope='bn1_1')
             x = slim.conv2d(x, 512, [3, 3], scope='conv_transpose1_2')
             x = slim.batch_norm(x, scope='bn1_2')

             # NOTE(review): tf.concat is called with the (axis, values)
             # argument order of pre-1.0 TensorFlow -- confirm the
             # TensorFlow version this code targets.
             merged = tf.concat(
                 3, (x, content_extractor_layers['conv4_1']))

             # Stage 2 -> (batch_size, 8, 8, 256)
             x = slim.conv2d_transpose(merged, 256, [3, 3],
                                       scope='conv_transpose2_1')
             x = slim.batch_norm(x, scope='bn2')
             x = slim.conv2d(x, 256, [3, 3], scope='conv_transpose2_2')
             x = slim.batch_norm(x, scope='bn2_2')
             merged = tf.concat(
                 3, (x, content_extractor_layers['conv3_1']))

             # Stage 3 -> (batch_size, 16, 16, 128)
             x = slim.conv2d_transpose(merged, 128, [3, 3],
                                       scope='conv_transpose3_1')
             x = slim.batch_norm(x, scope='bn3')
             x = slim.conv2d(x, 128, [3, 3], scope='conv_transpose3_2')
             x = slim.batch_norm(x, scope='bn3_2')
             merged = tf.concat(
                 3, (x, content_extractor_layers['conv2_1']))

             # Output -> (batch_size, 32, 32, 3)
             return slim.conv2d_transpose(merged, 3, [3, 3],
                                          activation_fn=tf.nn.tanh,
                                          scope='conv_transpose4')
Пример #37
0
    def content_extractor(self, images, reuse=False):
        """Encode images and expose every intermediate activation.

        Args:
            images: Image batch; the original comments give
                (batch, 32, 32, 3) or (batch, 32, 32, 1). Single-channel
                input is converted to RGB first.
            reuse: Forwarded to the 'content_extractor' variable scope.

        Returns:
            A ``(net, layers)`` tuple. ``layers`` maps 'conv1_1' ...
            'conv4_2' to the batch-normalised activation of each stage.
            ``net`` is the 'conv4_2' activation, or, when ``self.mode`` is
            'pretrain', the flattened output of an extra 1x1 'out'
            convolution with ``self.num_classes`` channels.
        """
        if images.get_shape()[3] == 1:
            # Gray-scale input (e.g. MNIST): replicate to three channels.
            images = tf.image.grayscale_to_rgb(images)

        bn_is_training = self.mode in ('train', 'pretrain')

        # (scope suffix, output channels, stride); the shape notes are
        # carried over from the original comments for a 32x32 input.
        stages = (
            ('1_1', 64, 1),    # (batch_size, 32, 32, 64)
            ('1_2', 64, 2),    # (batch_size, 16, 16, 64)
            ('2_1', 128, 1),   # (batch_size, 16, 16, 128)
            ('2_2', 128, 2),   # (batch_size, 8, 8, 128)
            ('3_1', 256, 1),   # (batch_size, 8, 8, 256)
            ('3_2', 256, 2),   # (batch_size, 4, 4, 256)
            ('4_1', 512, 1),   # (batch_size, 4, 4, 512)
        )

        with tf.variable_scope('content_extractor', reuse=reuse):
            with slim.arg_scope([slim.conv2d],
                                padding='SAME',
                                activation_fn=None,
                                weights_initializer=tf.contrib.layers.
                                xavier_initializer()), \
                    slim.arg_scope([slim.batch_norm],
                                   decay=0.95,
                                   center=True,
                                   scale=True,
                                   activation_fn=tf.nn.relu,
                                   is_training=bn_is_training):
                layers = {}
                feat = images
                for suffix, depth, stride in stages:
                    feat = slim.conv2d(feat, depth, [3, 3],
                                       stride=stride,
                                       scope='conv' + suffix)
                    feat = slim.batch_norm(feat, scope='bn' + suffix)
                    layers['conv' + suffix] = feat

                # Final 4x4 VALID conv collapses the map:
                # (batch_size, 1, 1, 512), squashed with tanh.
                net = slim.conv2d(feat, 512, [4, 4],
                                  stride=2,
                                  padding='VALID',
                                  scope='conv4_2')
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.tanh,
                                      scope='bn4_2')
                layers['conv4_2'] = net

                if self.mode == 'pretrain':
                    # Classification head used only while pretraining.
                    net = slim.conv2d(net,
                                      self.num_classes, [1, 1],
                                      padding='VALID',
                                      scope='out')
                    net = slim.flatten(net)
                return net, layers
def resnet_base(img_batch, scope_name, is_training=True):
    """Run the ResNet backbone and build the P3-P7 feature pyramid on top.

    Args:
        img_batch: Image batch passed to the backbone as ``input_x``.
        scope_name: Backbone scope name. A trailing 'b' selects
            ``get_resnet_v1_b_base``, a trailing 'd' selects
            ``get_resnet_v1_d_base``. It is also the key into
            ``BottleNeck_NUM_DICT`` and ``BASE_CHANNELS_DICT``.
        is_training: Forwarded to the backbone builder.

    Returns:
        Dict with keys 'P3' ... 'P7' holding the pyramid feature maps.

    Raises:
        ValueError: If ``scope_name`` ends with neither 'b' nor 'd'.
    """
    if not scope_name.endswith(('b', 'd')):
        raise ValueError("scope Name erro....")
    if scope_name.endswith('b'):
        backbone_builder = get_resnet_v1_b_base
    else:
        backbone_builder = get_resnet_v1_d_base

    _, feature_dict = backbone_builder(
        input_x=img_batch,
        scope=scope_name,
        bottleneck_nums=BottleNeck_NUM_DICT[scope_name],
        base_channels=BASE_CHANNELS_DICT[scope_name],
        is_training=is_training,
        freeze_norm=True,
        freeze=cfgs.FREEZE_BLOCKS)

    pyramid = {}
    with tf.variable_scope('build_pyramid'), \
            slim.arg_scope([slim.conv2d],
                           weights_regularizer=slim.l2_regularizer(
                               cfgs.WEIGHT_DECAY),
                           activation_fn=None,
                           normalizer_fn=None):

        # Lateral 1x1 projection of the deepest backbone feature.
        pyramid['P5'] = slim.conv2d(feature_dict['C5'],
                                    num_outputs=256,
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='build_P5')

        # Top-down pathway: P4 from (C4, P5), then P3 from (C3, P4).
        for lvl in (4, 3):
            pyramid['P%d' % lvl] = fusion_two_layer(
                C_i=feature_dict["C%d" % lvl],
                P_j=pyramid["P%d" % (lvl + 1)],
                scope='build_P%d' % lvl)

        # 3x3 convolution applied to each of P5, P4, P3.
        for lvl in (5, 4, 3):
            key = 'P%d' % lvl
            pyramid[key] = slim.conv2d(pyramid[key],
                                       num_outputs=256,
                                       kernel_size=[3, 3],
                                       padding="SAME",
                                       stride=1,
                                       scope="fuse_P%d" % lvl)

        # P6: stride-2 conv on P5 or on the raw C5, as configured.
        if cfgs.USE_P5:
            p6_source = pyramid['P5']
        else:
            p6_source = feature_dict['C5']
        p6 = slim.conv2d(p6_source,
                         num_outputs=256,
                         kernel_size=[3, 3],
                         padding="SAME",
                         stride=2,
                         scope='p6_conv')
        pyramid['P6'] = p6

        # P7: ReLU on P6 followed by another stride-2 conv.
        p6_activated = tf.nn.relu(p6, name='p6_relu')
        pyramid['P7'] = slim.conv2d(p6_activated,
                                    num_outputs=256,
                                    kernel_size=[3, 3],
                                    padding="SAME",
                                    stride=2,
                                    scope='p7_conv')

    return pyramid
Пример #39
0
    def interface_unet(self, inputs, reuse=None, is_training=True):
        """Build a U-Net style encoder with two parallel decoder branches.

        A shared five-stage encoder feeds two identically structured
        decoders, 'alpha_prediction' and 'reflectance_prediction'. Each
        decoder upsamples with transposed convolutions, merges the matching
        encoder stage through ``self.crop_and_concat`` and ends with a 3x3
        convolution without activation whose output is resized to
        ``self.img_size`` x ``self.img_size``.

        Args:
            inputs: Input tensor fed to the first encoder stage.
            reuse: Forwarded to the 'cloud_net' variable scope.
            is_training: Forwarded to ``self.fcn_arg_scope``.

        Returns:
            ``(alpha_logits, reflectance_logits)``.
        """
        endpoints = {}
        # (channels, encoder stage merged in) per decoder step, deepest first.
        decoder_plan = ((512, 'net4'), (256, 'net3'),
                        (128, 'net2'), (64, 'net1'))

        def _decoder_branch(branch_scope, out_channels):
            """Build one decoder under ``branch_scope``; return resized logits."""
            with tf.variable_scope(branch_scope):
                feats = endpoints['net5']
                for depth, skip_key in decoder_plan:
                    feats = slim.conv2d_transpose(feats, depth, [3, 3],
                                                  stride=2)
                    feats = self.crop_and_concat(endpoints[skip_key], feats)
                    feats = slim.repeat(feats, 2, slim.conv2d, depth,
                                        [3, 3])
                logits = slim.conv2d(feats,
                                     out_channels, [3, 3],
                                     padding='SAME',
                                     activation_fn=None)
                return tf.image.resize_images(
                    logits, [self.img_size, self.img_size])

        with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)), \
                tf.variable_scope('cloud_net',
                                  'cloud_net', [inputs],
                                  reuse=reuse):
            with tf.variable_scope('feature_exatraction'):
                # Two 3x3 convs per stage, 2x2 max-pool between stages;
                # each stage output is kept as 'net1' ... 'net5'.
                nets = inputs
                for stage, depth in enumerate((64, 128, 256, 512, 1024), 1):
                    if stage > 1:
                        nets = slim.max_pool2d(nets, [2, 2])
                    nets = slim.repeat(nets, 2, slim.conv2d, depth, [3, 3])
                    endpoints['net%d' % stage] = nets

            alpha_logits = _decoder_branch('alpha_prediction',
                                           self.alpha_channel)
            reflectance_logits = _decoder_branch('reflectance_prediction',
                                                 self.reflectance_channel)

            return alpha_logits, reflectance_logits
Пример #40
0
    def interface_resnet50(self, inputs, reuse=None, is_training=False):
        """Build a ResNet-50 v2 backbone with two additive decoder heads.

        Both heads ('alpha_prediction' and 'reflectance_prediction') start
        from the 'resnet_v2_50/block4' end point, apply three stride-2
        transposed convolutions, add a backbone end point after each one,
        and finish with a 3x3 'pred' convolution without activation.

        Args:
            inputs: Input tensor passed to ``resnet_v2_50``.
            reuse: Forwarded to ``resnet_v2_50`` and the 'cloud_net' scope.
            is_training: Forwarded to ``resnet_v2_50`` and
                ``self.fcn_arg_scope``.

        Returns:
            ``(alpha_logits, reflectance_logits)``.
        """
        with slim.arg_scope(resnet_arg_scope(use_batch_norm=True)):
            _, resnet_endpoints = resnet_v2_50(
                inputs,
                reuse=reuse,
                is_training=is_training,
            )

        # NOTE(review): this dict is never read below; the heads index
        # resnet_endpoints directly. Kept so the key lookups still happen.
        endpoints = {
            'net1': resnet_endpoints[
                'resnet_v2_50/block1/unit_2/bottleneck_v2'],
            'net2': resnet_endpoints[
                'resnet_v2_50/block2/unit_3/bottleneck_v2'],
            'net3': resnet_endpoints[
                'resnet_v2_50/block3/unit_5/bottleneck_v2'],
            'net4': resnet_endpoints[
                'resnet_v2_50/block4/unit_3/bottleneck_v2'],
        }

        # (channels, backbone end point added after upsampling) per step.
        upsample_plan = (
            (512, 'resnet_v2_50/block2/unit_2/bottleneck_v2'),
            (256, 'resnet_v2_50/block1/unit_2/bottleneck_v2'),
            (64, 'resnet_v2_50/conv1'),
        )

        def _prediction_head(branch_scope, out_channels):
            """Build one decoder head under ``branch_scope``."""
            with tf.variable_scope(branch_scope):
                feats = resnet_endpoints['resnet_v2_50/block4']
                for depth, skip_key in upsample_plan:
                    feats = slim.conv2d_transpose(
                        feats, depth, kernel_size=[3, 3],
                        stride=2) + resnet_endpoints[skip_key]
                return slim.conv2d(feats,
                                   out_channels, [3, 3],
                                   scope='pred',
                                   activation_fn=None)

        with slim.arg_scope(
                self.fcn_arg_scope(is_training=is_training,
                                   normalizer_fn=None)), \
                tf.variable_scope('cloud_net',
                                  'cloud_net', [inputs],
                                  reuse=reuse):
            alpha_logits = _prediction_head('alpha_prediction',
                                            self.alpha_channel)
            reflectance_logits = _prediction_head('reflectance_prediction',
                                                  self.reflectance_channel)
        return alpha_logits, reflectance_logits
Пример #41
0
def create_model(input, landmark, phase_train, args):
    """Build the landmark network, its loss and the auxiliary angle head.

    Args:
        input: Image tensor handed to ``pfld_inference``.
        landmark: Ground-truth landmark tensor, subtracted from the
            predicted landmarks to form the loss.
        phase_train: Value used as ``is_training`` for batch norm.
        args: Object providing ``weight_decay``.

    Returns:
        ``(landmarks_pre, landmarks_loss, euler_angles_pre)``: predicted
        landmarks, the batch mean of the per-sample sum of squared landmark
        errors, and the 3-unit output of the auxiliary head.
    """

    def _show(tensor):
        # Log each layer's name and static shape while the graph is built.
        print(tensor.name, tensor.get_shape())

    batch_norm_params = {
        'decay': 0.995,
        'epsilon': 0.001,
        'updates_collections': None,  # update moving stats in place
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
        'is_training': phase_train
    }

    # NOTE(review): landmark_dim is not used afterwards; kept because the
    # int() conversion still runs on the static last dimension.
    landmark_dim = int(landmark.get_shape()[-1])
    features, landmarks_pre = pfld_inference(input, args.weight_decay,
                                             batch_norm_params)

    # Sum of squared errors per sample, averaged over the batch.
    per_sample_error = tf.reduce_sum(tf.square(landmarks_pre - landmark),
                                     axis=1)
    landmarks_loss = tf.reduce_mean(per_sample_error)

    # Auxiliary head: (channels, kernel, stride, scope) per convolution.
    aux_convs = (
        (128, [3, 3], 2, 'pfld_conv1'),
        (128, [3, 3], 1, 'pfld_conv2'),
        (32, [3, 3], 2, 'pfld_conv3'),
        (128, [7, 7], 1, 'pfld_conv4'),
    )

    print('\nauxiliary net')
    with slim.arg_scope(
            [slim.convolution2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            biases_initializer=tf.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        net_aux = features['auxiliary_input']
        for depth, kernel, stride, conv_scope in aux_convs:
            net_aux = slim.convolution2d(net_aux,
                                         depth, kernel,
                                         stride=stride,
                                         scope=conv_scope)
            _show(net_aux)

        net_aux = slim.max_pool2d(net_aux,
                                  kernel_size=[3, 3],
                                  stride=1,
                                  scope='pool1',
                                  padding='SAME')
        _show(net_aux)
        net_aux = slim.flatten(net_aux)
        _show(net_aux)

        fc1 = slim.fully_connected(net_aux,
                                   num_outputs=32,
                                   activation_fn=None,
                                   scope='pfld_fc1')
        _show(fc1)
        euler_angles_pre = slim.fully_connected(fc1,
                                                num_outputs=3,
                                                activation_fn=None,
                                                scope='pfld_fc2')
        _show(euler_angles_pre)

    return landmarks_pre, landmarks_loss, euler_angles_pre
Пример #42
0
def resnet_v2(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Assemble a pre-activation ResNet from ``blocks``.

    Args:
        inputs: Input tensor.
        blocks: Block descriptions forwarded to
            ``resnet_utils.stack_blocks_dense``.
        num_classes: If truthy, a classifier is appended: a 1x1 'logits'
            convolution, a flatten, and a fully connected layer with
            ``num_classes`` units; softmax predictions are recorded too.
        is_training: Forwarded to every ``slim.batch_norm`` in scope.
        global_pool: If True, features are averaged over axes 1 and 2
            (dimensions kept) before the classifier.
        output_stride: Optional requested output stride. When the root
            block is included it must be a multiple of 4 and is divided by
            4 before being passed to ``stack_blocks_dense``.
        include_root_block: Whether to apply the initial 7x7 stride-2
            convolution and 3x3 stride-2 max-pool.
        spatial_squeeze: If True (and ``num_classes`` is set), the
            classifier output is also recorded under the
            '<scope>/spatial_squeeze' end point. The output is already
            ``[batch, num_classes]``, so no squeeze op is applied.
        reuse: Forwarded to the variable scope.
        scope: Optional scope name; the default name is 'resnet_v2'.

    Returns:
        ``(net, end_points)``: the final tensor and a dict of end points.

    Raises:
        ValueError: If ``include_root_block`` is True and ``output_stride``
            is given but is not a multiple of 4.
    """
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
            [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # The root block already strides by 4. Integer
                        # division keeps the value an int on Python 3.
                        output_stride //= 4
                    # We do not include batch normalization or activation
                    # functions in conv1 because the first ResNet unit will
                    # perform these.
                    with slim.arg_scope([slim.conv2d],
                                        activation_fn=None,
                                        normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net,
                                                       64,
                                                       7,
                                                       stride=2,
                                                       scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # This is needed because the pre-activation variant does not
                # have batch normalization or activation functions in the
                # residual unit output.
                net = slim.batch_norm(net,
                                      activation_fn=tf.nn.relu,
                                      scope='postnorm')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(input_tensor=net,
                                         axis=[1, 2],
                                         name='pool5',
                                         keepdims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.conv2d(net,
                                      num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      scope='logits')

                    net = slim.flatten(net)
                    net = slim.fully_connected(net,
                                               num_classes,
                                               activation_fn=None)

                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        # After flatten + fully_connected the tensor is
                        # already rank 2. The former
                        # tf.squeeze(net, [1, 2]) addressed axes that do
                        # not exist (or are not of size 1) and failed at
                        # graph construction, so only record the end point.
                        end_points[sc.name + '/spatial_squeeze'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')

                print('------------------------')
                print(net)
                print('------------------------')
                return net, end_points
Пример #43
0
def pfld_inference(input, weight_decay, batch_norm_params):
    """Build the PFLD backbone graph and its landmark regression head.

    The network is a stride-2 3x3 convolution and a 3x3 depthwise convolution
    followed by a chain of bottlenecks (1x1 expand -> 3x3 depthwise -> 1x1
    linear projection). Most bottlenecks are wrapped in an identity shortcut.
    Three feature maps (``conv6_1``, ``conv7`` and ``conv8``) are pooled or
    flattened, concatenated, and fed to one fully connected layer.

    The name and static shape of every intermediate tensor are printed while
    the graph is being built.

    Args:
        input: Image tensor fed to the first convolution. The inline size
            notes assume 112x112x3 images — TODO confirm against the caller.
        weight_decay: Scale handed to ``slim.l2_regularizer`` for the
            convolution weights.
        batch_norm_params: Passed as ``normalizer_params`` to every
            convolution, i.e. forwarded to ``slim.batch_norm``.

    Returns:
        A ``(features, landmarks)`` tuple. ``features`` is a dict whose only
        key, ``'auxiliary_input'``, holds the output of the last ``conv3_*``
        residual block. ``landmarks`` is the output of the 196-unit ``'fc'``
        layer.
    """
    # Multiplier applied to the projection (output) depths only; the expand
    # depths below are not scaled by it.
    coefficient = 1

    # (block name, scope of the linear projection) for the conv3 residual
    # stage. 'conv3_3linear' has no slash, unlike its siblings; it is kept
    # verbatim because the variable names are derived from the scope string.
    stage3_blocks = (
        ('conv3_2', 'conv3_2/linear'),
        ('conv3_3', 'conv3_3linear'),
        ('conv3_4', 'conv3_4/linear'),
        ('conv3_5', 'conv3_5/linear'),
    )
    stage5_blocks = ('conv5_2', 'conv5_3', 'conv5_4', 'conv5_5', 'conv5_6')

    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope(
                [slim.convolution2d, slim.separable_conv2d],
                activation_fn=tf.nn.relu6,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                biases_initializer=tf.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(weight_decay),
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name,
                                                    input.get_shape()))
            # Stem: strided 3x3 convolution, then a 3x3 depthwise convolution.
            net = _pfld_report(
                slim.convolution2d(input,
                                   64 * coefficient, [3, 3],
                                   stride=2,
                                   scope='conv_1'))
            net = _pfld_report(
                slim.separable_convolution2d(net,
                                             num_outputs=None,
                                             stride=1,
                                             depth_multiplier=1,
                                             kernel_size=[3, 3],
                                             scope='conv2/dwise'))

            # conv3 stage: one strided bottleneck, then four residual ones.
            net = _pfld_bottleneck(net, 'conv3_1', 128, 64 * coefficient,
                                   stride=2)
            for block_name, linear_scope in stage3_blocks:
                net = _pfld_residual(net, block_name, 128, 64 * coefficient,
                                     linear_scope=linear_scope)
            features['auxiliary_input'] = net

            # conv4: strided bottleneck that also widens the projection.
            net = _pfld_bottleneck(net, 'conv4_1', 128, 128 * coefficient,
                                   stride=2)

            # conv5 stage: one plain bottleneck, then five residual ones.
            net = _pfld_bottleneck(net, 'conv5_1', 512, 128 * coefficient)
            for block_name in stage5_blocks:
                net = _pfld_residual(net, block_name, 512, 128 * coefficient)

            # conv6: plain bottleneck that narrows the projection to 16.
            conv6_1 = _pfld_bottleneck(net, 'conv6_1', 256, 16 * coefficient)

            conv7 = _pfld_report(
                slim.convolution2d(conv6_1,
                                   32 * coefficient, [3, 3],
                                   stride=2,
                                   scope='conv7'))

            # 7x7 VALID convolution; presumably collapses a 7x7 map to 1x1
            # for 112x112 inputs — TODO confirm.
            conv8 = _pfld_report(
                slim.convolution2d(conv7,
                                   128 * coefficient, [7, 7],
                                   stride=1,
                                   scope='conv8',
                                   padding='VALID'))

            # Both pools are created before any flatten so that the
            # auto-generated op names come out in the original order.
            avg_pool1 = _pfld_full_window_avg_pool(conv6_1)
            avg_pool2 = _pfld_full_window_avg_pool(conv7)

            s1 = slim.flatten(avg_pool1)
            s2 = slim.flatten(avg_pool2)
            s3 = slim.flatten(conv8)
            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale,
                                             num_outputs=196,
                                             activation_fn=None,
                                             scope='fc')
            return features, landmarks


def _pfld_report(tensor):
    """Print the name and static shape of `tensor`, then return it."""
    print(tensor.name, tensor.get_shape())
    return tensor


def _pfld_bottleneck(inputs, name, expand_depth, output_depth, stride=1,
                     linear_scope=None):
    """Apply 1x1 expand -> 3x3 depthwise -> 1x1 linear projection.

    `stride` is applied in the expand convolution only. `linear_scope`
    overrides the default ``name + '/linear'`` scope of the projection.
    """
    if linear_scope is None:
        linear_scope = name + '/linear'
    net = _pfld_report(
        slim.convolution2d(inputs,
                           expand_depth, [1, 1],
                           stride=stride,
                           scope=name + '/expand'))
    net = _pfld_report(
        slim.separable_convolution2d(net,
                                     num_outputs=None,
                                     stride=1,
                                     depth_multiplier=1,
                                     kernel_size=[3, 3],
                                     scope=name + '/dwise'))
    # The projection overrides the arg-scope activation with None.
    net = _pfld_report(
        slim.convolution2d(net,
                           output_depth, [1, 1],
                           stride=1,
                           activation_fn=None,
                           scope=linear_scope))
    return net


def _pfld_residual(inputs, name, expand_depth, output_depth,
                   linear_scope=None):
    """Add a stride-1 bottleneck branch onto `inputs` (identity shortcut)."""
    branch = _pfld_bottleneck(inputs, name, expand_depth, output_depth,
                              linear_scope=linear_scope)
    return _pfld_report(inputs + branch)


def _pfld_full_window_avg_pool(tensor):
    """Average-pool `tensor` with a window of its static height and width."""
    static_shape = tensor.get_shape()
    return _pfld_report(
        slim.avg_pool2d(tensor, [static_shape[1], static_shape[2]],
                        stride=1))
Пример #44
0
  def build_network(self, sess, is_training=True):
    """Assemble the ResNet feature-pyramid detection graph.

    Steps, in graph-construction order:
      1. Pick weight initializers from ``cfg.TRAIN.TRUNCATED``.
      2. Describe the ResNet blocks for 50, 101 or 152 layers.
      3. Build the base network and the ResNet blocks; the first
         ``cfg.RESNET.FIXED_BLOCKS`` blocks are built under an arg scope
         created with ``is_training=False``.
      4. Build the pyramid with ``self.build_pyramid`` and, for levels 5 down
         to 2, an RPN head plus proposals stored in ``self._predictions[p]``.
      5. Concatenate the proposals of all levels, compute the top-scoring
         indices, distribute boxes to levels with ``assign_boxes``, crop-pool
         them and run the fully connected classification / box head.

    Args:
      sess: Not referenced anywhere in this method.
      is_training: Passed to the ResNet arg scope, to the ``trainable`` flag
        of the RPN convolutions and to the dropout layers; also selects
        between the training and test proposal paths.

    Returns:
      Tuple ``(rois, cls_prob, bbox_pred)`` where ``rois`` is
      ``self._predictions["rois"]``.

    Raises:
      NotImplementedError: If ``self._num_layers`` is not 50, 101 or 152, if
        ``cfg.TEST.MODE`` is neither ``'nms'`` nor ``'top'`` when not
        training, or if ``cfg.POOLING_MODE`` is not ``'crop'``.
      AssertionError: If ``cfg.RESNET.FIXED_BLOCKS`` is outside ``0..4``.
    """
    # Anchor scales differ per pyramid level; filled in by the loop below.
    self._anchor_scales = {}

    # select initializers
    # NOTE(review): initializer_bbox is assigned here but not referenced
    # again in this method.
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    # The three variants differ only in the unit counts of block2 and block3.
    if self._num_layers == 50:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 4)
    if cfg.RESNET.FIXED_BLOCKS == 4:
      # Everything is frozen. This slice includes block4, whereas the other
      # two branches stop at blocks[:-1].
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                           blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
      # Frozen prefix first, then the remaining blocks (block4 excluded).
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net, endpoints = resnet_v1.resnet_v1(net,
                                     blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

      # NOTE(review): `endpoints` from the frozen call is rebound here;
      # whether the second dict still contains the frozen blocks' endpoints
      # depends on resnet_v1 — confirm.
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                           blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net = self.build_base()
        net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    pyramid = self.build_pyramid(endpoints)
    self._layers['head'] = net_conv4  # not sure what to do with this
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      # Walk the pyramid levels 5, 4, 3, 2.
      # NOTE(review): the same scope strings ("rpn_conv/3x3",
      # 'rpn_cls_score', 'rpn_bbox_pred') are used on every level; whether
      # this shares weights or fails depends on the reuse setting in effect —
      # confirm.
      for i in range(5, 1, -1):
        p = i
        self._act_summaries.append(pyramid[p])
        self._feat_stride[p] = [2 ** i]
        shape = tf.shape(pyramid[p])
        h, w = shape[1], shape[2]

        # The paper uses a single anchor scale per pyramid level, but with
        # that setting anchor_target_proposal(...) frequently found no
        # overlaps, so self._num_scales consecutive powers of two ending at
        # 2**i are used instead.
        self._anchor_scales[p] = [2**(i-j) for j in range(self._num_scales-1, -1, -1)]
        self._anchor_component(p, h, w)

        # build the anchors for the image
        # rpn
        rpn = slim.conv2d(pyramid[p], 256, [3, 3], trainable=is_training, weights_initializer=initializer, scope="rpn_conv/3x3", stride=1)
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
      
        if is_training:
          rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
          rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor", p)
          # Try to have a deterministic order for the computing graph, for reproducibility
          with tf.control_dependencies([rpn_labels]):
            rois, roi_scores = self._proposal_target_layer(rois, roi_scores, "rpn_rois", p)
        else:
          if cfg.TEST.MODE == 'nms':
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
          elif cfg.TEST.MODE == 'top':
            rois, roi_scores = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
          else:
            raise NotImplementedError
        # Per-level outputs; presumably self._predictions[p] already exists
        # as a dict (or self._predictions is a defaultdict) — verify.
        self._predictions[p]['rois'] = rois
        self._predictions[p]['roi_scores'] = roi_scores
        self._predictions[p]['rpn_cls_score'] = rpn_cls_score 
        self._predictions[p]['rpn_cls_score_reshape'] = rpn_cls_score_reshape
        self._predictions[p]['rpn_cls_prob'] = rpn_cls_prob
        self._predictions[p]['rpn_bbox_pred'] = rpn_bbox_pred
    
    # Merge the proposals of every pyramid level (iteration order of
    # `pyramid`).
    all_roi_scores = tf.concat(values=[self._predictions[p]['roi_scores'] for p in pyramid], axis=0)
    all_rois = tf.concat(values=[self._predictions[p]['rois'] for p in pyramid], axis=0)
    # NOTE(review): p_vals is a Python list of per-level tensors handed
    # directly to tf.reshape; this presumably requires every level to yield
    # the same number of rois — confirm. p_roi is also not referenced after
    # the tf.gather below.
    p_vals = [tf.fill([tf.shape(self._predictions[p]['roi_scores'])[0], 1], float(p)) for p in pyramid]
    p_roi = tf.concat(values=[tf.reshape(p_vals, [-1, 1]), all_rois], axis=1)
    
    if is_training:
      all_proposal_target_labels = tf.concat(values=[self._proposal_targets[p]['labels'] for p in pyramid], axis=0)
      all_proposal_target_bbox = tf.concat(values=[self._proposal_targets[p]['bbox_targets'] for p in pyramid], axis=0)
      all_proposal_target_inside_w = tf.concat(values=[self._proposal_targets[p]['bbox_inside_weights'] for p in pyramid], axis=0)
      all_proposal_target_outside_w = tf.concat(values=[self._proposal_targets[p]['bbox_outside_weights'] for p in pyramid], axis=0)

    # self._mode selects the cfg section; it is decoded first when it is a
    # bytes object.
    cfg_key = self._mode
    if type(cfg_key) == bytes:
      cfg_key = cfg_key.decode('utf-8')
    # Cap k at the number of available scores.
    # NOTE(review): uses the static shape, so this assumes the first
    # dimension of all_roi_scores is known at graph-build time — confirm.
    nms_top_n = all_roi_scores.shape[0] \
                    if all_roi_scores.shape[0] < cfg[cfg_key].RPN_POST_NMS_TOP_N \
                    else cfg[cfg_key].RPN_POST_NMS_TOP_N
    _, top_indices = tf.nn.top_k(tf.reshape(all_roi_scores, [-1]), k=nms_top_n)
    p_roi = tf.gather(p_roi, top_indices)
    
    # Only the first of assign_boxes' three results is used; it is indexed
    # below as assigned_rois[p - 2] for levels 2..5.
    [assigned_rois, _, _] = \
                assign_boxes(all_rois, [all_rois, top_indices], [2, 3, 4, 5], 'assign_boxes')

    for p in range(5, 1, -1):
      splitted_rois = assigned_rois[p-2]

      # rcnn 
      if cfg.POOLING_MODE == 'crop':
        cropped_roi = self._crop_pool_layer(pyramid[p], splitted_rois, "cropped_roi", p) 
        self._predictions[p]['cropped_roi'] = cropped_roi
      else:
        raise NotImplementedError
    # NOTE(review): iterates over every key of self._predictions, so this
    # presumably relies on only pyramid-level keys being present at this
    # point (the string keys are added further down) — verify.
    cropped_rois = [self._predictions[p_layer]['cropped_roi'] for p_layer in self._predictions]
    cropped_rois = tf.concat(values=cropped_rois, axis=0)


    # Detection head: pool, flatten, two 1024-unit FC layers with dropout,
    # then the classification and box-regression outputs.
    cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
    refine = slim.flatten(cropped_regions)
    refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
    refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
    refine = slim.fully_connected(refine,  1024, activation_fn=tf.nn.relu)
    refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
    cls_score = slim.fully_connected(refine, self._num_classes, activation_fn=None, 
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    # Box regression goes through my_sigmoid, which is defined outside this
    # method.
    bbox_pred = slim.fully_connected(refine, self._num_classes*4, activation_fn=my_sigmoid, 
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001))

    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = tf.gather(all_rois, top_indices)
    
    if is_training:
      # Publish the targets concatenated across levels next to the per-level
      # entries.
      self._proposal_targets['labels'] = all_proposal_target_labels 
      self._proposal_targets['bbox_targets'] = all_proposal_target_bbox
      self._proposal_targets['bbox_inside_weights'] = all_proposal_target_inside_w
      self._proposal_targets['bbox_outside_weights'] = all_proposal_target_outside_w

    #self._score_summaries.update(self._predictions) #  score summaries not compatible w/ dict
    return self._predictions["rois"], cls_prob, bbox_pred
Пример #45
0
    def get_model(self, inputs, weight_decay=0.0005, is_training=False):
        """Build the VGG-16 convolutional trunk and append the extra SSD block.

        Five conv/max-pool stages are created under the ``'vgg_16'`` variable
        scope. The conv output of every stage is stored in ``end_points``;
        for stages 3-5 the depth is also recorded in ``channels`` and the
        ``[1:3]`` slice of the output shape is appended to
        ``self.layer_shape``. ``self.img_shape`` is set from ``inputs``.

        Args:
            inputs: Tensor fed to the first convolution.
            weight_decay: Forwarded to ``self.__arg_scope``.
            is_training: Forwarded only to ``self.__additional_ssd_block``.

        Returns:
            Whatever ``self.__additional_ssd_block`` returns for the collected
            end points, channel depths and the final pooled tensor.
        """
        layer_defaults = self.__arg_scope(weight_decay=weight_decay)
        self.img_shape = tfe.get_shape(inputs)[1:3]

        # (stage number, conv repetitions, conv depth, recorded for SSD?)
        vgg_stages = (
            (1, 2, 64, False),
            (2, 2, 128, False),
            (3, 3, 256, True),
            (4, 3, 512, True),
            (5, 3, 512, True),
        )

        with slim.arg_scope(layer_defaults):
            # End_points collect relevant activations for external use.
            end_points = {}
            channels = {}
            with tf.variable_scope('vgg_16', [inputs]):
                # Original VGG-16 blocks.
                net = inputs
                for stage, repetitions, depth, recorded in vgg_stages:
                    block_key = 'block%d' % stage
                    net = slim.repeat(net,
                                      repetitions,
                                      slim.conv2d,
                                      depth, [3, 3],
                                      scope='conv%d' % stage)
                    end_points[block_key] = net
                    if recorded:
                        channels[block_key] = depth
                        self.layer_shape.append(tfe.get_shape(net)[1:3])
                    net = slim.max_pool2d(net, [2, 2],
                                          scope='pool%d' % stage)

            # Additional SSD blocks.
            with tf.variable_scope(self.model_name):
                return self.__additional_ssd_block(end_points,
                                                   channels,
                                                   net,
                                                   is_training=is_training)
Пример #46
0
    def interface_cloudMattingNet(self, inputs, reuse=None, is_training=True):
        """Build the cloud-matting network: one shared encoder, two decoders.

        The encoder (variable scope 'feature_exatraction') runs six stages.
        Each stage applies two 3x3 convolutions, records the result as a skip
        connection, and then downsamples with a stride-2 3x3 convolution (the
        downsampling layers are convolutions even though their scopes are
        named 'poolN'). A final 3x3 convolution produces 'net7'.

        Each decoder starts again from 'net7', applies six stride-2 transposed
        convolutions, adds the matching encoder skip tensor after every one,
        and ends with a 3x3 'pred' convolution without activation.

        Args:
            inputs: tensor fed to the first encoder convolution.
            reuse: forwarded to the enclosing 'cloud_net' variable scope.
            is_training: forwarded to ``self.fcn_arg_scope``.

        Returns:
            A tuple ``(alpha_logits, reflectance_logits)`` with
            ``self.alpha_channel`` and ``self.reflectance_channel`` output
            channels respectively.
        """
        # (conv scope, downsample scope, endpoint key, channel count)
        encoder_plan = (
            ('conv1', 'pool1', 'net1', 64),
            ('conv2', 'pool2', 'net2', 128),
            ('conv3', 'pool3', 'net3', 128),
            ('conv4', 'pool4', 'net4', 256),
            ('conv5', 'pool5', 'net5', 512),
            ('conv6', 'pool6', 'net6', 512),
        )
        # (transposed-conv scope, skip endpoint key, channel count)
        decoder_plan = (
            ('conv_trans1', 'net6', 512),
            ('conv_trans2', 'net5', 512),
            ('conv_trans3', 'net4', 256),
            ('conv_trans4', 'net3', 128),
            ('conv_trans5', 'net2', 128),
            ('conv_trans6', 'net1', 64),
        )
        endpoints = {}

        def _decode(num_outputs):
            """Upsample from 'net7' back through the skips and predict."""
            feats = endpoints['net7']
            for up_scope, skip_key, width in decoder_plan:
                upsampled = slim.conv2d_transpose(feats,
                                                  width, [3, 3],
                                                  stride=2,
                                                  scope=up_scope)
                feats = upsampled + endpoints[skip_key]
            return slim.conv2d(feats,
                               num_outputs, [3, 3],
                               scope='pred',
                               activation_fn=None)

        with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
            with tf.variable_scope('cloud_net',
                                   'cloud_net', [inputs],
                                   reuse=reuse):
                with tf.variable_scope('feature_exatraction'):
                    feats = inputs
                    for conv_scope, down_scope, key, width in encoder_plan:
                        feats = slim.repeat(feats,
                                            2,
                                            slim.conv2d,
                                            width, [3, 3],
                                            scope=conv_scope)
                        # Skip tensor is taken before downsampling.
                        endpoints[key] = feats
                        feats = slim.conv2d(feats,
                                            width, [3, 3],
                                            stride=2,
                                            scope=down_scope)
                    endpoints['net7'] = slim.conv2d(feats,
                                                    512, [3, 3],
                                                    scope='conv7')

                with tf.variable_scope('alpha_prediction'):
                    alpha_logits = _decode(self.alpha_channel)

                with tf.variable_scope('reflectance_prediction'):
                    reflectance_logits = _decode(self.reflectance_channel)
        return alpha_logits, reflectance_logits
Пример #47
0
# Load MNIST with one-hot labels and open an interactive session.
mnist = input_data.read_data_sets("MNIST_data/",one_hot = True)
sess = tf.InteractiveSession()

# x holds flattened 784-value images, y holds 10-way one-hot labels;
# x_image reshapes x to [-1, 28, 28, 1] for the convolution layers.
x = tf.placeholder(tf.float32,[None,784])
y = tf.placeholder(tf.float32,[None,10])
x_image = tf.reshape(x,[-1,28,28,1])

# LeNet
# replaced sigmoid with ReLU
# add dropout
# keep_prob is only declared here; no dropout layer uses it in this section.
keep_prob = tf.placeholder(tf.float32)

# Convolution layers: every slim.conv2d below shares 'SAME' padding, Xavier
# initialisation and an L2 weight regulariser of 0.005.
with slim.arg_scope([slim.conv2d],padding='SAME',
            weights_initializer=tf.contrib.layers.xavier_initializer(),# this is default in slim.conv2d
            weights_regularizer=slim.l2_regularizer(0.005)):

    # conv1: 3x3 conv with 16 outputs, then a 2x2 max-pool with stride 1.
    # NOTE(review): the original comment gave 28*28*1 -> 25*25*16 -> 23*23*16,
    # but with padding='SAME' and stride=1 the conv should keep 28x28 --
    # confirm the actual sizes (the pool uses slim's default padding).
    conv1 = slim.conv2d(x_image,16,[3,3],stride=1,scope='conv1')
    pool1 = slim.max_pool2d(conv1,[2,2],stride=1,scope='pool1')
    #lrn1 = tf.nn.lrn(pool1,2,1,1e-4,0.75,name='lrn1')

    # conv2: 3x3 conv with 64 outputs, then a 2x2 max-pool with stride 1.
    # NOTE(review): the original 23*23*16 -> 20*20*64 -> 18*18*64 sizes look
    # stale for the same reason as above -- confirm.
    conv2 = slim.conv2d(pool1,64,[3,3],stride=1,scope='conv2')
    pool2 = slim.max_pool2d(conv2,[2,2],stride=1,scope='pool2')
    #lrn2 = tf.nn.lrn(pool2,2,1,1e-4,0.75,name='lrn2')

    # conv3: 2x2 conv with stride 2 and 384 outputs (the original comment
    # said 128 channels, which does not match the code).
    conv3 = slim.conv2d(pool2,384,[2,2],stride=2,scope='conv3')
Пример #48
0
# Configuration for extracting ResNet-v2-50 features from the training set.
crop_size = [224, 224]
batch_size = 120
output_size = 1001
mean_file = './input/meanIm.npy'

train_dataset = './input/train_by_hotel.txt'
# img_size, whichGPU and pretrained_net are not defined in this section; they
# must already exist at module level when this code runs.
train_data = CombinatorialTripletSet(train_dataset,
                                     mean_file,
                                     img_size,
                                     crop_size,
                                     isTraining=False)
# Use both entries of crop_size for the spatial dimensions. The original used
# crop_size[0] twice, which only works while the crop is square.
# NOTE(review): assumes crop_size is ordered [height, width] -- confirm.
image_batch = tf.placeholder(tf.float32,
                             shape=[None, crop_size[0], crop_size[1], 3])

print("Preparing network...")
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    _, layers = resnet_v2.resnet_v2_50(image_batch,
                                       num_classes=output_size,
                                       is_training=False)

# The feature is the L2-normalised (along axis 3) logits end point.
# NOTE(review): tf.squeeze without an axis list removes every size-1
# dimension, so a batch of one image would also lose its batch axis --
# confirm that callers never feed a single image.
featLayer = 'resnet_v2_50/logits'
feat = tf.squeeze(tf.nn.l2_normalize(layers[featLayer], 3))

# Restrict TensorFlow to the selected GPU and restore pretrained weights.
c = tf.ConfigProto()
c.gpu_options.visible_device_list = str(whichGPU)
sess = tf.Session(config=c)
saver = tf.train.Saver()
saver.restore(sess, pretrained_net)

train_ims = []
train_classes = []
for ims, cls in zip(train_data.files, train_data.classes):
Пример #49
0
def alexnet_model(inputs,
                  is_training=True,
                  augmentation_function=None,
                  emb_size=128,
                  l2_weight=1e-4,
                  img_shape=None,
                  new_shape=None,
                  image_summary=False,
                  batch_norm_decay=0.99):
    """Build an AlexNet-v2 style network that ends in an embedding layer.

    The convolutional trunk is followed by a flatten, a 4096-unit fully
    connected layer with dropout, and a final fully connected layer with
    ``emb_size`` outputs.

    Args:
        inputs: image batch; cast to float32 before use.
        is_training: enables the augmentation function and dropout.
        augmentation_function: optional callable ``f(images, shape)`` applied
            to the inputs while training.
        emb_size: number of outputs of the last fully connected layer.
        l2_weight: not read by this function (the arg scope is built with its
            own default weight decay of 0.0005).
        img_shape: shape handed to ``augmentation_function`` when
            ``new_shape`` is None.
        new_shape: if given, inputs are bilinearly resized to
            ``new_shape[:2]`` and this shape is handed to the augmentation.
        image_summary: if true, an image summary named 'Inputs' is added.
        batch_norm_decay: not read by this function.

    Returns:
        The output tensor of the 'fc7' layer.
    """

    from tensorflow.contrib import layers
    from tensorflow.contrib.framework.python.ops import arg_scope
    from tensorflow.contrib.layers.python.layers import layers as layers_lib
    from tensorflow.contrib.layers.python.layers import regularizers
    from tensorflow.python.ops import init_ops
    from tensorflow.python.ops import nn_ops
    from tensorflow.python.ops import variable_scope

    def _truncated_normal(stddev):
        """Zero-mean truncated-normal initializer with the given stddev."""
        return init_ops.truncated_normal_initializer(0.0, stddev)

    def _layer_defaults(weight_decay=0.0005):
        """Return the arg scope holding the conv / fc / pooling defaults."""
        conv_and_fc = [layers.conv2d, layers_lib.fully_connected]
        with arg_scope(
                conv_and_fc,
                activation_fn=nn_ops.relu,
                biases_initializer=init_ops.constant_initializer(0.1),
                weights_regularizer=regularizers.l2_regularizer(weight_decay)):
            with arg_scope([layers.conv2d], padding='SAME'):
                with arg_scope([layers_lib.max_pool2d],
                               padding='VALID') as captured_scope:
                    return captured_scope

    def _build(images,
               training=True,
               embedding_size=4096,
               dropout_keep_prob=0.5,
               scope='alexnet_v2'):
        """Preprocess the images and stack the AlexNet layers."""
        images = tf.cast(images, tf.float32)
        if new_shape is None:
            aug_shape = img_shape
        else:
            aug_shape = new_shape
            images = tf.image.resize_images(
                images,
                tf.constant(new_shape[:2]),
                method=tf.image.ResizeMethod.BILINEAR)
        if training and augmentation_function is not None:
            images = augmentation_function(images, aug_shape)
        if image_summary:
            tf.summary.image('Inputs', images, max_outputs=3)

        # Per-image, per-channel normalisation over axes 1 and 2. The divisor
        # is the mean squared deviation plus 1e-5 (no square root is taken).
        channel_mean = tf.reduce_mean(images, [1, 2], True)
        channel_msd = tf.reduce_mean(tf.square(images - channel_mean),
                                     [1, 2], True)
        images = (images - channel_mean) / (channel_msd + 1e-5)

        with variable_scope.variable_scope(scope, 'alexnet_v2',
                                           [images]) as var_scope:
            collection_name = var_scope.original_name_scope + '_end_points'
            collected_ops = [
                layers.conv2d, layers_lib.fully_connected,
                layers_lib.max_pool2d
            ]

            # Outputs of conv2d, fully_connected and max_pool2d are added to
            # the end-points collection.
            with arg_scope(collected_ops,
                           outputs_collections=[collection_name]):
                net = layers.conv2d(images,
                                    64, [11, 11],
                                    4,
                                    padding='VALID',
                                    scope='conv1')
                net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
                net = layers.conv2d(net, 192, [5, 5], scope='conv2')
                net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
                net = layers.conv2d(net, 384, [3, 3], scope='conv3')
                net = layers.conv2d(net, 384, [3, 3], scope='conv4')
                net = layers.conv2d(net, 256, [3, 3], scope='conv5')
                net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

                net = slim.flatten(net, scope='flatten')

                # The head uses real fully connected layers on the flattened
                # features, with its own weight and bias initializers.
                with arg_scope(
                        [slim.fully_connected],
                        weights_initializer=_truncated_normal(0.005),
                        biases_initializer=init_ops.constant_initializer(0.1)):
                    net = layers.fully_connected(net, 4096, scope='fc6')
                    net = layers_lib.dropout(net,
                                             dropout_keep_prob,
                                             is_training=training,
                                             scope='dropout6')
                    net = layers.fully_connected(net,
                                                 embedding_size,
                                                 scope='fc7')

        return net

    with slim.arg_scope(_layer_defaults()):
        return _build(inputs, is_training, emb_size)
Пример #50
0
def inception_v3_base(inputs,scope=None):
    """Build the convolutional trunk of an Inception-v3 style network.

    The trunk is a stem of plain convolutions and max-pools followed by three
    groups of inception modules (Mixed_5b-5d, Mixed_6a-6e, Mixed_7a-7c). Each
    module concatenates its branches along axis 3.

    Fixes relative to the previous version:
      * the inception arg_scope used the misspelled keyword ``sstride``,
        which the layer functions do not accept;
      * the third module of the first group reused the scope name
        "Mixed_5c", colliding with the variables of the second module; it is
        now "Mixed_5d";
      * in Mixed_7a/Branch_0 the 3x3 convolution read ``net`` instead of the
        output of the preceding 1x1 convolution, leaving that layer unused.

    Args:
        inputs: input image tensor.
        scope: optional variable scope name; defaults to "Inception_v3".

    Returns:
        A tuple ``(net, end_points)`` where ``net`` is the output of Mixed_7c
        and ``end_points`` maps "Mixed_6e" to that module's output.
    """
    end_points = {}
    with tf.variable_scope(scope,"Inception_v3",[inputs]):
        # Stem: stride 1 and VALID padding unless a layer overrides them.
        with slim.arg_scope([slim.conv2d,slim.max_pool2d,slim.avg_pool2d],stride=1,padding="VALID"):
            net = slim.conv2d(inputs,num_outputs=32,kernel_size=[3,3],stride=2,scope="Conv2d_1a_3x3")
            net = slim.conv2d(net,num_outputs=32,kernel_size=[3,3],scope="Conv2d_2a_3x3")
            net = slim.conv2d(net,num_outputs=64,kernel_size=[3,3],padding="SAME",scope="Conv2d_2b_3x3")
            net = slim.max_pool2d(net,kernel_size=[3,3],stride=2,scope="MaxPool_3a_3x3")
            net = slim.conv2d(net,num_outputs=80,kernel_size=[1,1],scope="Conv2d_3b_1x1")
            net = slim.conv2d(net,num_outputs=192,kernel_size=[3,3],scope="Conv2d_4a_3x3")
            net = slim.max_pool2d(net,kernel_size=[3,3],stride=2,scope="MaxPool_5a_3x3")

        # Inception modules: stride 1 and SAME padding unless overridden.
        with slim.arg_scope([slim.conv2d,slim.max_pool2d,slim.avg_pool2d],stride=1,padding="SAME"):
            # First inception group, module 1.
            with tf.variable_scope("Mixed_5b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net,num_outputs=64,kernel_size=[1,1],scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net,num_outputs=48,kernel_size=[1,1],scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1,num_outputs=64,kernel_size=[5,5],scope="Conv2d_0b_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net,num_outputs=64,kernel_size=[1,1],scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2,num_outputs=96,kernel_size=[3,3],scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2,num_outputs=96,kernel_size=[3,3],scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net,kernel_size=[3,3],scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3,num_outputs=32,kernel_size=[1,1],scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0,batch_1,batch_2,batch_3],3)

            # First inception group, module 2.
            with tf.variable_scope("Mixed_5c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0c_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # First inception group, module 3. The scope must differ from
            # module 2, otherwise the layer variables collide.
            with tf.variable_scope("Mixed_5d"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0c_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Second inception group, module 1 (stride-2 reduction module).
            with tf.variable_scope("Mixed_6a"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=384, kernel_size=[3, 3],
                                          stride=2, padding="VALID", scope="Conv2d_1a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_1 = slim.conv2d(batch_1, num_outputs=96, kernel_size=[3, 3],
                                          stride=2, padding="VALID", scope="Conv2d_1a_1x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding="VALID",
                                              scope="MaxPool_1a_3x3")

                net = tf.concat([batch_0, batch_1, batch_2], 3)

            # Second inception group, module 2.
            with tf.variable_scope("Mixed_6b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=128, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")

                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Second inception group, module 3.
            with tf.variable_scope("Mixed_6c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")

                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Second inception group, module 4.
            with tf.variable_scope("Mixed_6d"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")

                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Second inception group, module 5.
            # NOTE(review): the intermediate widths here are 160, the same as
            # Mixed_6c/6d; the reference Inception-v3 appears to use 192 in
            # this module -- confirm before relying on pretrained weights.
            with tf.variable_scope("Mixed_6e"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")

                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Expose the Mixed_6e output to callers.
            end_points["Mixed_6e"] = net

            # Third inception group, module 1 (stride-2 reduction module).
            with tf.variable_scope("Mixed_7a"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    # Chain on the 1x1 output; feeding ``net`` here would
                    # bypass the layer above.
                    batch_0 = slim.conv2d(batch_0, num_outputs=320, kernel_size=[3, 3], stride=2,
                                          padding="VALID", scope="Conv2d_1a_3x3")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[3, 3], stride=2,
                                          padding="VALID", scope="Conv2d_1a_3x3")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding="VALID",
                                              scope="MaxPool_1a_3x3")

                net = tf.concat([batch_0, batch_1, batch_2], 3)

            # Third inception group, module 2.
            with tf.variable_scope("Mixed_7b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=320, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=384, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = tf.concat([
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0b_1x3"),
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0b_3x1")], axis=3)
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=448, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = tf.concat([
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0c_1x3"),
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0d_3x1")], axis=3)
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")

                # Concatenate inside the module scope, like the other modules.
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Third inception group, module 3.
            with tf.variable_scope("Mixed_7c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=320, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=384, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = tf.concat([
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0b_1x3"),
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0b_3x1")], axis=3)
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=448, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = tf.concat([
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0c_1x3"),
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0d_3x1")], axis=3)
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")

                # Concatenate inside the module scope, like the other modules.
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

        return net,end_points
def get_model(model_in, dropout_keeprate_node, train_config, scope):
    """Build a conv-pool-conv-pool-conv network with two fully connected layers.

    Layer sizes come from the module-level ``model_config`` and
    ``model_chout_num`` mappings, which are defined outside this function.

    Args:
        model_in: input tensor of the network.
        dropout_keeprate_node: keep probability handed to both dropout layers.
        train_config: object providing ``normalizer_fn``, ``batch_norm_decay``,
            ``batch_norm_fused``, ``is_trainable``, ``activation_fn``,
            ``weights_initializer``, ``weights_regularizer`` and
            ``biases_initializer``.
        scope: name of the enclosing variable scope.

    Returns:
        The output of the last dropout layer, reshaped to
        ``[-1, model_chout_num['out']]``.
    """
    net = model_in
    normalizer_fn = train_config.normalizer_fn

    # arg_scope only accepts ops decorated for it, so ``[None]`` is rejected.
    # With no normalizer, an empty op list makes this scope a no-op and lets
    # the "no normalizer" configuration actually run.
    if normalizer_fn is None:
        normalizer_ops = []
        # Without a normalizer the convolution applies the activation itself.
        conv_activation_fn = train_config.activation_fn
    else:
        normalizer_ops = [normalizer_fn]
        # With a normalizer the activation is applied by the normalizer (see
        # the arg_scope below), so the convolution stays linear.
        conv_activation_fn = None

    with tf.variable_scope(name_or_scope=scope, values=[model_in]):

        # batch norm arg_scope
        with slim.arg_scope(normalizer_ops,
                            decay=train_config.batch_norm_decay,
                            fused=train_config.batch_norm_fused,
                            is_training=train_config.is_trainable,
                            activation_fn=train_config.activation_fn):

            # max_pool arg_scope
            with slim.arg_scope([slim.max_pool2d],
                                stride=model_config['maxpool_stride'],
                                kernel_size=model_config['maxpool_ksize'],
                                padding='VALID'):

                # convolutional layer arg_scope
                with slim.arg_scope(
                    [slim.conv2d],
                        kernel_size=model_config['conv_ksize'],
                        stride=model_config['conv_stride'],
                        weights_initializer=train_config.weights_initializer,
                        weights_regularizer=train_config.weights_regularizer,
                        biases_initializer=train_config.biases_initializer,
                        trainable=train_config.is_trainable,
                        activation_fn=conv_activation_fn,
                        normalizer_fn=normalizer_fn):

                    net = slim.conv2d(inputs=net,
                                      num_outputs=model_chout_num['c1'],
                                      padding='SAME',
                                      scope='c1_conv')

                    net = slim.max_pool2d(inputs=net, scope='s2_pool')

                    net = slim.conv2d(inputs=net,
                                      num_outputs=model_chout_num['c3'],
                                      padding='VALID',
                                      scope='c3_conv')

                    net = slim.max_pool2d(inputs=net, scope='s4_pool')

                    net = slim.conv2d(inputs=net,
                                      num_outputs=model_chout_num['c5'],
                                      padding='VALID',
                                      scope='c5_conv')

        # output layer by fully-connected layer
        with slim.arg_scope([slim.fully_connected],
                            trainable=train_config.is_trainable):

            with slim.arg_scope([slim.dropout],
                                keep_prob=dropout_keeprate_node,
                                is_training=train_config.is_trainable):

                net = slim.fully_connected(
                    inputs=net,
                    num_outputs=model_chout_num['f6'],
                    activation_fn=train_config.activation_fn,
                    scope='f6_fc')

                net = slim.dropout(inputs=net, scope='f6_dropout')

                net = slim.fully_connected(inputs=net,
                                           num_outputs=model_chout_num['out'],
                                           activation_fn=None,
                                           scope='out_fc')

                # NOTE(review): dropout is also applied to the output logits
                # here; kept as in the original -- confirm it is intended.
                out_logit = slim.dropout(inputs=net, scope='out_dropout')

                # Collapse any remaining leading dimensions to one row per
                # example.
                out_logit = tf.reshape(out_logit,
                                       shape=[-1, model_chout_num['out']])

        return out_logit
Пример #52
0
def mnist_model_dropout(inputs,
                        is_training=True,
                        emb_size=128,
                        l2_weight=1e-3,
                        batch_norm_decay=None,
                        img_shape=None,
                        new_shape=None,
                        dropout_keep_prob=0.8,
                        augmentation_function=None,
                        image_summary=False):  # pylint: disable=unused-argument
    """Construct the image-to-embedding vector model.

    The network is three blocks of two 3x3 convolutions followed by 2x2 max
    pooling (32, 64 and 128 filters). Dropout is applied after the first two
    blocks and after flattening, and a final fully connected layer produces
    the embedding. All conv / fully connected layers use ELU activations and
    L2 weight regularization.

    Args:
        inputs: Batch of images; cast to float32 (no rescaling is applied).
        is_training: Python bool. Enables dropout and, when
            `augmentation_function` is given, data augmentation.
        emb_size: Number of units of the final fully connected layer.
        l2_weight: Scale of the L2 weight regularizer.
        batch_norm_decay: Unused; kept for interface compatibility.
        img_shape: Unused; kept for interface compatibility.
        new_shape: If not None, images are bilinearly resized to
            `new_shape[:2]`.
        dropout_keep_prob: Keep probability of every dropout layer.
        augmentation_function: Optional callable applied to each image of the
            batch while training. When it is given, every image is also passed
            through `tf.image.per_image_standardization` (training or not).
        image_summary: Unused; kept for interface compatibility.

    Returns:
        The output tensor of the `fc1` layer (the embedding).
    """

    net = tf.cast(inputs, tf.float32)  # / 255.0
    if new_shape is not None:
        net = tf.image.resize_images(net,
                                     tf.constant(new_shape[:2]),
                                     method=tf.image.ResizeMethod.BILINEAR)

    # The results of tf.map_fn must be fed forward; previously they were
    # discarded, so neither augmentation nor standardization had any effect.
    if is_training and augmentation_function is not None:
        net = tf.map_fn(augmentation_function, net)

    if augmentation_function is not None:
        net = tf.map_fn(tf.image.per_image_standardization, net)

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        weights_regularizer=slim.l2_regularizer(l2_weight)):
        with slim.arg_scope([slim.dropout], is_training=is_training):

            net = slim.conv2d(net, 32, [3, 3], scope='conv1_1')
            net = slim.conv2d(net, 32, [3, 3], scope='conv1_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 14
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout1')

            net = slim.conv2d(net, 64, [3, 3], scope='conv2_1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 7
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout2')

            net = slim.conv2d(net, 128, [3, 3], scope='conv3_1')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 3

            net = slim.flatten(net, scope='flatten')

            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout3')

            emb = slim.fully_connected(net, emb_size, scope='fc1')

    return emb
Пример #53
0
def yolo2(net, is_training, num_anchors, classes, channel=32, name='yolo2'):
    """Build the YOLOv2 convolutional detection network.

    The body alternates 3x3 convolutions with 1x1 "compression" convolutions
    (half the filters) and 2x2 max pooling, doubling the filter count after
    each pooling step. Every convolution inside the body is followed by batch
    normalization and `leaky_relu`. A passthrough branch taken before the last
    pooling is rearranged (2x2 spatial neighbours stacked into channels) and
    concatenated with the deepest features. A final 1x1 convolution without
    activation or batch normalization produces the detection output.

    The static shape of every layer is printed while the graph is built.

    Args:
        net: Input feature tensor; presumably an NHWC image batch -- the
            passthrough reshape needs statically known, even height and width
            at that point.
        is_training: Forwarded to every batch normalization layer.
        num_anchors: Number of anchor boxes per cell.
        classes: Number of object classes.
        channel: Integer filter count of the first convolution.
        name: Variable scope wrapping the whole network.

    Returns:
        Tensor with `num_anchors * (5 + classes)` channels.
    """
    def batch_norm(net):
        return slim.batch_norm(net,
                               center=True,
                               scale=True,
                               epsilon=1e-5,
                               is_training=is_training)

    def conv(net, filters, index, kernel_size=None):
        # `index` ends up in the variable names, so it must follow the
        # established numbering to stay checkpoint-compatible.
        overrides = {} if kernel_size is None else {'kernel_size': kernel_size}
        net = slim.layers.conv2d(net,
                                 filters,
                                 scope='conv2d_%d' % index,
                                 **overrides)
        print(net.get_shape().as_list())
        return net

    def pool(net, index):
        return slim.layers.max_pool2d(net, scope='max_pool2d_%d' % index)

    def conv_stack(net, filters, first_index):
        # 3x3(filters), 1x1(filters/2), 3x3, 1x1, 3x3 on consecutive indices.
        for offset in range(5):
            if offset % 2:
                # Integer division: a float filter count is not a valid
                # layer size under Python 3.
                net = conv(net,
                           filters // 2,
                           first_index + offset,
                           kernel_size=[1, 1])
            else:
                net = conv(net, filters, first_index + offset)
        return net

    # Use 1*1 filters to compress the feature representation between 3*3 convolutions
    # Use batch normalization to stabilize training, speed up convergence, regularize the model
    with tf.variable_scope(name):
        with slim.arg_scope([slim.layers.conv2d],
                            kernel_size=[3, 3],
                            stride=1,
                            padding='SAME',
                            normalizer_fn=batch_norm,
                            activation_fn=leaky_relu), slim.arg_scope(
                                [slim.layers.max_pool2d],
                                kernel_size=[2, 2],
                                stride=2,
                                padding='SAME'):
            layer_index = 0
            for _ in range(2):
                net = conv(net, channel, layer_index)
                net = pool(net, layer_index)
                print(net.get_shape().as_list())
                channel *= 2
                layer_index += 1

            # With the default `channel`, this starts at channel=128, layer_index=2
            for _ in range(2):
                net = conv(net, channel, layer_index)
                net = conv(net,
                           channel // 2,
                           layer_index + 1,
                           kernel_size=[1, 1])
                net = conv(net, channel, layer_index + 2)
                # The pooling layer shares the index of the conv before it.
                net = pool(net, layer_index + 2)
                print(net.get_shape().as_list())
                layer_index += 3
                channel *= 2

            # channel=512 with the default `channel`
            net = conv_stack(net, channel, layer_index)
            layer_index += 4  # index of the last conv of the stack

            # Passthrough: keep the higher-resolution features (26*26*512 in
            # the reference configuration) to help localize smaller objects.
            pt = tf.identity(net, name='passthrough')
            net = pool(net, layer_index)
            layer_index += 1
            channel *= 2

            # channel=1024 with the default `channel`
            net = conv_stack(net, channel, layer_index)
            layer_index += 5

            # Add 3*3 convolutional layers with the full filter count
            for _ in range(2):
                net = conv(net, channel, layer_index)
                layer_index += 1

            # The passthrough layer stacks each 2x2 block of adjacent
            # features into channels instead of spatial locations, so that
            # e.g. (26,26,512) can be concatenated with (13,13,1024):
            #   (H, W, C) -> (H/2, 2, W/2, 2, C) -> (H/2, W/2, 2, 2, C)
            #             -> (H/2, W/2, 4*C)
            pt_shape = pt.get_shape().as_list()
            print('passthrough', pt_shape)
            # The batch size may be unknown while building the graph; let
            # tf.reshape infer it in that case.
            batch = -1 if pt_shape[0] is None else pt_shape[0]
            half_height = pt_shape[1] // 2
            half_width = pt_shape[2] // 2
            with tf.name_scope('pass_through'):
                pt_net = tf.reshape(
                    pt, [batch, half_height, 2, half_width, 2, pt_shape[3]])
                pt_net = tf.transpose(pt_net, [0, 1, 3, 2, 4, 5])
                pt_net = tf.reshape(
                    pt_net,
                    [batch, half_height, half_width, pt_shape[3] * 2 * 2])
                print(pt_net.get_shape().as_list())
            # pt_net: (13,13,2048) in the reference configuration
            net = tf.concat([net, pt_net], axis=3, name='concat_pt')
            # Fuse the passthrough features in the second to last convolutional layer
            net = conv(net, channel, layer_index)
        # Remove fully connected layers, instead add a final 1*1 convolutional layer with the number of outputs we need for detection
        # e.g. 5 boxes with 5 coordinates each and 20 classes per box -> 125 filters
        net = slim.layers.conv2d(net,
                                 num_anchors * (5 + classes),
                                 kernel_size=[1, 1],
                                 activation_fn=None,
                                 scope='final')
        print(net.get_shape().as_list())

        return net
Пример #54
0
def train():
    """Build the detection training graph and run the training loop.

    Graph construction: reads a batch with `next_batch`, converts the ground
    truth boxes through `back_forward_convert`, builds the detection network,
    sums the RPN, Fast R-CNN and weight-decay losses, registers summaries and
    creates a momentum optimizer with a piecewise-constant learning rate.

    Training loop (for `cfgs.MAX_ITERATION` steps): every step runs the train
    op; every `cfgs.SMRY_ITER` steps summaries are written; every
    `cfgs.SHOW_TRAIN_INFO_INTE` steps (when no summary is due) the losses are
    printed; every `cfgs.SAVE_WEIGHTS_INTE` steps and on the last step a
    checkpoint is saved under `cfgs.TRAINED_CKPT/cfgs.VERSION`.
    """

    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)

        gtboxes_and_label = tf.py_func(
            back_forward_convert,
            inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
            Tout=tf.float32)
        # py_func loses the static shape; each row has 6 values, the last of
        # which is dropped before drawing (presumably the label -- confirm).
        gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

    with tf.name_scope('draw_gtboxes'):

        gtboxes_in_img = draw_box_with_color_rotate(
            img_batch,
            tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
            text=tf.shape(gtboxes_and_label)[0])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([
            slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose,
            slim.separable_conv2d, slim.fully_connected
    ],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_boxes, final_scores, final_category, loss_dict = \
            faster_rcnn.build_whole_detection_network(input_img_batch=img_batch,
                                                      gtboxes_batch=gtboxes_and_label)

    dets_in_img = draw_boxes_with_categories_and_scores_rotate(
        img_batch=img_batch,
        boxes=final_boxes,
        labels=final_category,
        scores=final_scores)

    # ------------------------------------------------------------ build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ----------------------------------------------------------- add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    tf.summary.image('img/gtboxes', gtboxes_in_img)
    tf.summary.image('img/dets', dets_in_img)

    # ------------------------------------------------------------- optimizer
    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[
            np.int64(cfgs.DECAY_STEP[0]),
            np.int64(cfgs.DECAY_STEP[1])
        ],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ----------------------------------------------------- compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=10)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)
        save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)

        try:
            for step in range(cfgs.MAX_ITERATION):
                write_summary = step % cfgs.SMRY_ITER == 0
                show_info = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                # Every branch stores the current step in `global_stepnp`, so
                # the checkpoint name below always matches the step just run.
                if write_summary:
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(summary_str, global_stepnp)
                    summary_writer.flush()

                elif show_info:
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()

                    (global_stepnp, _img_name_batch, _rpn_location_loss,
                     _rpn_classification_loss, _rpn_total_loss,
                     _fast_rcnn_location_loss, _fast_rcnn_classification_loss,
                     _fast_rcnn_total_loss, _total_loss, _) = sess.run([
                         global_step, img_name_batch, rpn_location_loss,
                         rpn_cls_loss, rpn_total_loss, fastrcnn_loc_loss,
                         fastrcnn_cls_loss, fastrcnn_total_loss, total_loss,
                         train_op
                     ])

                    end = time.time()
                    print(""" {}: step{}    image_name:{} |\t
                                                    rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t
                                                    rpn_total_loss:{} |
                                                    fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t
                                                    fast_rcnn_total_loss:{} |\t
                                                    total_loss:{} |\t pre_cost_time:{}s""" \
                          .format(training_time, global_stepnp, str(_img_name_batch[0]), _rpn_location_loss,
                                  _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss,
                                  _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _total_loss,
                                  (end - start)))

                else:
                    _, global_stepnp = sess.run([train_op, global_step])

                is_last_step = step == cfgs.MAX_ITERATION - 1
                if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or is_last_step:

                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')
        finally:
            # Release the writer and the input threads even if a step fails.
            summary_writer.close()
            coord.request_stop()
            coord.join(threads)
Пример #55
0
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v1_200'):
    """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs,
                     blocks,
                     num_classes,
                     is_training,
                     global_pool=global_pool,
                     output_stride=output_stride,
                     include_root_block=True,
                     spatial_squeeze=spatial_squeeze,
                     reuse=reuse,
                     scope=scope)


# Expose the same default input resolution as the generic resnet_v1 builder.
resnet_v1_200.default_image_size = resnet_v1.default_image_size

if __name__ == '__main__':
    # Smoke test: build a ResNet-50 graph on a 224x224, 3-channel placeholder.
    # The local is called `inputs` so that the builtin `input` is not shadowed;
    # the tensor itself is still named 'input' in the graph.
    inputs = tf.placeholder(tf.float32, shape=(None, 224, 224, 3), name='input')
    with slim.arg_scope(resnet_arg_scope()):
        logits = resnet_v1_50(inputs)
Пример #56
0
def STsingle(inputs, outputs, loss_weight, labels):
    """Build a two-branch network sharing one convolutional encoder.

    Both image tensors are mean-subtracted, divided by 255 and concatenated
    along axis 3 before entering five conv/pool stages. The spatial branch
    classifies the pooled features into 101 classes; the temporal branch
    predicts a 2-channel flow map at five resolutions and scores each one with
    `loss_interp` against local-response-normalized copies of the two images.

    NOTE(review): `tf.concat` and the cross-entropy call use the pre-1.0
    TensorFlow argument order -- confirm the targeted TF version.
    NOTE(review): the dropout layers receive no `is_training` argument, and
    the classification loss is weighted by `loss_weight[0]`, the same entry
    used for the finest flow loss -- confirm both are intended.

    Args:
        inputs: First image tensor (3 channels, last axis).
        outputs: Second image tensor, same layout as `inputs`.
        loss_weight: Indexable with at least five weights, one per flow scale
            from finest (index 0) to coarsest (index 4).
        labels: Class ids for the sparse softmax cross-entropy.

    Returns:
        A tuple `(losses, flows_all, predictions)`:
        `losses` is the five per-scale loss objects (finest first) followed by
        the classification loss; `flows_all` is the five flow maps multiplied
        by their scale factors (finest first); `predictions` is the second
        value returned by `loss_interp` at the finest scale followed by the
        arg-max class predictions.
    """
    # Mean subtraction (BGR) for flying chairs
    bgr_mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    first_frame, second_frame = inputs - bgr_mean, outputs - bgr_mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    first_frame = tf.truediv(first_frame, 255.0)
    second_frame = tf.truediv(second_frame, 255.0)

    # Local response normalization (ACROSS_CHANNELS) for the photometric loss
    first_norm = tf.nn.local_response_normalization(first_frame, depth_radius=4, beta=0.7)
    second_norm = tf.nn.local_response_normalization(second_frame, depth_radius=4, beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                        activation_fn=tf.nn.elu):

        # ---- Shared conv layers: (stage, filters, number of 3x3 convs) ----
        encoder_layout = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3), (5, 512, 3))
        pooled = {}
        net = tf.concat(3, [first_frame, second_frame])
        for stage, depth, repeats in encoder_layout:
            for layer in range(1, repeats + 1):
                net = slim.conv2d(net, depth, [3, 3], scope='conv%d_%d' % (stage, layer))
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % stage)
            pooled[stage] = net

        # ---- Spatial branch ----
        net = slim.flatten(pooled[5], scope='flatten5')
        net = slim.fully_connected(net, 4096, scope='fc6')
        net = slim.dropout(net, 0.9, scope='dropout6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.9, scope='dropout7')
        logits = slim.fully_connected(net, 101, activation_fn=None, scope='fc8')
        actionPredictions = tf.argmax(tf.nn.softmax(logits), 1)

        actionLoss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels))

        # ---- Temporal branch ----
        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.3
        alpha_s = 0.3
        lambda_smooth = 0.8
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2       # for deconvolution
        deconv_kernel = [2*scale, 2*scale]

        def flow_head(features, scope, flow_scale):
            # Predict a 2-channel flow map and score it at its own resolution.
            flow = slim.conv2d(features, 2, [3, 3], activation_fn=None, scope=scope)
            height = flow.get_shape()[1].value
            width = flow.get_shape()[2].value
            small_first = tf.image.resize_bilinear(first_norm, [height, width])
            small_second = tf.image.resize_bilinear(second_norm, [height, width])
            loss, extra = loss_interp(flow, small_first, small_second, epsilon, alpha_c,
                                      alpha_s, lambda_smooth, flow_scale, FlowDeltaWeights)
            return flow, loss, extra

        # Expanding part: (level, flow scale factor, filters of the up-convolution)
        flow_scales = {5: 0.625,   # (*20/32)
                       4: 1.25,    # (*20/16)
                       3: 2.5,     # (*20/8)
                       2: 5.0,     # (*20/4)
                       1: 10.0}    # (*20/2)
        upconv_depths = {5: 256, 4: 128, 3: 64, 2: 32}

        flows = {}
        level_losses = {}
        features = pooled[5]
        for level in (5, 4, 3, 2):
            flow, level_loss, _ = flow_head(features, 'pr%d' % level, flow_scales[level])
            flows[level] = flow
            level_losses[level] = level_loss

            finer = level - 1
            upconv = slim.conv2d_transpose(features, upconv_depths[level], deconv_kernel,
                                           stride=scale, scope='upconv%d' % finer)
            up_flow = slim.conv2d_transpose(flow, 2, deconv_kernel, stride=scale,
                                            activation_fn=None,
                                            scope='up_pr%dto%d' % (level, finer))
            features = tf.concat(3, [pooled[finer], upconv, up_flow])

        flows[1], level_losses[1], prev1 = flow_head(features, 'pr1', flow_scales[1])

        # Adding intermediate losses
        all_loss = loss_weight[0]*level_losses[1]["total"] + loss_weight[1]*level_losses[2]["total"] + \
                    loss_weight[2]*level_losses[3]["total"] + loss_weight[3]*level_losses[4]["total"] + \
                    loss_weight[4]*level_losses[5]["total"] + loss_weight[0]*actionLoss
        slim.losses.add_loss(all_loss)

        finest_first = (1, 2, 3, 4, 5)
        losses = [level_losses[level] for level in finest_first] + [actionLoss]
        flows_all = [flows[level]*flow_scales[level] for level in finest_first]

        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
Пример #57
0
def mobilenet_v2(input, weight_decay, batch_norm_params):
    """Build a MobileNetV2-style backbone and collect intermediate feature maps.

    All layers live under ``tf.variable_scope('Mobilenet')`` and share one
    ``slim.arg_scope``: ReLU6 activation, truncated-normal weight init
    (stddev 0.01), zero bias init, L2 weight regularisation,
    ``slim.batch_norm`` as normaliser and 'SAME' padding. The graph name and
    static shape of every created tensor are printed while the graph is built.

    The network is a 3x3 stride-2 stem convolution, one depthwise + linear
    unit (``conv2_1``), and then sixteen expand / depthwise / linear units
    (``conv3_1`` .. ``conv7_4``) described by the table in the body. Units
    flagged as residual add their input to their output.

    Args:
        input: Image tensor fed to the stem convolution. The name shadows the
            builtin but is kept so existing keyword callers keep working.
        weight_decay: Scale handed to ``slim.l2_regularizer``.
        batch_norm_params: Passed through as ``normalizer_params`` for
            ``slim.batch_norm``.

    Returns:
        dict: tensors keyed (in insertion order) by ``'feature2'``,
        ``'feature3'``, ``'pfld'`` (same tensor as ``'feature3'``),
        ``'feature4'``, ``'feature5'`` and ``'feature6'``.
    """
    # (scope, expand_channels, depthwise_stride, linear_channels, residual)
    # The values reproduce the hand-unrolled layer list one-to-one.
    bottleneck_units = (
        ('conv3_1', 96, 2, 24, False),
        ('conv3_2', 144, 1, 24, True),
        ('conv4_1', 144, 2, 32, False),
        ('conv4_2', 192, 1, 32, True),
        ('conv4_3', 192, 1, 32, True),
        ('conv5_1', 192, 2, 64, False),
        ('conv5_2', 384, 1, 64, True),
        ('conv5_3', 384, 1, 64, True),
        ('conv5_4', 384, 1, 64, True),
        ('conv6_1', 384, 1, 96, False),
        ('conv6_2', 576, 1, 96, True),
        ('conv6_3', 576, 1, 96, True),
        ('conv7_1', 576, 2, 160, False),
        ('conv7_2', 960, 1, 160, True),
        ('conv7_3', 960, 1, 160, True),
        ('conv7_4', 960, 1, 320, False),
    )
    # Feature keys to record once the named unit (and its residual add, if
    # any) has been built.
    feature_taps = {
        'conv3_2': ('feature3', 'pfld'),
        'conv4_3': ('feature4',),
        'conv6_3': ('feature5',),
        'conv7_4': ('feature6',),
    }

    features = {}
    with tf.variable_scope('Mobilenet'):
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            activation_fn=tf.nn.relu6,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('Mobilnet input shape({}): {}'.format(
                input.name, input.get_shape()))

            # Stem: 3x3 convolution with stride 2 and 32 output channels.
            net = slim.convolution2d(input,
                                     32, [3, 3],
                                     stride=2,
                                     scope='conv_1')
            _mbv2_log(net)

            # First unit has no expansion layer: depthwise then 1x1 linear.
            net = _mbv2_depthwise(net, 1, 'conv2_1/dwise')
            net = _mbv2_pointwise(net, 16, 'conv2_1/linear', linear=True)
            features['feature2'] = net

            for scope, expand, stride, channels, residual in bottleneck_units:
                branch = _mbv2_bottleneck(net, expand, stride, channels, scope)
                if residual:
                    # Shortcut on the left, as in the unrolled version, so
                    # the add ops are created with the same operand order.
                    net = net + branch
                    _mbv2_log(net)
                else:
                    net = branch
                for key in feature_taps.get(scope, ()):
                    features[key] = net
    return features


def _mbv2_log(tensor):
    """Print a tensor's graph name and static shape."""
    print(tensor.name, tensor.get_shape())


def _mbv2_pointwise(net, channels, scope, linear):
    """Apply a logged 1x1 stride-1 convolution; ``linear`` disables the activation."""
    # Only override the arg_scope activation for linear projections; the
    # expansion layers keep whatever the enclosing arg_scope supplies.
    overrides = {'activation_fn': None} if linear else {}
    net = slim.convolution2d(net,
                             channels, [1, 1],
                             stride=1,
                             scope=scope,
                             **overrides)
    _mbv2_log(net)
    return net


def _mbv2_depthwise(net, stride, scope):
    """Apply a logged 3x3 ``slim.separable_convolution2d`` with ``num_outputs=None``."""
    net = slim.separable_convolution2d(net,
                                       num_outputs=None,
                                       stride=stride,
                                       depth_multiplier=1,
                                       kernel_size=[3, 3],
                                       scope=scope)
    _mbv2_log(net)
    return net


def _mbv2_bottleneck(net, expand_channels, stride, out_channels, scope):
    """Build one expand -> depthwise -> linear unit under ``scope``/{expand,dwise,linear}."""
    net = _mbv2_pointwise(net, expand_channels, scope + '/expand', linear=False)
    net = _mbv2_depthwise(net, stride, scope + '/dwise')
    return _mbv2_pointwise(net, out_channels, scope + '/linear', linear=True)
Пример #58
0
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Assemble the multi-tower training graph.

    One tower is built per local GPU, or a single CPU tower when no GPU is
    listed. The input batch is split evenly across towers, each tower
    computes its own gradients, and the combined (optionally clipped)
    gradients are applied by a single optimizer. Nothing is returned: the
    resulting tensors and ops are stored in graph collections named
    "global_step", "loss", "predictions", "input_batch_raw", "input_batch",
    "num_frames", "labels", "train_op" and "phase".

    Args:
      reader: Data reader; passed to get_input_data_tensors and queried for
        num_classes.
      model: Object whose create_model(...) returns a dict with at least a
        "predictions" entry and optionally "loss", "regularization_loss"
        and "update_ops".
      train_data_pattern: Pattern of training files, passed to
        get_input_data_tensors.
      label_loss_fn: Object providing calculate_loss(predictions, labels);
        used only when the model result has no "loss" entry.
      batch_size: Examples per tower; the reader is asked for
        batch_size * num_towers examples.
      base_learning_rate: Starting learning rate.
      learning_rate_decay_examples: Number of processed examples between
        successive (staircase) learning-rate decays.
      learning_rate_decay: Multiplicative factor applied at each decay.
      optimizer_class: Optimizer constructor, called with the learning rate.
      clip_gradient_norm: Value passed to utils.clip_gradient_norms; clipping
        is skipped when it is not positive.
      regularization_penalty: Weight of the regularization loss relative to
        the label loss.
      num_readers: Forwarded to get_input_data_tensors.
      num_epochs: Forwarded to get_input_data_tensors.
    """
    global_step = tf.Variable(0, trainable=False, name="global_step")

    # Every local GPU hosts one tower.
    gpus = []
    for device_proto in device_lib.list_local_devices():
        if device_proto.device_type == 'GPU':
            gpus.append(device_proto.name)
    num_gpus = len(gpus)

    if not gpus:
        logging.info("No GPUs found. Training on CPU.")
        num_towers, device_string = 1, '/cpu:%d'
    else:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers, device_string = num_gpus, '/gpu:%d'

    # The decay schedule is driven by examples seen, not by steps.
    examples_seen = global_step * batch_size * num_towers
    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               examples_seen,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    input_tensors = get_input_data_tensors(reader,
                                           train_data_pattern,
                                           batch_size=batch_size * num_towers,
                                           num_readers=num_readers,
                                           num_epochs=num_epochs)
    unused_video_id, model_input_raw, labels_batch, num_frames = input_tensors
    tf.summary.histogram("model/input_raw", model_input_raw)

    # L2-normalize along the last axis of the raw input.
    feature_dim = len(model_input_raw.get_shape()) - 1
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)

    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []

    # PRCCConcat: constant training-phase flag handed to every tower.
    phase = tf.constant(True)

    # Variables go on the single GPU when there is exactly one, else on CPU.
    variable_device = "/gpu:0" if num_gpus == 1 else "/cpu:0"

    for tower_index in range(num_towers):
        # Only the first tower creates variables; later towers reuse them.
        reuse_variables = True if tower_index > 0 else None
        # These context managers are kept nested rather than combined on a
        # single `with` line, as in the original implementation.
        with tf.device(device_string % tower_index):
            with tf.variable_scope("tower", reuse=reuse_variables):
                with slim.arg_scope([slim.model_variable, slim.variable],
                                    device=variable_device):
                    model_output = model.create_model(
                        tower_inputs[tower_index],
                        num_frames=tower_num_frames[tower_index],
                        vocab_size=reader.num_classes,
                        labels=tower_labels[tower_index],
                        # PRCCConcat
                        is_training=phase)
                    for model_variable in slim.get_model_variables():
                        tf.summary.histogram(model_variable.op.name,
                                             model_variable)

                    tower_output = model_output["predictions"]
                    tower_predictions.append(tower_output)

                    # Prefer a model-supplied loss over label_loss_fn.
                    if "loss" in model_output:
                        tower_label_loss = model_output["loss"]
                    else:
                        tower_label_loss = label_loss_fn.calculate_loss(
                            tower_output, tower_labels[tower_index])

                    if "regularization_loss" in model_output:
                        tower_reg_loss = model_output["regularization_loss"]
                    else:
                        tower_reg_loss = tf.constant(0.0)

                    collected_reg_losses = tf.losses.get_regularization_losses()
                    if collected_reg_losses:
                        tower_reg_loss += tf.add_n(collected_reg_losses)

                    tower_reg_losses.append(tower_reg_loss)

                    # Make the label loss depend on pending update ops (e.g.
                    # batch-norm moving averages) so they run with training.
                    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in model_output:
                        pending_updates.extend(model_output["update_ops"])
                    if pending_updates:
                        with tf.control_dependencies(pending_updates):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                tower_label_loss = tf.identity(tower_label_loss)

                    tower_label_losses.append(tower_label_loss)

                    # Total objective: weighted regularization + label loss.
                    tower_total_loss = (regularization_penalty * tower_reg_loss
                                        + tower_label_loss)
                    tower_gradients.append(
                        optimizer.compute_gradients(
                            tower_total_loss,
                            colocate_gradients_with_ops=False))

    mean_label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", mean_label_loss)
    if regularization_penalty != 0:
        mean_reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", mean_reg_loss)

    merged_gradients = utils.combine_gradients(tower_gradients)
    if clip_gradient_norm > 0:
        with tf.name_scope('clip_grads'):
            merged_gradients = utils.clip_gradient_norms(
                merged_gradients, clip_gradient_norm)

    train_op = optimizer.apply_gradients(merged_gradients,
                                         global_step=global_step)

    # Publish the handles through graph collections.
    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", mean_label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
    # PRCCConcat
    tf.add_to_collection("phase", phase)
Пример #59
0
    def create_architecture(self,
                            mode,
                            num_classes,
                            tag=None,
                            anchor_scales=(8, 16, 32),
                            anchor_ratios=(0.5, 1, 2)):
        """Create the input placeholders, build the network, and wire up
        losses and summaries.

        Args:
            mode: 'TRAIN' enables training mode for ``_build_network``;
                'TEST' rescales the box predictions instead of adding
                losses. Any value other than 'TEST' takes the loss/summary
                branch.
            num_classes: Number of classes; also the tiling factor for the
                bbox normalisation constants in test mode.
            tag: Stored on ``self._tag``; must not be None (asserted).
            anchor_scales: Anchor scales; their count times the number of
                ratios gives ``self._num_anchors``.
            anchor_ratios: Anchor aspect ratios.

        Returns:
            dict: ``'rois'`` plus the entries of ``self._predictions`` and,
            outside test mode, the entries of ``self._losses``.

        Raises:
            AssertionError: if ``tag`` is None.
        """
        # Graph inputs. NOTE(review): the image placeholder has 3 + 18 = 21
        # channels; the meaning of the extra 18 is not visible here.
        self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3 + 18])
        self._im_info = tf.placeholder(tf.float32, shape=[3])
        self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])
        self._tag = tag

        self._num_classes = num_classes
        self._mode = mode

        self._anchor_scales = anchor_scales
        self._num_scales = len(anchor_scales)
        self._anchor_ratios = anchor_ratios
        self._num_ratios = len(anchor_ratios)
        self._num_anchors = self._num_scales * self._num_ratios

        training = mode == 'TRAIN'
        testing = mode == 'TEST'
        print('Training', training, 'Testing', testing)

        # Identity comparison is the correct test against the None singleton.
        assert tag is not None

        # Handle most of the regularizers here; biases share the weight
        # regularizer only when cfg.TRAIN.BIAS_DECAY is set.
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfg.TRAIN.WEIGHT_DECAY)
        biases_regularizer = (weights_regularizer if cfg.TRAIN.BIAS_DECAY
                              else tf.no_regularizer)

        # List as many layer types as possible, even if unused right now.
        regularized_layers = [slim.conv2d, slim.conv2d_in_plane,
                              slim.conv2d_transpose, slim.separable_conv2d,
                              slim.fully_connected]
        with arg_scope(regularized_layers,
                       weights_regularizer=weights_regularizer,
                       biases_regularizer=biases_regularizer,
                       biases_initializer=tf.constant_initializer(0.0)):
            # Only rois is used directly below; the other two outputs are
            # unpacked to keep the three-value contract explicit.
            rois, _cls_prob, _bbox_pred = self._build_network(training)

        layers_to_output = {'rois': rois}

        for var in tf.trainable_variables():
            self._train_summaries.append(var)

        if testing:
            # Scale and shift the box predictions using the training-time
            # normalisation stds/means, tiled once per class.
            stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                           self._num_classes)
            means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                            self._num_classes)
            self._predictions["bbox_pred"] *= stds
            self._predictions["bbox_pred"] += means
        else:
            self._add_losses()
            layers_to_output.update(self._losses)

            val_summaries = []
            with tf.device("/cpu:0"):
                val_summaries.extend(self._add_gt_image_summary())
                for key, var in self._event_summaries.items():
                    val_summaries.append(tf.summary.scalar(key, var))
                for key, var in self._score_summaries.items():
                    self._add_score_summary(key, var)
                for var in self._act_summaries:
                    self._add_act_summary(var)
                for var in self._train_summaries:
                    self._add_train_summary(var)

            self._summary_op = tf.summary.merge_all()
            self._summary_op_val = tf.summary.merge(val_summaries)

        layers_to_output.update(self._predictions)

        return layers_to_output
Пример #60
0
def STbaseline(inputs, outputs, loss_weight, labels):
    """
    Build a two-stream graph with TF-Slim and register its combined loss.

    Temporal stream (FlowNet-simple style): an encoder over the channel-wise
    concatenation of `inputs` and `outputs`, followed by a decoder that
    predicts a 2-channel flow map at six scales. Every flow map is scored by
    `loss_interp` against locally-response-normalized copies of the two
    images resized to that scale.

    Spatial stream (VGG16 style): a conv/pool stack over `inputs` alone. Its
    `pool5` output is concatenated with the temporal features `Tconv5_2` and
    `Tconv6_2`, reduced by a 1x1 convolution and classified into 101 classes.

    Args:
        inputs: first image batch; concatenated with `outputs` on axis 3 and
            also fed alone to the spatial stream. Assumed NHWC with 3 BGR
            channels and a static height/width -- TODO confirm with caller.
        outputs: second image batch, same layout as `inputs` (presumably the
            following frame -- verify against the caller).
        loss_weight: indexable holding at least six weights; entries 0..5
            weight the flow losses from the finest scale (pr1) to the
            coarsest (pr6).
        labels: class labels handed to
            `tf.nn.sparse_softmax_cross_entropy_with_logits`.

    Returns:
        A tuple `(losses, flows_all, predictions)` where
        `losses` is `[loss1, ..., loss6, actionLoss]`,
        `flows_all` is `[pr1, ..., pr6]`, each multiplied by its flow scale,
        `predictions` is `[prev1, actionPredictions]`; `prev1` is the second
        value returned by `loss_interp` at the finest scale.
    """

    # Hyper-parameters forwarded unchanged to loss_interp at every scale.
    epsilon = 0.0001
    alpha_c = 0.25
    alpha_s = 0.37
    lambda_smooth = 1.0
    scale = 2       # stride of every transposed convolution in the decoder

    # Per-scale flow multipliers, coarsest (pr6) first: 20/64, 20/32, ..., 20/2.
    flow_scale_6, flow_scale_5, flow_scale_4 = 0.3125, 0.625, 1.25
    flow_scale_3, flow_scale_2, flow_scale_1 = 2.5, 5.0, 10.0

    # Subtract the per-channel mean (BGR, Flying Chairs), then divide by 255.
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    inputs, outputs = inputs - mean, outputs - mean
    inputs, outputs = tf.truediv(inputs, 255.0), tf.truediv(outputs, 255.0)

    # Across-channel local response normalization; these copies are only used
    # as the photometric references passed to loss_interp.
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.elu):
        # ---- Temporal stream: contracting part ----
        frame_pair = tf.concat(3, [inputs, outputs])
        Tconv1 = slim.conv2d(frame_pair, 64, [7, 7], stride=2, scope='Tconv1')
        Tconv2 = slim.conv2d(Tconv1, 128, [5, 5], stride=2, scope='Tconv2')
        Tconv3_1 = slim.conv2d(Tconv2, 256, [5, 5], stride=2, scope='Tconv3_1')
        Tconv3_2 = slim.conv2d(Tconv3_1, 256, [3, 3], scope='Tconv3_2')
        Tconv4_1 = slim.conv2d(Tconv3_2, 512, [3, 3], stride=2, scope='Tconv4_1')
        Tconv4_2 = slim.conv2d(Tconv4_1, 512, [3, 3], scope='Tconv4_2')
        Tconv5_1 = slim.conv2d(Tconv4_2, 512, [3, 3], stride=2, scope='Tconv5_1')
        Tconv5_2 = slim.conv2d(Tconv5_1, 512, [3, 3], scope='Tconv5_2')
        Tconv6_1 = slim.conv2d(Tconv5_2, 1024, [3, 3], stride=2, scope='Tconv6_1')
        Tconv6_2 = slim.conv2d(Tconv6_1, 1024, [3, 3], scope='Tconv6_2')

        # Constant [3, 3, 2, 2] kernel handed to loss_interp.
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")

        def score_flow(features, scope, flow_scale):
            """Predict a flow map from `features` and score it with loss_interp."""
            flow = slim.conv2d(features, 2, [3, 3], activation_fn=None, scope=scope)
            # Height/width come from the static shape of the prediction.
            height = flow.get_shape()[1].value
            width = flow.get_shape()[2].value
            first_ref = tf.image.resize_bilinear(inputs_norm, [height, width])
            second_ref = tf.image.resize_bilinear(outputs_norm, [height, width])
            flow_loss, extra = loss_interp(flow, first_ref, second_ref, epsilon, alpha_c, alpha_s,
                                           lambda_smooth, flow_scale, FlowDeltaWeights)
            return flow, flow_loss, extra

        # ---- Temporal stream: expanding part ----
        # One row per decoder level, coarse to fine:
        # (skip connection, upconv depth, upconv scope, flow-upsampling scope,
        #  prediction scope, flow scale)
        decoder_levels = [
            (Tconv5_2, 512, 'upconv5', 'up_pr6to5', 'pr5', flow_scale_5),
            (Tconv4_2, 256, 'upconv4', 'up_pr5to4', 'pr4', flow_scale_4),
            (Tconv3_2, 128, 'upconv3', 'up_pr4to3', 'pr3', flow_scale_3),
            (Tconv2, 64, 'upconv2', 'up_pr3to2', 'pr2', flow_scale_2),
            (Tconv1, 32, 'upconv1', 'up_pr2to1', 'pr1', flow_scale_1),
        ]

        features = Tconv6_2
        flow, flow_loss, prev1 = score_flow(features, 'pr6', flow_scale_6)
        coarse_to_fine_flows = [flow]
        coarse_to_fine_losses = [flow_loss]
        coarse_to_fine_scales = [flow_scale_6]

        for skip, depth, up_scope, up_flow_scope, pr_scope, flow_scale in decoder_levels:
            # Upsample both the features and the previous flow estimate, then
            # join them with the encoder skip connection.
            upconv = slim.conv2d_transpose(features, depth, [2*scale, 2*scale],
                                           stride=scale, scope=up_scope)
            up_flow = slim.conv2d_transpose(flow, 2, [2*scale, 2*scale], stride=scale,
                                            activation_fn=None, scope=up_flow_scope)
            features = tf.concat(3, [skip, upconv, up_flow])

            # After the last iteration prev1 holds the finest-scale value.
            flow, flow_loss, prev1 = score_flow(features, pr_scope, flow_scale)
            coarse_to_fine_flows.append(flow)
            coarse_to_fine_losses.append(flow_loss)
            coarse_to_fine_scales.append(flow_scale)

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):

        # ---- Spatial stream: VGG16 style conv blocks, each closed by a pool ----
        vgg_blocks = [
            (64, ('conv1_1', 'conv1_2'), 'pool1'),
            (128, ('conv2_1', 'conv2_2'), 'pool2'),
            (256, ('conv3_1', 'conv3_2', 'conv3_3'), 'pool3'),
            (512, ('conv4_1', 'conv4_2', 'conv4_3'), 'pool4'),
            (512, ('conv5_1', 'conv5_2', 'conv5_3'), 'pool5'),
        ]
        net = inputs
        for depth, conv_scopes, pool_scope in vgg_blocks:
            for conv_scope in conv_scopes:
                net = slim.conv2d(net, depth, [3, 3], scope=conv_scope)
            net = slim.max_pool2d(net, [2, 2], scope=pool_scope)
        pool5 = net

        # ---- Fuse temporal features into the spatial stream ----
        fused = tf.concat(3, [pool5, Tconv5_2])
        fused = slim.max_pool2d(fused, [2, 2])
        fused = tf.concat(3, [fused, Tconv6_2])
        # 1x1 convolution reduces the concatenated channels to 512.
        fused = slim.conv2d(fused, 512, [1, 1])

        # ---- Classifier head ----
        net = slim.flatten(fused, scope='flatten5')
        net = slim.fully_connected(net, 4096, scope='fc6')
        net = slim.dropout(net, 0.9, scope='dropout6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(net, 101, activation_fn=None, scope='fc8')
        actionPredictions = tf.argmax(tf.nn.softmax(fc8), 1)

        # Legacy positional form: (logits, labels).
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(fc8, labels)
        actionLoss = tf.reduce_mean(cross_entropy)

        # ---- Weighted sum of the six flow losses plus the action loss ----
        fine_to_coarse_losses = coarse_to_fine_losses[::-1]     # loss1 .. loss6
        all_loss = loss_weight[0] * fine_to_coarse_losses[0]["total"]
        for level in range(1, 6):
            all_loss = all_loss + loss_weight[level] * fine_to_coarse_losses[level]["total"]
        # NOTE(review): the action loss reuses loss_weight[0], the same weight
        # as the finest flow loss -- confirm this is intended.
        all_loss = all_loss + loss_weight[0] * actionLoss
        slim.losses.add_loss(all_loss)

        losses = fine_to_coarse_losses + [actionLoss]
        flows_all = [
            level_flow * level_scale
            for level_flow, level_scale in zip(coarse_to_fine_flows[::-1],
                                               coarse_to_fine_scales[::-1])
        ]

        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions