Example #1
def conv_net_kelz(inputs):
  """Builds the ConvNet from Kelz 2016."""
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=tf.contrib.layers.variance_scaling_initializer(
          factor=2.0, mode='FAN_AVG', uniform=True)):
    net = slim.conv2d(
        inputs, 32, [3, 3], scope='conv1', normalizer_fn=slim.batch_norm)

    net = slim.conv2d(
        net, 32, [3, 3], scope='conv2', normalizer_fn=slim.batch_norm)
    net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool2')
    net = slim.dropout(net, 0.25, scope='dropout2')

    net = slim.conv2d(
        net, 64, [3, 3], scope='conv3', normalizer_fn=slim.batch_norm)
    net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool3')
    net = slim.dropout(net, 0.25, scope='dropout3')

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    net = tf.reshape(net, (dims[0], dims[1],
                           net.shape[2].value * net.shape[3].value), 'flatten4')

    net = slim.fully_connected(net, 512, scope='fc5')
    net = slim.dropout(net, 0.5, scope='dropout5')

    return net
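
The reshape at the end of conv_net_kelz mixes dynamic and static shapes: batch and time stay dynamic (via tf.shape), while the frequency and channel dimensions are multiplied out from the static shape. A minimal sketch of that trick in isolation, assuming TF 1.x where dimensions expose .value; the helper name is only for illustration:

import tensorflow as tf  # assumes TF 1.x

def flatten_keep_batch_time(net):
    """Collapse the last two statically known dims, keeping batch/time dynamic."""
    dims = tf.shape(net)  # dynamic shape: [batch, time, freq, channels]
    feature_size = net.shape[2].value * net.shape[3].value  # static freq * channels
    return tf.reshape(net, (dims[0], dims[1], feature_size))

# A [batch, time, 229, 32] activation becomes [batch, time, 229 * 32].
x = tf.placeholder(tf.float32, [None, None, 229, 32])
flat = flatten_keep_batch_time(x)  # -> shape [?, ?, 7328]
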
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/gpu:0'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2], padding='SAME', scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2], padding='SAME', scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2], padding='SAME', scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3, 512, [3, 3], padding='SAME', scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4, 512, [3, 3], padding='SAME', scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2], padding='SAME', scope='pool4')

            flatten = slim.flatten(max_pool_4)
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024,
                                       activation_fn=tf.nn.relu, scope='fc1')
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size, activation_fn=None,
                                          scope='fc2')
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'is_training': is_training,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
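
build_graph returns a plain dict of tensors and ops, so a training loop simply feeds the placeholders and runs the named entries. A hedged usage sketch, assuming TF 1.x; the batch data below is random and purely illustrative:

import numpy as np
import tensorflow as tf  # assumes TF 1.x

graph = build_graph(top_k=3)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Hypothetical batch; a real pipeline would feed preprocessed character images.
    batch_images = np.random.rand(32, 64, 64, 1).astype(np.float32)
    batch_labels = np.random.randint(0, 10, size=32)
    _, loss_val, step = sess.run(
        [graph['train_op'], graph['loss'], graph['global_step']],
        feed_dict={graph['images']: batch_images,
                   graph['labels']: batch_labels,
                   graph['keep_prob']: 0.8,
                   graph['is_training']: True})
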
Example #3
  def construct_embedding(self):
    """Builds a conv -> spatial softmax -> FC adaptation network."""
    is_training = self._is_training
    normalizer_params = {'is_training': is_training}
    with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
      self._adaptation_scope = vs.name
      with slim.arg_scope(
          [slim.layers.conv2d],
          activation_fn=tf.nn.relu,
          normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params,
          weights_regularizer=slim.regularizers.l2_regularizer(
              self._l2_reg_weight),
          biases_regularizer=slim.regularizers.l2_regularizer(
              self._l2_reg_weight)):
        with slim.arg_scope(
            [slim.layers.fully_connected],
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params,
            weights_regularizer=slim.regularizers.l2_regularizer(
                self._l2_reg_weight),
            biases_regularizer=slim.regularizers.l2_regularizer(
                self._l2_reg_weight)):

          # Input to embedder is pre-trained inception output.
          net = self._pretrained_output

          # Optionally add more conv layers.
          for num_filters in self._additional_conv_sizes:
            net = slim.layers.conv2d(
                net, num_filters, kernel_size=[3, 3], stride=[1, 1])
            net = slim.dropout(net, keep_prob=self._conv_hidden_keep_prob,
                               is_training=is_training)

          # Take the spatial soft arg-max of the last convolutional layer.
          # This is a form of spatial attention over the activations.
          # See more here: http://arxiv.org/abs/1509.06113.
          net = tf.contrib.layers.spatial_softmax(net)
          self.spatial_features = net

          # Add fully connected layers.
          net = slim.layers.flatten(net)
          for fc_hidden_size in self._fc_hidden_sizes:
            net = slim.layers.fully_connected(net, fc_hidden_size)
            if self._fc_hidden_keep_prob < 1.0:
              net = slim.dropout(net, keep_prob=self._fc_hidden_keep_prob,
                                 is_training=is_training)

          # Connect last FC layer to embedding.
          net = slim.layers.fully_connected(net, self._embedding_size,
                                            activation_fn=None)

          # Optionally L2 normalize the embedding.
          if self._embedding_l2:
            net = tf.nn.l2_normalize(net, dim=1)

          return net
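
The spatial soft arg-max mentioned in the comments converts each feature map into an expected (x, y) image coordinate, so a [batch, H, W, C] activation collapses to a [batch, 2*C] vector of keypoint locations before the fully connected layers. A shape-level sketch, assuming TF 1.x where tf.contrib.layers.spatial_softmax is available:

import tensorflow as tf  # assumes TF 1.x with tf.contrib

feats = tf.placeholder(tf.float32, [None, 33, 33, 64])  # hypothetical conv activations
points = tf.contrib.layers.spatial_softmax(feats)       # -> [batch, 2 * 64] expected coordinates
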
Example #4
def inference(inputs):
    x = tf.reshape(inputs,[-1,28,28,1])
    conv_1 = tf.nn.relu(slim.conv2d(x,32,[3,3])) #28 * 28 * 32
    pool_1 = slim.max_pool2d(conv_1,[2,2]) # 14 * 14 * 32
    block_1 = res_identity(pool_1,32,[3,3],'layer_2')
    block_2 = res_change(block_1,64,[3,3],'layer_3')
    block_3 = res_identity(block_2,64,[3,3],'layer_4')
    block_4 = res_change(block_3,32,[3,3],'layer_5')
    net_flatten = slim.flatten(block_4,scope='flatten')
    fc_1 = slim.fully_connected(slim.dropout(net_flatten,0.8),200,activation_fn=tf.nn.tanh,scope='fc_1')
    output = slim.fully_connected(slim.dropout(fc_1,0.8),10,activation_fn=None,scope='output_layer')
    return output
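
Note that the second positional argument of slim.dropout is the keep probability, not the drop rate (so 0.8 above keeps 80% of activations), and the layer is an identity unless is_training is True. A small sketch of that behavior, assuming TF 1.x slim:

import tensorflow as tf  # assumes TF 1.x
slim = tf.contrib.slim

x = tf.placeholder(tf.float32, [None, 200])
is_training = tf.placeholder(tf.bool, [])

# Keeps 80% of activations while training; passes x through unchanged at inference.
y = slim.dropout(x, keep_prob=0.8, is_training=is_training, scope='dropout_demo')
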
Example #5
  def _build_network(self, sess, is_training=True):
    with tf.variable_scope('vgg_16', 'vgg_16'):
      # select initializers
      if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
      else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

      net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                        trainable=False, scope='conv1')
      net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                        trainable=False, scope='conv2')
      net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                        trainable=is_training, scope='conv3')
      net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                        trainable=is_training, scope='conv4')
      net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                        trainable=is_training, scope='conv5')
      self._act_summaries.append(net)
      self._layers['head'] = net
      # build the anchors for the image
      self._anchor_component()
      # region proposal network
      rois = self._region_proposal(net, is_training, initializer)
      # region of interest pooling
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net, rois, "pool5")
      else:
        raise NotImplementedError

      pool5_flat = slim.flatten(pool5, scope='flatten')
      fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
      if is_training:
        fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')
      fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
      if is_training:
        fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')
      # region classification
      cls_prob, bbox_pred = self._region_classification(fc7, 
                                                        is_training, 
                                                        initializer, 
                                                        initializer_bbox)

      self._score_summaries.update(self._predictions)

      return rois, cls_prob, bbox_pred
Example #6
  def _head_to_tail(self, pool5, is_training, reuse=False):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
      pool5_flat = slim.flatten(pool5, scope='flatten')
      fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
      if is_training:
        fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, 
                            scope='dropout6')
      fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
      if is_training:
        fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, 
                            scope='dropout7')

    return fc7
Example #7
def build_arch_baseline(input, is_train: bool, num_classes: int):

    bias_initializer = tf.truncated_normal_initializer(
        mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
    # The paper did not mention any regularization; a common L2 regularizer on the weights is added here
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    tf.logging.info('input shape: {}'.format(input.get_shape()))

    # weights_initializer=initializer,
    with slim.arg_scope([slim.conv2d, slim.fully_connected], trainable=is_train, biases_initializer=bias_initializer, weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1') as scope:
            output = slim.conv2d(input, num_outputs=32, kernel_size=[
                                 5, 5], stride=1, padding='SAME', scope=scope, activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer1')

            tf.logging.info('output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('relu_conv2') as scope:
            output = slim.conv2d(output, num_outputs=64, kernel_size=[
                                 5, 5], stride=1, padding='SAME', scope=scope, activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer2')

            tf.logging.info('output shape: {}'.format(output.get_shape()))

        output = slim.flatten(output)
        output = slim.fully_connected(output, 1024, scope='relu_fc3', activation_fn=tf.nn.relu)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        output = slim.dropout(output, 0.5, scope='dp')
        output = slim.fully_connected(output, num_classes, scope='final_layer', activation_fn=None)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        return output
def LResnet50E_IR(images, keep_probability, 
             phase_train=True, bottleneck_layer_size=512, 
             weight_decay=0.0, reuse=None):
    '''
    conv name
    conv[conv_layer]_[block_index]_[block_layer_index]
    
    For ResNet-50, n_units=[3,4,14,3]; since one unit per stage is the dimension-reduction block,
    the repeated counts are n_units=[2,3,13,2].
    '''
    with tf.variable_scope('Conv1'):
        net = slim.conv2d(images,64,scope='Conv1_pre')
        net = slim.batch_norm(net,scope='Conv1_bn')
    with tf.variable_scope('Conv2'):
        net = resface_block(net,64,stride=2,dim_match=False,scope='Conv2_pre')
        net = slim.repeat(net,2,resface_block,64,1,True,scope='Conv2_main')
    with tf.variable_scope('Conv3'):
        net = resface_block(net,128,stride=2,dim_match=False,scope='Conv3_pre')
        net = slim.repeat(net,3,resface_block,128,1,True,scope='Conv3_main')
    with tf.variable_scope('Conv4'):
        net = resface_block(net,256,stride=2,dim_match=False,scope='Conv4_pre')
        net = slim.repeat(net,13,resface_block,256,1,True,scope='Conv4_main')
    with tf.variable_scope('Conv5'):
        net = resface_block(net,512,stride=2,dim_match=False,scope='Conv5_pre')
        net = slim.repeat(net,2,resface_block,512,1,True,scope='Conv5_main')

    with tf.variable_scope('Logits'):
        net = slim.batch_norm(net,activation_fn=None,scope='bn1')
        net = slim.dropout(net, keep_probability, is_training=phase_train,scope='Dropout')        
        net = slim.flatten(net)
    
    net = slim.fully_connected(net, bottleneck_layer_size, biases_initializer=tf.contrib.layers.xavier_initializer(), scope='fc1')
    net = slim.batch_norm(net, activation_fn=None, scope='Bottleneck')

    return net,''
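
Each stage above is one dimension-reduction resface_block followed by slim.repeat over the remaining identity blocks; slim.repeat(net, n, fn, *args) simply applies fn n times, threading the output through and numbering the scopes, which is why the repeat counts in the docstring are one less per stage. A minimal self-contained sketch of the same mechanism using slim.conv2d (assumes TF 1.x slim):

import tensorflow as tf  # assumes TF 1.x
slim = tf.contrib.slim

x = tf.placeholder(tf.float32, [None, 56, 56, 64])
# slim.repeat threads the output through the same layer n times:
y = slim.repeat(x, 2, slim.conv2d, 64, [3, 3], scope='block')
# which is equivalent (up to scope naming) to:
# y = slim.conv2d(x, 64, [3, 3], scope='block/block_1')
# y = slim.conv2d(y, 64, [3, 3], scope='block/block_2')
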
def metric_net(img, scope, df_dim=64, reuse=False, train=True):

    bn = functools.partial(slim.batch_norm, scale=True, is_training=train,
                           decay=0.9, epsilon=1e-5, updates_collections=None)

    with tf.variable_scope(scope + '_discriminator', reuse=reuse):
        h0 = lrelu(conv(img, df_dim, 4, 2, scope='h0_conv'))    # h0 is (128 x 128 x df_dim)
        pool1 = Mpool(h0, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
		
        h1 = lrelu(conv(pool1, df_dim * 2, 4, 2, scope='h1_conv'))  # h1 is (32 x 32 x df_dim*2)
        pool2 = Mpool(h1, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
		
        h2 = lrelu(conv(pool2, df_dim * 4, 4, 2, scope='h2_conv'))  # h2 is (8 x 8 x df_dim*4)
        pool3 = Mpool(h2, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
		
        h3 = lrelu(conv(pool3, df_dim * 8, 4, 2, scope='h3_conv'))  # h3 is (2 x 2 x df_dim*8)
        pool4 = Mpool(h3, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
		
        shape = pool4.get_shape()
        flatten_shape = shape[1].value * shape[2].value * shape[3].value
        h3_reshape = tf.reshape(pool4, [-1, flatten_shape], name = 'h3_reshape')
		
        fc1 = lrelu(FC(h3_reshape, df_dim*2, scope='fc1'))
        dropout_fc1 = slim.dropout(fc1, 0.5, scope='dropout_fc1')  
        net = FC(dropout_fc1, df_dim, scope='fc2') 
        
        #print_activations(net)
        #print_activations(pool4)
        return net
Example #10
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input = batch_queue.dequeue()
      anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32)
      end_points = network_fn(images)
      end_points["viz_images"] = images
      conv_ds_14 = end_points['MobileNet/conv_ds_14/depthwise_conv']
      dropout = slim.dropout(conv_ds_14, keep_prob=0.5, is_training=True)
      num_output = config.NUM_ANCHORS * (config.NUM_CLASSES + 1 + 4)
      predict = slim.conv2d(dropout, num_output, kernel_size=(3, 3), stride=1, padding='SAME',
                            activation_fn=None,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.0001),
                            scope="MobileNet/conv_predict")

      with tf.name_scope("Interpre_prediction") as scope:
        pred_box_delta, pred_class_probs, pred_conf, ious, det_probs, det_boxes, det_class = \
          interpre_prediction(predict, b_input_mask, anchors, b_box_input)
        end_points["viz_det_probs"] = det_probs
        end_points["viz_det_boxes"] = det_boxes
        end_points["viz_det_class"] = det_class

      with tf.name_scope("Losses") as scope:
        losses(b_input_mask, b_labels_input, ious, b_box_delta_input, pred_class_probs, pred_conf, pred_box_delta)

      return end_points
def create_inner_block(
        incoming, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(1e-3),
        bias_initializer=tf.zeros_initializer(), regularizer=None,
        increase_dim=False, summarize_activations=True):
    n = incoming.get_shape().as_list()[-1]
    stride = 1
    if increase_dim:
        n *= 2
        stride = 2

    incoming = slim.conv2d(
        incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
        normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/1")
    if summarize_activations:
        tf.summary.histogram(incoming.name + "/activations", incoming)

    incoming = slim.dropout(incoming, keep_prob=0.6)

    incoming = slim.conv2d(
        incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
        normalizer_fn=None, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/2")
    return incoming
Example #12
  def construct_embedding(self):
    """Builds an embedding function on top of images.

    Method to be overridden by implementations.

    Returns:
      embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
        holding the embedded images.
    """
    with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
      self._adaptation_scope = vs.name
      net = self._pretrained_output

      # Define some adaptation blocks on top of the pre-trained resnet output.
      adaptation_blocks = []
      adaptation_block_params = [map(
          int, i.split('_')) for i in self._config.adaptation_blocks.split('-')]
      for i, (depth, num_units) in enumerate(adaptation_block_params):
        block = resnet_v2.resnet_v2_block(
            'adaptation_block_%d' % i, base_depth=depth, num_units=num_units,
            stride=1)
        adaptation_blocks.append(block)

      # Stack them on top of the resnet output.
      net = resnet_utils.stack_blocks_dense(
          net, adaptation_blocks, output_stride=None)

      # Average pool the output.
      net = tf.reduce_mean(net, [1, 2], name='adaptation_pool', keep_dims=True)

      if self._config.emb_connection == 'fc':
        # Use fully connected layer to project to embedding layer.
        fc_hidden_sizes = self._config.fc_hidden_sizes
        if fc_hidden_sizes == 'None':
          fc_hidden_sizes = []
        else:
          fc_hidden_sizes = map(int, fc_hidden_sizes.split('_'))
        fc_hidden_keep_prob = self._config.dropout.keep_fc
        net = tf.squeeze(net)
        for fc_hidden_size in fc_hidden_sizes:
          net = slim.layers.fully_connected(net, fc_hidden_size)
          if fc_hidden_keep_prob < 1.0:
            net = slim.dropout(net, keep_prob=fc_hidden_keep_prob,
                               is_training=self._is_training)

        # Connect last FC layer to embedding.
        embedding = slim.layers.fully_connected(net, self._embedding_size,
                                                activation_fn=None)
      else:
        # Use 1x1 conv layer to project to embedding layer.
        embedding = slim.conv2d(
            net, self._embedding_size, [1, 1], activation_fn=None,
            normalizer_fn=None, scope='embedding')
        embedding = tf.squeeze(embedding)

      # Optionally L2 normalize the embedding.
      if self._embedding_l2:
        embedding = tf.nn.l2_normalize(embedding, dim=1)

      return embedding
def resface36(images, keep_probability, 
             phase_train=True, bottleneck_layer_size=512, 
             weight_decay=0.0, reuse=None):
    '''
    conv name
    conv[conv_layer]_[block_index]_[block_layer_index]
    '''
    with tf.variable_scope('Conv1'):
        net = resface_pre(images,64,scope='Conv1_pre')
        net = slim.repeat(net,2,resface_block,64,scope='Conv_1')
    with tf.variable_scope('Conv2'):
        net = resface_pre(net,128,scope='Conv2_pre')
        net = slim.repeat(net,4,resface_block,128,scope='Conv_2')
    with tf.variable_scope('Conv3'):
        net = resface_pre(net,256,scope='Conv3_pre')
        net = slim.repeat(net,8,resface_block,256,scope='Conv_3')
    with tf.variable_scope('Conv4'):
        net = resface_pre(net,512,scope='Conv4_pre')
        #net = resface_block(Conv4_pre,512,scope='Conv4_1')
        net = slim.repeat(net,1,resface_block,512,scope='Conv4')

    with tf.variable_scope('Logits'):
        #pylint: disable=no-member
        #net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
        #                      scope='AvgPool')
        net = slim.flatten(net)
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')
    net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, 
            scope='Bottleneck', reuse=False)    
    return net,''
Example #14
def build_single_inceptionv1(train_tfdata, is_train, dropout_keep_prob):
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        identity, end_points = inception.inception_v1(train_tfdata, dropout_keep_prob = dropout_keep_prob, is_training=is_train)
        net = slim.avg_pool2d(end_points['Mixed_5c'], [7, 7], stride=1, scope='MaxPool_0a_7x7')
        net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
        feature = tf.squeeze(net, [1, 2])
    return identity, feature
Example #15
	def __init__(self,is_training):
		
		self.input_image = tf.placeholder(dtype=tf.float32,shape=[None,64,64,3],name='input_image')
		
		self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')

		self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_nlcd')

		#logits, end_points = resnet_v2.resnet_v2_50(self.input_image, num_classes=100, is_training=True)

		# flatten_hist = tf.reshape(self.input_image,[-1,96])

		self.keep_prob = tf.placeholder(tf.float32)

		weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)


		flatten_hist = tf.reshape(self.input_image,[-1,3*64*64])
		flatten_hist = tf.concat([flatten_hist,self.input_nlcd],1)
		x = slim.fully_connected(flatten_hist, 512,weights_regularizer=weights_regularizer,scope='decoder/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='decoder/fc_2')
		flatten_hist = slim.fully_connected(x, 512,weights_regularizer=weights_regularizer, scope='decoder/fc_3')

		all_logits = []
		all_output = []

		for i in range(100):
			if i == 0 :
				current_input_x = flatten_hist
			else:
				current_output = tf.concat(all_output,1)
				current_input_x = tf.concat([flatten_hist,current_output],1)

			x = slim.fully_connected(current_input_x, 256,weights_regularizer=weights_regularizer)
			x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer)
			#x = slim.fully_connected(x, 17,weights_regularizer=weights_regularizer)

			x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
			all_logits.append(slim.fully_connected(x, 1, activation_fn=None, weights_regularizer=weights_regularizer))
			all_output.append(tf.sigmoid(all_logits[i]))

		final_logits = tf.concat(all_logits,1)
		final_output = tf.sigmoid(final_logits)

		self.output = final_output
		self.ce_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_label,logits=final_logits),1))

		slim.losses.add_loss(self.ce_loss)
		tf.summary.scalar('ce_loss',self.ce_loss)
		
		# l2 loss
		self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
		tf.summary.scalar('l2_loss',self.l2_loss)

		#total loss
		self.total_loss = slim.losses.get_total_loss()
		tf.summary.scalar('total_loss',self.total_loss)

		#self.output = tf.sigmoid(x)
Example #16
def build_graph(top_k):
    # with tf.device('/cpu:0'):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')

    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024, activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size, activation_fn=None, scope='fc2')
        # logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None, reuse=reuse, scope='fc')
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
    rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000, decay_rate=0.97, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(loss, global_step=global_step)
    probabilities = tf.nn.softmax(logits)

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
    accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
Example #17
 def inference(self):
     x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
     with slim.arg_scope([slim.conv2d, slim.fully_connected],
                         weights_initializer=tf.contrib.layers.xavier_initializer(),
                         weights_regularizer=slim.l2_regularizer(1e-6)):
         model = slim.conv2d(x, 96, [11, 11], 4, padding='VALID', scope='conv1')
         model = slim.max_pool2d(model, [3, 3], 2, scope='pool1')
         model = slim.conv2d(model, 256, [5, 5], 1, scope='conv2')
         model = slim.max_pool2d(model, [3, 3], 2, scope='pool2')
         model = slim.conv2d(model, 384, [3, 3], 1, scope='conv3')
         model = slim.conv2d(model, 384, [3, 3], 1, scope='conv4')
         model = slim.conv2d(model, 256, [3, 3], 1, scope='conv5')
         model = slim.max_pool2d(model, [3, 3], 2, scope='pool5')
         model = slim.flatten(model)
         model = slim.fully_connected(model, 4096, activation_fn=None, scope='fc1')
         model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do1')
         model = slim.fully_connected(model, 4096, activation_fn=None, scope='fc2')
         model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do2')
         model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fc3')
     return model
Example #18
 def inference(self):
     x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
     with slim.arg_scope([slim.conv2d, slim.fully_connected],
                         weights_initializer=tf.contrib.layers.xavier_initializer(),
                         weights_regularizer=slim.l2_regularizer(0.0005)):
         model = slim.repeat(x, 2, slim.conv2d, 64, [3, 3], scope='conv1')
         model = slim.max_pool2d(model, [2, 2], scope='pool1')
         model = slim.repeat(model, 2, slim.conv2d, 128, [3, 3], scope='conv2')
         model = slim.max_pool2d(model, [2, 2], scope='pool2')
         model = slim.repeat(model, 3, slim.conv2d, 256, [3, 3], scope='conv3')
         model = slim.max_pool2d(model, [2, 2], scope='pool3')
         model = slim.repeat(model, 3, slim.conv2d, 512, [3, 3], scope='conv4')
         model = slim.max_pool2d(model, [2, 2], scope='pool4')
         model = slim.repeat(model, 3, slim.conv2d, 512, [3, 3], scope='conv5')
         model = slim.max_pool2d(model, [2, 2], scope='pool5')
         model = slim.flatten(model, scope='flatten5')
         model = slim.fully_connected(model, 4096, scope='fc6')
         model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do6')
         model = slim.fully_connected(model, 4096, scope='fc7')
         model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do7')
         model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fcX8')
     return model
Example #19
def conv_net(inputs, hparams):
  """Builds the ConvNet from Kelz 2016."""
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=tf.contrib.layers.variance_scaling_initializer(
          factor=2.0, mode='FAN_AVG', uniform=True)):

    net = inputs
    i = 0
    for (conv_temporal_size, conv_freq_size,
         num_filters, freq_pool_size, dropout_amt) in zip(
             hparams.temporal_sizes, hparams.freq_sizes, hparams.num_filters,
             hparams.pool_sizes, hparams.dropout_keep_amts):
      net = slim.conv2d(
          net,
          num_filters, [conv_temporal_size, conv_freq_size],
          scope='conv' + str(i),
          normalizer_fn=slim.batch_norm)
      if freq_pool_size > 1:
        net = slim.max_pool2d(
            net, [1, freq_pool_size],
            stride=[1, freq_pool_size],
            scope='pool' + str(i))
      if dropout_amt < 1:
        net = slim.dropout(net, dropout_amt, scope='dropout' + str(i))
      i += 1

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    net = tf.reshape(
        net, (dims[0], dims[1], net.shape[2].value * net.shape[3].value),
        'flatten_end')

    net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
    net = slim.dropout(net, hparams.fc_dropout_keep_amt, scope='dropout_end')

    return net
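
conv_net is driven entirely by parallel per-layer lists in hparams. A hypothetical hparams object with the shape the loop expects (the concrete values are illustrative, not the original project's configuration), assuming TF 1.x where tf.contrib.training.HParams exists:

import tensorflow as tf  # assumes TF 1.x

hparams = tf.contrib.training.HParams(
    temporal_sizes=[3, 3, 3],             # conv kernel size along time, per layer
    freq_sizes=[3, 3, 3],                 # conv kernel size along frequency, per layer
    num_filters=[32, 32, 64],             # output channels per layer
    pool_sizes=[1, 2, 2],                 # frequency max-pool size (1 = no pooling)
    dropout_keep_amts=[1.0, 0.75, 0.75],  # keep_prob per layer (1.0 = no dropout)
    fc_size=512,
    fc_dropout_keep_amt=0.5)

# net = conv_net(spectrogram_batch, hparams)  # spectrogram_batch: [batch, time, freq, 1]
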
Example #20
def Encoder_fc3_dropout(x,
                        num_output=85,
                        is_training=True,
                        reuse=False,
                        name="3D_module"):
    """
    3D inference module. Three MLP layers (the last one is the output),
    with dropout on the first two.
    Input:
    - x: N x [|img_feat|, |3D_param|]
    - reuse: bool

    Outputs:
    - 3D params: N x num_output
      if orthogonal: 
           either 85: (3 + 24*3 + 10) or 109 (3 + 24*4 + 10) for factored axis-angle representation
      if perspective:
          86: (f, tx, ty, tz) + 24*3 + 10, or 110 for factored axis-angle.
    - variables: tf variables
    """
    if reuse:
        print('Reuse is on!')
    with tf.variable_scope(name, reuse=reuse) as scope:
        net = slim.fully_connected(x, 1024, scope='fc1')
        net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout1')
        net = slim.fully_connected(net, 1024, scope='fc2')
        net = slim.dropout(net, 0.5, is_training=is_training, scope='dropout2')
        small_xavier = variance_scaling_initializer(
            factor=.01, mode='FAN_AVG', uniform=True)
        net = slim.fully_connected(
            net,
            num_output,
            activation_fn=None,
            weights_initializer=small_xavier,
            scope='fc3')

    variables = tf.contrib.framework.get_variables(scope)
    return net, variables
Example #21
 def construct_net(self,is_trained = True):
     with slim.arg_scope([slim.conv2d], padding='VALID',
                         weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                         weights_regularizer=slim.l2_regularizer(0.0005)):
         net = slim.conv2d(self.input_images,6,[5,5],1,padding='SAME',scope='conv1')
         net = slim.max_pool2d(net, [2, 2], scope='pool2')
         net = slim.conv2d(net,16,[5,5],1,scope='conv3')
         net = slim.max_pool2d(net, [2, 2], scope='pool4')
         net = slim.conv2d(net,120,[5,5],1,scope='conv5')
         net = slim.flatten(net, scope='flat6')
         net = slim.fully_connected(net, 84, scope='fc7')
         net = slim.dropout(net, self.dropout,is_training=is_trained, scope='dropout8')
         digits = slim.fully_connected(net, 10, scope='fc9')
     return digits
Example #22
 def inference(self):
     x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
     # scale (divide by MNIST std)
     x = x * 0.0125
     with slim.arg_scope([slim.conv2d, slim.fully_connected],
                         weights_initializer=tf.contrib.layers.xavier_initializer(),
                         weights_regularizer=slim.l2_regularizer(0.0005)):
         model = slim.conv2d(x, 20, [5, 5], padding='VALID', scope='conv1')
         model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool1')
         model = slim.conv2d(model, 50, [5, 5], padding='VALID', scope='conv2')
         model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool2')
         model = slim.flatten(model)
         model = slim.fully_connected(model, 500, scope='fc1')
         model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do1')
         model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fc2')
         return model
Example #23
	def __init__(self,is_training):

		z_dim = FLAGS.z_dim
		batch_size = FLAGS.batch_size

		self.input_image = tf.placeholder(dtype=tf.float32,shape=[None,64,64,3],name='input_image')
		
		self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_nlcd')
		
		self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')

		self.keep_prob = tf.placeholder(tf.float32)

		weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)

		flatten_hist = tf.reshape(self.input_image,[-1,3*64*64])

		# flatten_hist = slim.fully_connected(flatten_hist, 1024,weights_regularizer=weights_regularizer,scope='fig/fc_1')
		# flatten_hist = slim.fully_connected(flatten_hist, 256,weights_regularizer=weights_regularizer, scope='fig/fc_2')
		# flatten_hist = slim.fully_connected(flatten_hist, 25,weights_regularizer=weights_regularizer, scope='fig/fc_3')

		self.image_feature_encoder = flatten_hist
		self.image_feature_decoder = flatten_hist
		
		############## Q(z|X) ###############


		############## Sample_z ###############

		eps = tf.random_normal(shape=[batch_size,z_dim])
		# self.sample_z = z_miu + tf.exp(z_logvar / 2) * eps
		self.sample_z = eps

		############## P(X|z) ###############

		x = tf.concat([self.input_nlcd,self.image_feature_decoder,self.sample_z],1)

		x = slim.fully_connected(x, 512,weights_regularizer=weights_regularizer,scope='decoder/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='decoder/fc_2')
		x = slim.fully_connected(x, 512,weights_regularizer=weights_regularizer, scope='decoder/fc_3')

		x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
		
		self.logits = slim.fully_connected(x, 100, activation_fn=None, weights_regularizer=weights_regularizer,scope='decoder/logits')

		self.output = tf.sigmoid(self.logits,name='decoder/output')
Example #24
  def AddDropout(self, prev_layer, index):
    """Adds a dropout layer.

    Args:
      prev_layer: Input tensor.
      index:      Position in model_str to start parsing

    Returns:
      Output tensor, end index in model_str.
    """
    pattern = re.compile(R'(Do)({\w+})?')
    m = pattern.match(self.model_str, index)
    if m is None:
      return None, None
    name = self._GetLayerName(m.group(0), index, m.group(2))
    layer = slim.dropout(
        prev_layer, 0.5, is_training=self.is_training, scope=name)
    return layer, m.end()
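
The compiled pattern matches a 'Do' token with an optional '{name}' suffix starting at the given index; m.end() tells the caller where to continue parsing model_str. A small illustration of what the match yields (the model-string fragment here is made up):

import re

pattern = re.compile(r'(Do)({\w+})?')
m = pattern.match('Do{drop1}Fc128', 0)  # hypothetical model_str starting at index 0
print(m.group(0), m.group(2), m.end())  # -> Do{drop1} {drop1} 9
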
Example #25
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
    with tf.variable_scope(scope,'InceptionV3',[inputs,num_classes],reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm,slim.dropout],
                            is_training=is_training):
            net,end_points=inception_v3_base(inputs,scope=scope)
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'):
                aux_logits = end_points['mixed_6e']
                with tf.variable_scope('AuxLogits'):
                    aux_logits = slim.avg_pool2d(aux_logits, [5, 5],
                                                 stride=3, padding='VALID', scope='avgpool_1a_5x5')
                    aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='conv2d_1b_1x1')
                    aux_logits = slim.conv2d(
                        aux_logits, 768, [5, 5],
                        weights_initializer=trunc_normal(0.01),
                        padding='VALID', scope='conv2d_2a_5x5'
                    )
                    aux_logits = slim.conv2d(
                        aux_logits, num_classes, [1, 1], activation_fn=None,
                        normalizer_fn=None, weights_initializer=trunc_normal(0.001),
                        scope='conv2d_2b_1x1'
                    )
                    if spatial_squeeze:
                        aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
                    end_points['AuxLogits'] = aux_logits

                with tf.variable_scope('Logits'):
                    net = slim.avg_pool2d(net, [8, 8], padding='VALID', scope='avgpool_1a_8x8')
                    net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='dropout_1b')
                    end_points['PreLogits'] = net
                    logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                         normalizer_fn=None, scope='conv2d_1c_1x1')
                    if spatial_squeeze:
                        logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
                return logits, end_points
Example #26
	def __init__(self,is_training):
		
		self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_image')
		
		self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')

		self.keep_prob = tf.placeholder(tf.float32)

		weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)

		x = slim.fully_connected(self.input_nlcd, 256,weights_regularizer=weights_regularizer,scope='fc/fc_1')
		x = slim.fully_connected(x, 256,weights_regularizer=weights_regularizer, scope='fc/fc_2')
		x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='fc/fc_3')

		x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)

		x = slim.fully_connected(inputs=x, num_outputs=100, activation_fn=None, biases_initializer=None, weights_regularizer=weights_regularizer,scope='fc/fc_4')

		self.output = tf.sigmoid(x)
Example #27
def conv_net(x,is_training):
    # "updates_collections": None is very import ,without will only get 0.10
    batch_norm_params = {"is_training": is_training, "decay": 0.9, "updates_collections": None}
    #,'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ]
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):
        with tf.variable_scope("ConvNet",reuse=tf.AUTO_REUSE):
            x = tf.reshape(x, [-1, 28, 28, 1])
            net = slim.conv2d(x, 6, [5,5], scope="conv_1")
            net = slim.max_pool2d(net, [2, 2],scope="pool_1")
            net = slim.conv2d(net, 12, [5,5], scope="conv_2")
            net = slim.max_pool2d(net, [2, 2], scope="pool_2")
            net = slim.flatten(net, scope="flatten")
            net = slim.fully_connected(net, 100, scope="fc")
            net = slim.dropout(net,is_training=is_training)
            net = slim.fully_connected(net, num_classes, scope="prob", activation_fn=None,normalizer_fn=None)
            return net
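
The comment above points at the usual slim.batch_norm pitfall: with the default updates_collections, the moving-mean/variance updates are only added to tf.GraphKeys.UPDATE_OPS and must be run explicitly, otherwise evaluation uses stale statistics (hence the ~0.10 accuracy). Passing updates_collections=None makes the updates run in place. A sketch of the explicit alternative, matching the UPDATE_OPS pattern used in the build_graph examples above (assumes TF 1.x slim; the tiny network and loss are stand-ins for illustration):

import tensorflow as tf  # assumes TF 1.x
slim = tf.contrib.slim

x = tf.placeholder(tf.float32, [None, 28, 28, 1])
is_training = tf.placeholder(tf.bool, [])
net = slim.conv2d(x, 6, [5, 5], normalizer_fn=slim.batch_norm,
                  normalizer_params={'is_training': is_training})  # updates land in UPDATE_OPS
loss = tf.reduce_mean(net)  # stand-in loss, for illustration only

# Run the moving-average updates together with each training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
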
Example #28
    def fast_rcnn_net(self):

        with tf.variable_scope('fast_rcnn_net'):
            with slim.arg_scope([slim.fully_connected], weights_regularizer=slim.l2_regularizer(self.weight_decay)):

                flatten_rois_features = slim.flatten(self.fast_rcnn_all_level_rois)

                net = slim.fully_connected(flatten_rois_features, 1024, scope='fc_1')
                if self.use_dropout:
                    net = slim.dropout(net, keep_prob=0.5, is_training=self.is_training, scope='dropout')

                net = slim.fully_connected(net, 1024, scope='fc_2')

                fast_rcnn_scores = slim.fully_connected(net, self.num_classes + 1, activation_fn=None,
                                                        scope='classifier')

                fast_rcnn_encode_boxes = slim.fully_connected(net, self.num_classes * 5, activation_fn=None,
                                                              scope='regressor')

                return fast_rcnn_encode_boxes, fast_rcnn_scores
Example #29
def inference_network(x):
  """Inference network to parameterize variational model. It takes
  data as input and outputs the variational parameters.

  loc, scale = neural_network(x)
  """
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.elu,
                      normalizer_fn=slim.batch_norm,
                      normalizer_params={'scale': True}):
    net = tf.reshape(x, [M, 28, 28, 1])
    net = slim.conv2d(net, 32, 5, stride=2)
    net = slim.conv2d(net, 64, 5, stride=2)
    net = slim.conv2d(net, 128, 5, padding='VALID')
    net = slim.dropout(net, 0.9)
    net = slim.flatten(net)
    params = slim.fully_connected(net, d * 2, activation_fn=None)

  loc = params[:, :d]
  scale = tf.nn.softplus(params[:, d:])
  return loc, scale
Example #30
def inference_network(x, xwidth=28, xheight=28, zdim=2):
  """Inference network to parameterize variational model. It takes
  data as input and outputs the variational parameters.
  mu, sigma = neural_network(x)
  """
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.elu,
                      normalizer_fn=slim.batch_norm,
                      normalizer_params={'scale': True}):
    net = tf.reshape(x, [N_MINIBATCH, 28, 28, 1])
    net = slim.conv2d(net, 32, 5, stride=2)
    net = slim.conv2d(net, 64, 5, stride=2)
    net = slim.conv2d(net, 128, 5, padding='VALID')
    net = slim.dropout(net, 0.9)
    net = slim.flatten(net)
    params = slim.fully_connected(net, zdim * 2, activation_fn=None)

  mu    = params[:, :zdim]
  #sigma = tf.nn.softplus(params[:, zdim:])
  sigma = params[:, zdim:]
  return mu, sigma
Example #31
	def __init__(self,is_training):

		z_dim = FLAGS.z_dim

		self.input_image = tf.placeholder(dtype=tf.float32,shape=[None,3,128],name='input_image')
		
		self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_nlcd')
		
		self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')

		self.keep_prob = tf.placeholder(tf.float32)

		weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)


		############## image feature ########
		# batch_norm = slim.batch_norm
		# batch_norm_params = {'is_training':is_training,'updates_collections':tf.GraphKeys.UPDATE_OPS,'decay':0.9,'epsilon':0.00001}

		#Padding: conv2d default is 'SAME'
		#Padding: pool2d default is 'VALID'
		
		# x = tf.expand_dims(self.input_image,-1)

		# x = slim.conv2d(scope='encoder/conv1',inputs=x,num_outputs=16,kernel_size=[3,3],stride=[3,1],
		# 	normalizer_fn=slim.batch_norm,normalizer_params=batch_norm_params,weights_regularizer = weights_regularizer)

		# x = slim.max_pool2d(scope='encoder/pool1',inputs=x,kernel_size=[3,2],stride=[3,2],padding='SAME')

		# x = slim.conv2d(scope='encoder/conv2',inputs=x,num_outputs=32,kernel_size=[1,3],stride=[1,1],
		# 	normalizer_fn=slim.batch_norm,normalizer_params=batch_norm_params,weights_regularizer = weights_regularizer)		

		# x = slim.max_pool2d(scope='encoder/pool2',inputs=x,kernel_size=[1,2],stride=[1,2],padding='SAME')

		# x = slim.conv2d(scope='encoder/conv3',inputs=x,num_outputs=64,kernel_size=[1,3],stride=[1,1],
		# 	normalizer_fn=slim.batch_norm,normalizer_params=batch_norm_params,weights_regularizer = weights_regularizer)

		# x = slim.max_pool2d(scope='encoder/pool3',inputs=x,kernel_size=[1,2],stride=[1,2],padding='SAME')

		# x = slim.conv2d(scope='encoder/conv4',inputs=x,num_outputs=128,kernel_size=[1,3],stride=[1,1],
		# 	normalizer_fn=slim.batch_norm,normalizer_params=batch_norm_params,weights_regularizer = weights_regularizer)

		# x = slim.max_pool2d(scope='encoder/pool4',inputs=x,kernel_size=[1,2],stride=[1,2],padding='SAME')

		# x = slim.conv2d(scope='encoder/conv5',inputs=x,num_outputs=256,kernel_size=[1,3],stride=[1,1],
		# 	normalizer_fn=slim.batch_norm,normalizer_params=batch_norm_params,weights_regularizer = weights_regularizer)

		# x = slim.max_pool2d(scope='encoder/pool5',inputs=x,kernel_size=[1,2],stride=[1,2],padding='SAME')

		# x = slim.conv2d(scope='encoder/conv6',inputs=x,num_outputs=512,kernel_size=[1,3],stride=[1,1],
		# 	normalizer_fn=slim.batch_norm,normalizer_params=batch_norm_params,weights_regularizer = weights_regularizer)

		# x = slim.max_pool2d(scope='encoder/pool6',inputs=x,kernel_size=[1,2],stride=[1,2],padding='SAME')

		# x = tf.reshape(x,[-1,512])

		# x = slim.fully_connected(x, 256,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_1')
		# self.image_feature_encoder = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_2')

		

		flatten_hist = tf.reshape(self.input_image,[-1,3*128])
		# x = slim.fully_connected(flatten_hist, 256,weights_regularizer=weights_regularizer,scope='encoder/hist/fc_1')
		# x = slim.fully_connected(x, 256,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_2')
		# x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_3')
		# self.image_feature_encoder = x
		self.image_feature_encoder = flatten_hist
		

		#self.image_feature_encoder = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)

		############## Q(z|X) ###############

		input_x = tf.concat([self.input_nlcd,self.image_feature_encoder,self.input_label],1)

		#input_x = tf.concat([self.input_nlcd,self.input_label],1)

		#input_x = slim.dropout(input_x,keep_prob=self.keep_prob,is_training=is_training)

		x = slim.fully_connected(input_x, 512,weights_regularizer=weights_regularizer,scope='encoder/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='encoder/fc_2')
		x = slim.fully_connected(x, 499,weights_regularizer=weights_regularizer, scope='encoder/fc_3')

		#x = x+input_x

		#dropout
		#x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)

		self.z_miu = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='encoder/z_miu')
		z_logvar = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='encoder/z_logvar')

		############## Sample_z ###############

		# eps = tf.random_normal(shape=tf.shape(z_miu))
		# sample_z = z_miu + tf.exp(z_logvar / 2) * eps

		condition = tf.concat([self.input_nlcd,self.image_feature_encoder],1)

		x = slim.fully_connected(condition, 512,weights_regularizer=weights_regularizer,scope='condition/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='condition/fc_2')
		x = slim.fully_connected(x, 399,weights_regularizer=weights_regularizer, scope='condition/fc_3')
		#x = x+condition
		
		self.condition_miu = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='condition/z_miu')
		condition_logvar = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='condition/z_logvar')		

		############## Sample_z ###############

		eps = tf.random_normal(shape=tf.shape(self.z_miu))
		self.sample_z = self.z_miu + tf.exp(z_logvar / 2) * eps
		############## P(X|z) ###############

		flatten_hist = tf.reshape(self.input_image,[-1,3*128])
		self.image_feature_decoder = flatten_hist
		input_x = tf.concat([self.input_nlcd,self.image_feature_decoder,self.sample_z],1)
		#x = tf.concat([self.input_nlcd,sample_z],1)

		x = slim.fully_connected(input_x, 512,weights_regularizer=weights_regularizer,scope='decoder/fc_1')
		x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='decoder/fc_2')
		x = slim.fully_connected(x, 499,weights_regularizer=weights_regularizer, scope='decoder/fc_3')

		#x = x+input_x
		
		#dropout
		x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
		
		self.logits = slim.fully_connected(x, 100, activation_fn=None, weights_regularizer=weights_regularizer,scope='decoder/logits')

		self.output = tf.sigmoid(self.logits,name='decoder/output')

		# E[log P(X|z)]
		self.recon_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.input_label), 1))
		tf.summary.scalar('recon_loss',self.recon_loss)
		
		# D_KL(Q(z|X) || P(z|X)); calculate in closed form as both dist. are Gaussian
		#self.kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_miu**2 - 1. - z_logvar, 1))
		self.kl_loss = tf.reduce_mean(gaussian_kld(self.z_miu,z_logvar,self.condition_miu,condition_logvar))
		tf.summary.scalar('kl_loss',self.kl_loss)

		# VAE loss
		self.vae_loss = self.recon_loss + self.kl_loss
		slim.losses.add_loss(self.vae_loss)
		tf.summary.scalar('vae_loss',self.vae_loss)
		
		# l2 loss
		self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
		tf.summary.scalar('l2_loss',self.l2_loss)

		#total loss
		self.total_loss = slim.losses.get_total_loss()
		tf.summary.scalar('total_loss',self.total_loss)





		# self.g_output = tf.sigmoid(x)

		# self.ce_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_label,logits=x),1))

		# tf.summary.scalar('ce_loss',self.ce_loss)

		# slim.losses.add_loss(self.ce_loss)		

		# self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())

		# tf.summary.scalar('l2_loss',self.l2_loss)

		# self.total_loss = slim.losses.get_total_loss()

		# tf.summary.scalar('total_loss',self.total_loss)

		# self.output = tf.sigmoid(x)

		
Example #32
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32,
                            shape=[None, 64, 64, 1],
                            name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/cpu:0'):
        # network: conv2d->max_pool2d->conv2d->max_pool2d->conv2d->max_pool2d->conv2d->conv2d->
        # max_pool2d->fully_connected->fully_connected
        # Set default arguments (batch_norm) for slim.conv2d and slim.fully_connected
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            # conv3_1 = slim.conv2d(images, 64, (3, 3), 1, padding='SAME', scope='conv3_1')
            # conv3_2 = slim.conv2d(conv3_1, 64, (3, 3), 1, padding='SAME', scope='conv3_2')
            # max_pool_1 = slim.max_pool2d(conv3_2, [2, 2], [2, 2], padding='SAME', scope='pool1')
            # conv3_3 = slim.conv2d(max_pool_1, 128, (3, 3), 1, padding='SAME', scope='conv3_3')
            # conv3_4 = slim.conv2d(conv3_3, 128, (3, 3), 1, padding='SAME', scope='conv3_4')
            # max_pool_2 = slim.max_pool2d(conv3_4, [2, 2], [2, 2], padding='SAME', scope='pool2')
            # conv3_5 = slim.conv2d(max_pool_2, 256, (3, 3), 1, padding="SAME", scope='conv3_5')
            # conv3_6 = slim.conv2d(conv3_5, 256, (3, 3), 1, padding="SAME", scope='conv3_6')
            # conv3_7 = slim.conv2d(conv3_6, 256, (3, 3), 1, padding="SAME", scope='conv3_7')
            # conv3_8 = slim.conv2d(conv3_7, 256, (3, 3), 1, padding="SAME", scope='conv3_8')
            # max_pool_3 = slim.max_pool2d(conv3_8, [2, 2], [2, 2], padding="SAME", scope='pool3')
            # conv3_9 = slim.conv2d(max_pool_3, 512, (3, 3), 1, padding="SAME", scope='conv3_9')
            # conv3_10 = slim.conv2d(conv3_9, 512, (3, 3), 1, padding="SAME", scope='conv3_10')
            # conv3_11 = slim.conv2d(conv3_10, 512, (3, 3), 1, padding="SAME", scope='conv3_11')
            # conv3_12 = slim.conv2d(conv3_11, 512, (3, 3), 1, padding="SAME", scope='conv3_12')
            # # max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2], padding='SAME', scope='pool4')
            # max_pool_4 = slim.max_pool2d(conv3_12, [2, 2], [2, 2], padding='VALID', scope='pool4')

            conv3_1 = slim.conv2d(images,
                                  64, [3, 3],
                                  1,
                                  padding='SAME',
                                  scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1,
                                  128, [3, 3],
                                  padding='SAME',
                                  scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2,
                                  256, [3, 3],
                                  padding='SAME',
                                  scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3,
                                  512, [3, 3],
                                  padding='SAME',
                                  scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4,
                                  512, [3, 3],
                                  padding='SAME',
                                  scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2],
                                         padding='VALID',
                                         scope='pool4')

            # Flatten the input while keeping the batch dimension.
            flatten = slim.flatten(max_pool_4)
            # Note that dropout acts on the activations: inputs are zeroed with probability (1 - keep_prob), and the surviving values are scaled up so the expected sum stays unchanged.
            fc1 = slim.fully_connected(slim.dropout(flatten,
                                                    keep_prob=keep_prob),
                                       1024,
                                       activation_fn=tf.nn.relu,
                                       scope='fc1')
            # fc2 = slim.fully_connected(slim.dropout(fc1, keep_prob=keep_prob), 1024, activation_fn=tf.nn.relu,
            #                            scope='fc2')
            # The final layer has one output per class; its values are the raw per-class scores (logits) for each sample.
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob),
                                          FLAGS.charset_size,
                                          activation_fn=None,
                                          scope='fc2')

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=labels))
        accuracy_top_1 = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
        # Collect the batch-norm update ops so the moving statistics are refreshed along with the loss.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        # learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.start_learning_rate, global_step=global_step,
        #                                            decay_rate=FLAGS.decay_rate, decay_steps=FLAGS.decay_steps,
        #                                            staircase=True)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        # Create an op that computes gradients, applies the updates, and returns the loss.
        train_operation = slim.learning.create_train_op(
            loss, optimizer=optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        # Record loss/accuracy summaries for TensorBoard.
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy_top_1', accuracy_top_1)
        merged_summary_op = tf.summary.merge_all()
        # Return the top-k predictions and their probabilities.
        predicted_prob_top_k, predicted_index_top_k = tf.nn.top_k(
            probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

        return {
            'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_operation': train_operation,
            'loss': loss,
            'is_training': is_training,
            'accuracy_top_1': accuracy_top_1,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_prob_top_k': predicted_prob_top_k
        }
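
# --- Added sketch (not from the original snippet): a minimal illustration of how
# the dictionary returned above might be consumed for one training step. The helper
# name and the feed values (keep_prob=0.8) are assumptions.
def run_one_training_step(sess, graph, batch_images, batch_labels):
    # Fetch the train op together with the loss and top-1 accuracy.
    _, loss_val, acc_val = sess.run(
        [graph['train_operation'], graph['loss'], graph['accuracy_top_1']],
        feed_dict={
            graph['images']: batch_images,
            graph['labels']: batch_labels,
            graph['keep_prob']: 0.8,
            graph['is_training']: True,
        })
    return loss_val, acc_val
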
    def build_network(self, sess, is_training=True):
        with tf.variable_scope('vgg_16', 'vgg_16'):
            # select initializers
            if cfg.TRAIN.TRUNCATED:
                initializer = tf.truncated_normal_initializer(mean=0.0,
                                                              stddev=0.01)
                initializer_bbox = tf.truncated_normal_initializer(
                    mean=0.0, stddev=0.001)
            else:
                initializer = tf.random_normal_initializer(mean=0.0,
                                                           stddev=0.01)
                initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                                stddev=0.001)

            net = slim.repeat(self._image,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              trainable=False,
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              128, [3, 3],
                              trainable=False,
                              scope='conv2')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              256, [3, 3],
                              trainable=is_training,
                              scope='conv3')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              512, [3, 3],
                              trainable=is_training,
                              scope='conv4')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              512, [3, 3],
                              trainable=is_training,
                              scope='conv5')
            self._act_summaries.append(net)
            self._layers['head'] = net
            # build the anchors for the image
            self._anchor_component()  # generate self._anchors, []

            # rpn
            rpn = slim.conv2d(net,
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3")
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(rpn,
                                        self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_anchors * 2,
                                               "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(rpn,
                                        self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')
            if is_training:
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred,
                    "rois")  # ind, x1, y1, x2, y2, score
                rpn_labels = self._anchor_target_layer(rpn_cls_score,
                                                       "anchor")  #
                # Try to have a deterministic order for the computation graph, for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, _ = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois")
            else:
                if cfg.TEST.MODE == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois")
                elif cfg.TEST.MODE == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois")
                else:
                    raise NotImplementedError

            # rcnn
            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(net, rois, "pool5")
            else:
                raise NotImplementedError

            pool5_flat = slim.flatten(pool5, scope='flatten')
            fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')  #4096
            if is_training:
                fc6 = slim.dropout(fc6,
                                   keep_prob=0.5,
                                   is_training=True,
                                   scope='dropout6')  #0.5
            fc7 = slim.fully_connected(fc6, 4096, scope='fc7')  #4096
            if is_training:
                fc7 = slim.dropout(fc7,
                                   keep_prob=0.5,
                                   is_training=True,
                                   scope='dropout7')  #0.5
            cls_score = slim.fully_connected(fc7,
                                             self._num_classes,
                                             weights_initializer=initializer,
                                             trainable=is_training,
                                             activation_fn=None,
                                             scope='cls_score')
            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                activation_fn=None,
                scope='bbox_pred')

            self._predictions["rpn_cls_score"] = rpn_cls_score
            self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
            self._predictions["rpn_cls_prob"] = rpn_cls_prob
            self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
            self._predictions["cls_score"] = cls_score
            self._predictions["cls_prob"] = cls_prob
            self._predictions["bbox_pred"] = bbox_pred
            self._predictions["rois"] = rois

            self._score_summaries.update(self._predictions)

            return rois, cls_prob, bbox_pred
def get_model(model_in, dropout_keeprate_node, train_config, scope):

    net = model_in
    with tf.variable_scope(name_or_scope=scope, values=[model_in]):

        # batch norm arg_scope
        with slim.arg_scope([train_config.normalizer_fn],
                            decay=train_config.batch_norm_decay,
                            fused=train_config.batch_norm_fused,
                            is_training=train_config.is_trainable,
                            activation_fn=train_config.activation_fn):

            if train_config.normalizer_fn is None:
                conv_activation_fn = train_config.activation_fn
            else:
                conv_activation_fn = None
            # max_pool arg_scope
            with slim.arg_scope([slim.max_pool2d],
                                stride=model_config['maxpool_stride'],
                                kernel_size=model_config['maxpool_ksize'],
                                padding='VALID'):

                # convolutional layer arg_scope
                with slim.arg_scope(
                    [slim.conv2d],
                        kernel_size=model_config['conv_ksize'],
                        stride=model_config['conv_stride'],
                        weights_initializer=train_config.weights_initializer,
                        weights_regularizer=train_config.weights_regularizer,
                        biases_initializer=train_config.biases_initializer,
                        trainable=train_config.is_trainable,
                        activation_fn=conv_activation_fn,
                        normalizer_fn=train_config.normalizer_fn):

                    net = slim.conv2d(inputs=net,
                                      num_outputs=model_chout_num['c1'],
                                      padding='SAME',
                                      scope='c1_conv')

                    net = slim.max_pool2d(inputs=net, scope='s2_pool')

                    net = slim.conv2d(inputs=net,
                                      num_outputs=model_chout_num['c3'],
                                      padding='VALID',
                                      scope='c3_conv')

                    net = slim.max_pool2d(inputs=net, scope='s4_pool')

                    net = slim.conv2d(inputs=net,
                                      num_outputs=model_chout_num['c5'],
                                      padding='VALID',
                                      scope='c5_conv')

        # output layer by fully-connected layer
        with slim.arg_scope([slim.fully_connected],
                            trainable=train_config.is_trainable):

            with slim.arg_scope([slim.dropout],
                                keep_prob=dropout_keeprate_node,
                                is_training=train_config.is_trainable):

                net = slim.fully_connected(
                    inputs=net,
                    num_outputs=model_chout_num['f6'],
                    activation_fn=train_config.activation_fn,
                    scope='f6_fc')

                net = slim.dropout(inputs=net, scope='f6_dropout')

                net = slim.fully_connected(inputs=net,
                                           num_outputs=model_chout_num['out'],
                                           activation_fn=None,
                                           scope='out_fc')

                out_logit = slim.dropout(inputs=net, scope='out_dropout')

                out_logit = tf.reshape(out_logit,
                                       shape=[-1, model_chout_num['out']])

        return out_logit
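
# --- Added sketch (assumption): get_model above reads module-level dicts
# model_config and model_chout_num that are not shown in this snippet. One
# plausible LeNet-style configuration matching the c1/s2/c3/s4/c5/f6 scope names:
model_config = {
    'conv_ksize': [5, 5],
    'conv_stride': [1, 1],
    'maxpool_ksize': [2, 2],
    'maxpool_stride': [2, 2],
}
model_chout_num = {
    'c1': 6,     # channels of the first conv layer
    'c3': 16,    # channels of the second conv layer
    'c5': 120,   # channels of the third conv layer
    'f6': 84,    # width of the fully connected layer
    'out': 10,   # number of output classes
}
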
Пример #35
0
def _build_network(images,
                   num_outputs,
                   alpha,
                   keep_prob=1.0,
                   is_training=True,
                   scope='yolo'):
    with tf.variable_scope(scope):
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                variables_collections='Variables'):
            net = tf.pad(images,
                         np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                         name='pad_1')
            net = slim.conv2d(net,
                              64,
                              7,
                              2,
                              padding='VALID',
                              scope='conv_2',
                              trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
            net = slim.conv2d(net, 192, 3, scope='conv_4', trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
            net = slim.conv2d(net, 128, 1, scope='conv_6')
            net = slim.conv2d(net, 256, 3, scope='conv_7')
            net = slim.conv2d(net, 256, 1, scope='conv_8')
            net = slim.conv2d(net, 512, 3, scope='conv_9')
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
            net = slim.conv2d(net, 256, 1, scope='conv_11')
            net = slim.conv2d(net, 512, 3, scope='conv_12')
            net = slim.conv2d(net, 256, 1, scope='conv_13')
            net = slim.conv2d(net, 512, 3, scope='conv_14')
            net = slim.conv2d(net, 256, 1, scope='conv_15')
            net = slim.conv2d(net, 512, 3, scope='conv_16')
            net = slim.conv2d(net, 256, 1, scope='conv_17')
            net = slim.conv2d(net, 512, 3, scope='conv_18')
            net = slim.conv2d(net, 512, 1, scope='conv_19')
            # tf.summary.histogram('conv19', net)
            net = slim.conv2d(net, 1024, 3, scope='conv_20')
            # tf.summary.histogram('conv20', net)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
            net = slim.conv2d(net, 512, 1, scope='conv_22')
            net = slim.conv2d(net, 1024, 3, scope='conv_23')
            net = slim.conv2d(net, 512, 1, scope='conv_24')
            net = slim.conv2d(net, 1024, 3, scope='conv_25')
            net = slim.conv2d(net, 1024, 3, scope='conv_26')
            # tf.summary.histogram('conv26', net)
            net = tf.pad(net,
                         np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                         name='pad_27')
            net = slim.conv2d(net,
                              1024,
                              3,
                              2,
                              padding='VALID',
                              scope='conv_28')
            net = slim.conv2d(net, 1024, 3, scope='conv_29')
            net = slim.conv2d(net, 1024, 3, scope='conv_30')
            net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
            net = slim.flatten(net, scope='flat_32')
            net = slim.fully_connected(net, 512, scope='fc_33')
            net = slim.fully_connected(net, 4096, scope='fc_34')
            net = slim.dropout(net,
                               keep_prob=keep_prob,
                               is_training=is_training,
                               scope='dropout_35')
            net = slim.fully_connected(net,
                                       num_outputs,
                                       activation_fn=None,
                                       scope='fc_36')
            # net ~ batch * 7 * 7 * 30
        return net
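
# --- Added sketch (assumption): _build_network passes leaky_relu(alpha) as the
# activation function, so the helper is presumably a factory returning a callable.
# A minimal version consistent with that usage:
def leaky_relu(alpha):
    def op(inputs, name=None):
        # Standard leaky ReLU: max(x, alpha * x).
        return tf.maximum(inputs, alpha * inputs, name=name)
    return op
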
Пример #36
0
def STsingle(inputs, outputs, loss_weight, labels):
    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected], 
                        activation_fn=tf.nn.elu):

        '''
        Shared conv layers
        '''
        conv1_1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [3, 3], scope='conv1_1')
        # conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')

        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')

        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')

        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')

        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')
        # print pool5.get_shape()
        '''
        Spatial branch
        '''
        flatten5 = slim.flatten(pool5, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc8, labels=labels)
        actionLoss = tf.reduce_mean(cross_entropy)

        '''
        Temporal branch
        '''
        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001 
        alpha_c = 0.3
        alpha_s = 0.3
        lambda_smooth = 0.8
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2       # for deconvolution

        # Expanding part
        pr5 = slim.conv2d(pool5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625    # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_5, FlowDeltaWeights)
        upconv4 = slim.conv2d_transpose(pool5, 256, [2*scale, 2*scale], stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat(3, [pool4, upconv4, pr5to4])

        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25    # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_4, FlowDeltaWeights)
        upconv3 = slim.conv2d_transpose(concat4, 128, [2*scale, 2*scale], stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat(3, [pool3, upconv3, pr4to3])

        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5    # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_3, FlowDeltaWeights)
        upconv2 = slim.conv2d_transpose(concat3, 64, [2*scale, 2*scale], stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat(3, [pool2, upconv2, pr3to2])

        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0    # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_2, FlowDeltaWeights)
        upconv1 = slim.conv2d_transpose(concat2, 32, [2*scale, 2*scale], stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat(3, [pool1, upconv1, pr2to1])

        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0    # (*20/2) 
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_1, FlowDeltaWeights)
        
        # Adding intermediate losses
        all_loss = loss_weight[0]*loss1["total"] + loss_weight[1]*loss2["total"] + loss_weight[2]*loss3["total"] + \
                    loss_weight[3]*loss4["total"] + loss_weight[4]*loss5["total"] + loss_weight[0]*actionLoss
        slim.losses.add_loss(all_loss)

        losses = [loss1, loss2, loss3, loss4, loss5, actionLoss]
        flows_all = [pr1*flow_scale_1, pr2*flow_scale_2, pr3*flow_scale_3, pr4*flow_scale_4, pr5*flow_scale_5]
        
        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
def main():
    parser = ArgumentParser()
    parser.add_argument('--checkpoint',
                        type=str,
                        dest='checkpoint',
                        help='dir or .ckpt file to load checkpoint from',
                        metavar='CHECKPOINT',
                        required=True)
    parser.add_argument('--out-path',
                        type=str,
                        dest='out_path',
                        help='model output directory',
                        metavar='MODEL_OUT',
                        required=True)
    opts = parser.parse_args()

    if not os.path.exists(opts.out_path):
        os.mkdir(opts.out_path)

    tf.reset_default_graph()
    ###############################################################################
    #   graph = build_graph(top_k=3)
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32,
                            shape=[None, 64, 64, 1],
                            name='input')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1,
                         128, [3, 3],
                         padding='SAME',
                         scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2,
                         256, [3, 3],
                         padding='SAME',
                         scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')

    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob),
                               1024,
                               activation_fn=tf.nn.tanh,
                               scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob),
                                  3755,
                                  activation_fn=None,
                                  scope='fc2')
    #    flow = tf.cast(logits, tf.uint8, 'output')
    # logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None, reuse=reuse, scope='fc')
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=labels))
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    global_step = tf.get_variable("step", [],
                                  initializer=tf.constant_initializer(0.0),
                                  trainable=False)
    rate = tf.train.exponential_decay(2e-4,
                                      global_step,
                                      decay_steps=2000,
                                      decay_rate=0.97,
                                      staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(
        loss, global_step=global_step)
    probabilities = tf.nn.softmax(logits)

    predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities,
                                                             k=3)
    predicted_val_top_k = tf.cast(predicted_val_top_k, tf.float32,
                                  'predicted_val_top_k')
    predicted_index_top_k = tf.cast(predicted_index_top_k, tf.float32,
                                    'predicted_index_top_k')
    output = tf.concat([predicted_val_top_k, predicted_index_top_k],
                       -1,
                       name='output')

    #  output = tf.
    accuracy_in_top_k = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(probabilities, labels, 3), tf.float32))

    #####################################################################################
    saver = tf.train.Saver()
    with tf.Session() as sess:

        saver.restore(sess, opts.checkpoint)
        predict_val, predict_index = sess.run(
            [predicted_val_top_k, predicted_index_top_k],
            feed_dict={
                images: np.zeros([1, 64, 64, 1]),
                keep_prob: 1.0
            })
        #save graph
        tf.train.write_graph(sess.graph_def, opts.out_path, 'model.pb')
        #put graph and parameters together
        freeze_graph.freeze_graph(opts.out_path + '/model.pb', '', False,
                                  opts.checkpoint, 'output',
                                  'save/restore_all', 'save/Const:0',
                                  opts.out_path + '/frozen_model.pb', False,
                                  "")

    print("done")
Пример #38
0
def inference(inputs,
              num_classes=2,
              dropout_keep_prob=0.8,
              is_training=True,
              spatial_squeeze=True,
              scope='vgg_a',
              fc_conv_padding='VALID',
              global_pool=False):

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(0.00001),  # a smaller coefficient means a lighter L2 penalty
                        normalizer_fn=tf.layers.batch_normalization,
                        normalizer_params={'training': is_training,
                                           'momentum': 0.95}
                        ):
        # Equivalent to the commented-out code below.
        # 224x224x3 ->56x56x16
        end_points = {}
        with tf.variable_scope('layer1'):
            net = slim.repeat(inputs, 2, slim.conv2d, 16, [3, 3], scope='conv')
            net = SE_block(net, 4)
            net = slim.max_pool2d(net, kernel_size=[4, 4], stride=4, scope='pool')  # -> 56x56
            end_point = "layer1"
            end_points[end_point] = net
        # Second block
        # 56x56x16 -> 14x14x32
        with tf.variable_scope('layer2'):
            net = slim.repeat(net, 2, slim.conv2d, 32, [3, 3], scope='conv')
            net = slim.max_pool2d(net, kernel_size=[4, 4], stride=4, scope='pool')  # -> 14x14
            net = SE_block(net, 4)
            end_point = "layer2"
            end_points[end_point] = net

        # Third block
        # 14x14x32 -> 3x3x64
        with tf.variable_scope('layer3'):
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3], scope='conv')
            net = SE_block(net, 4)
            net = slim.max_pool2d(net, kernel_size=[4, 4], stride=4, scope='pool')  # -> 3x3
            end_point = "layer3"
            end_points[end_point] = net

        with tf.variable_scope('layer4'):
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv')
            net = SE_block(net, 4)
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=1, scope='pool')
            end_point = "layer3"
            end_points[end_point] = net

        with tf.variable_scope('layer5'):
            net = slim.flatten(net, scope='flatten')
            # net = slim.fully_connected(net, 64, scope='fc6')
            # end_point = "layer4"
            # end_points[end_point] = net

        # with tf.variable_scope('layer6'):
        #     net = slim.fully_connected(net, 64, scope='fc6')
        #     end_point = "layer8"
        #     end_points[end_point] = net

        with tf.variable_scope('layer7'):
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout')
            net = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc8')

    return net, end_points
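
# --- Added sketch (assumption): SE_block used above is not defined in this snippet.
# A typical squeeze-and-excitation block with a channel-reduction ratio:
def SE_block(inputs, ratio):
    channels = inputs.get_shape().as_list()[-1]
    # Squeeze: global average pooling over the spatial dimensions.
    squeeze = tf.reduce_mean(inputs, axis=[1, 2])
    # Excitation: bottleneck FC layers producing per-channel gates in (0, 1).
    excite = slim.fully_connected(squeeze, channels // ratio,
                                  activation_fn=tf.nn.relu, scope='se_fc1')
    excite = slim.fully_connected(excite, channels,
                                  activation_fn=tf.nn.sigmoid, scope='se_fc2')
    # Rescale the feature map channel-wise.
    return inputs * tf.reshape(excite, [-1, 1, 1, channels])
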
Пример #39
0
    def model_fn(self, is_training=True, *args, **kwargs):
        batch_image = tf.placeholder(tf.float32,
                                     (ctx.params.batch_size, 512, 512, 3),
                                     name='image')
        batch_label = None
        if is_training:
            batch_label = tf.placeholder(tf.int32,
                                         (ctx.params.batch_size, 512, 512),
                                         name='label')

        # preprocess
        rgb_channels = tf.split(batch_image, 3, 3)
        rgb_channels[0] = rgb_channels[0] - 128.0
        rgb_channels[1] = rgb_channels[1] - 128.0
        rgb_channels[2] = rgb_channels[2] - 128.0
        batch_image = tf.concat(rgb_channels, -1)

        # vgg 16
        layers = (
            'conv1_1',
            'relu1_1',
            'conv1_2',
            'relu1_2',
            'pool1',
            'conv2_1',
            'relu2_1',
            'conv2_2',
            'relu2_2',
            'pool2',
            'conv3_1',
            'relu3_1',
            'conv3_2',
            'relu3_2',
            'conv3_3',
            'relu3_3',
            'pool3',
            'conv4_1',
            'relu4_1',
            'conv4_2',
            'relu4_2',
            'conv4_3',
            'relu4_3',
            'pool4',
            'conv5_1',
            'relu5_1',
            'conv5_2',
            'relu5_2',
            'conv5_3',
            'relu5_3',
            'pool5',
        )

        net = batch_image
        net_collection = {}
        with slim.arg_scope(
            [slim.conv2d],
                weights_regularizer=slim.l2_regularizer(0.0001),
                normalizer_fn=None,
                activation_fn=None,
                weights_initializer=slim.variance_scaling_initializer()):
            for i, name in enumerate(layers):
                kind = name[:4]
                if kind == 'conv':
                    block_i = int(name[4])
                    # VGG-16 widths: 64, 128, 256, 512, 512 (double each block, capped at 512).
                    output_channels = min(64 * (2 ** (block_i - 1)), 512)
                    net = slim.conv2d(net,
                                      output_channels, [3, 3],
                                      stride=[1, 1],
                                      padding='SAME')
                    net_collection[name] = net
                elif kind == 'relu':
                    net = tf.nn.relu(net)
                    net_collection[name] = net
                elif kind == 'pool':
                    net = slim.avg_pool2d(net, 2, stride=2, padding='SAME')
                    net_collection[name] = net

            pool5_output = net_collection['pool5']

            # fully conv
            conv6 = slim.conv2d(pool5_output,
                                4096, [7, 7],
                                stride=[1, 1],
                                padding='SAME')
            relu6 = tf.nn.relu(conv6)
            relu6 = slim.dropout(relu6, 0.5)

            conv7 = slim.conv2d(relu6,
                                4096, [1, 1],
                                stride=[1, 1],
                                padding='SAME')
            relu7 = tf.nn.relu(conv7)
            relu7 = slim.dropout(relu7)

            # FCN32S
            score_32 = slim.conv2d(relu7,
                                   ctx.params.class_num, [1, 1],
                                   stride=[1, 1],
                                   padding='SAME')
            score_32_up = slim.convolution2d_transpose(score_32,
                                                       ctx.params.class_num,
                                                       [4, 4], [2, 2])

            # FCN16S
            pool4_output = slim.conv2d(net_collection['pool4'],
                                       ctx.params.class_num, [1, 1],
                                       stride=[1, 1],
                                       padding='SAME')
            score_16 = score_32_up + pool4_output
            score_16_up = slim.convolution2d_transpose(score_16,
                                                       ctx.params.class_num,
                                                       [4, 4], [2, 2])

            # FCN8S
            pool3_output = slim.conv2d(net_collection['pool3'],
                                       ctx.params.class_num, [1, 1],
                                       stride=[1, 1],
                                       padding='SAME')
            score_8 = score_16_up + pool3_output
            score_8_up = slim.convolution2d_transpose(score_8,
                                                      ctx.params.class_num,
                                                      [4, 4], [2, 2])

            if is_training:
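                # Three stride-2 transposed convs upsample pool5 (512/32 = 16)
                # to 16 * 8 = 128, so the one-hot labels are resized to 128x128
                # to match score_8_up before computing the loss.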
                one_hot_batch_label = tf.one_hot(batch_label,
                                                 ctx.params.class_num)
                one_hot_batch_label = tf.image.resize_bilinear(
                    one_hot_batch_label, [128, 128])

                # cross entropy
                fcn8_loss = tf.losses.softmax_cross_entropy(
                    one_hot_batch_label, score_8_up)
                return fcn8_loss
            else:
                logits = tf.nn.softmax(score_8_up)
                return logits
Пример #40
0
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
  """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
  outputs = {}
  #arg_scope = _extra_conv_arg_scope(activation_fn=None)
  arg_scope = _extra_conv_arg_scope_with_bn(activation_fn=None)
  my_sigmoid = None
  with slim.arg_scope(arg_scope):
    with tf.variable_scope('pyramid'):
        # for p in pyramid:
        outputs['rpn'] = {}
        for i in range(5, 1, -1):
          p = 'P%d'%i
          stride = 2 ** i
          
          ## rpn head
          shape = tf.shape(pyramid[p])
          height, width = shape[1], shape[2]
          rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1, activation_fn=tf.nn.relu, scope='%s/rpn'%p)
          box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=my_sigmoid)
          cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

          anchor_scales = [2 ** (i - 2), 2 ** (i - 1), 2 ** i]
          print("anchor_scales =", anchor_scales)
          all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
          outputs['rpn'][p]={'box':box, 'cls':cls, 'anchor':all_anchors}

        ## gather all rois
        # print (outputs['rpn'])
        rpn_boxes = [tf.reshape(outputs['rpn']['P%d'%p]['box'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_clses = [tf.reshape(outputs['rpn']['P%d'%p]['cls'], [-1, 1]) for p in range(5, 1, -1)]  
        rpn_anchors = [tf.reshape(outputs['rpn']['P%d'%p]['anchor'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
        rpn_clses = tf.concat(values=rpn_clses, axis=0)
        rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

        outputs['rpn']['box'] = rpn_boxes
        outputs['rpn']['cls'] = rpn_clses
        outputs['rpn']['anchor'] = rpn_anchors
        # outputs['rpn'] = {'box': rpn_boxes, 'cls': rpn_clses, 'anchor': rpn_anchors}
        
        rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
        rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
        # rois, scores, batch_inds = sample_rpn_outputs(rois, rpn_probs[:, 1])
        rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

        # if is_training:
        #     # rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes)
        #     rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.2)
        
        outputs['roi'] = {'box': rois, 'score': scores}

        ## cropping regions
        [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

        outputs['assigned_rois'] = assigned_rois
        outputs['assigned_layer_inds'] = assigned_layer_inds

        cropped_rois = []
        ordered_rois = []
        pyramid_feature = []
        for i in range(5, 1, -1):
            print(i)
            p = 'P%d'%i
            splitted_rois = assigned_rois[i-2]
            batch_inds = assigned_batch_inds[i-2]
            cropped, boxes_in_crop = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                               pooled_height=14, pooled_width=14)
            # cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
            #                    pooled_height=14, pooled_width=14)
            cropped_rois.append(cropped)
            ordered_rois.append(splitted_rois)
            pyramid_feature.append(tf.transpose(pyramid[p],[0,3,1,2]))
            # if i is 5:
            #     outputs['tmp_0'] = tf.transpose(pyramid[p],[0,3,1,2])
            #     outputs['tmp_1'] = splitted_rois
            #     outputs['tmp_2'] = tf.transpose(cropped,[0,3,1,2])
            #     outputs['tmp_3'] = boxes_in_crop
            #     outputs['tmp_4'] = [ih, iw]
            
        cropped_rois = tf.concat(values=cropped_rois, axis=0)
        ordered_rois = tf.concat(values=ordered_rois, axis=0)


        outputs['ordered_rois'] = ordered_rois
        outputs['pyramid_feature'] = pyramid_feature

        outputs['roi']['cropped_rois'] = cropped_rois
        tf.add_to_collection('__CROPPED__', cropped_rois)

        ## refine head
        # to 7 x 7
        cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
        refine = slim.flatten(cropped_regions)
        refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        refine = slim.fully_connected(refine,  1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        cls2 = slim.fully_connected(refine, num_classes, activation_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.05))
        box = slim.fully_connected(refine, num_classes*4, activation_fn=my_sigmoid, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.05))

        outputs['refined'] = {'box': box, 'cls': cls2}
        
        ## decode refine net outputs
        cls2_prob = tf.nn.softmax(cls2)
        final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, ordered_rois, ih, iw)

        #outputs['tmp_0'] = ordered_rois
        #outputs['tmp_1'] = assigned_rois
        #outputs['tmp_2'] = box
        #outputs['tmp_3'] = final_boxes
        #outputs['tmp_4'] = cls2_prob

        #outputs['final_boxes'] = {'box': final_boxes, 'cls': classes}
        outputs['final_boxes'] = {'box': final_boxes, 'cls': classes, 'prob': cls2_prob}
        ## for testing, maskrcnn takes refined boxes as inputs
        if not is_training:
          rois = final_boxes
          # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
          #       assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
          for i in range(5, 1, -1):
            p = 'P%d'%i
            splitted_rois = assigned_rois[i-2]
            batch_inds = assigned_batch_inds[i-2]
            cropped, _ = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                               pooled_height=14, pooled_width=14)
            cropped_rois.append(cropped)
            ordered_rois.append(splitted_rois)
          cropped_rois = tf.concat(values=cropped_rois, axis=0)
          ordered_rois = tf.concat(values=ordered_rois, axis=0)
          
        ## mask head
        m = cropped_rois
        for _ in range(4):
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # to 28 x 28
        m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID', activation_fn=tf.nn.relu)
        tf.add_to_collection('__TRANSPOSED__', m)
        m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID', activation_fn=None)
          
        # add a mask, given the predicted boxes and classes
        outputs['mask'] = {'mask':m, 'cls': classes, 'score': scores}
          
  return outputs
Пример #41
0
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether the model is being trained.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      net: the flattened pre-logits (bottleneck) output of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):

                # 149 x 149 x 32
                net = slim.conv2d(inputs,
                                  32,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net,
                                  32,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net,
                                  80,
                                  1,
                                  padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net,
                                  192,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 96,
                                                 1,
                                                 scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net,
                                                    48,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                    64,
                                                    5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net,
                                                    64,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0,
                                                    96,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                    96,
                                                    3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net,
                                                     3,
                                                     stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool,
                                                   64,
                                                   1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat(3, [
                        tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1
                    ])

                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 384,
                                                 3,
                                                 stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net,
                                                    256,
                                                    1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0,
                                                    256,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1,
                                                    384,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net,
                                                     3,
                                                     stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat(3, [tower_conv, tower_conv1_2, tower_pool])

                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net,
                                                 256,
                                                 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv,
                                                   384,
                                                   3,
                                                   stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net,
                                                  256,
                                                  1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1,
                                                    288,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net,
                                                  256,
                                                  1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2,
                                                    288,
                                                    3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1,
                                                    320,
                                                    3,
                                                    stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net,
                                                     3,
                                                     stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([
                        tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool
                    ], 3)

                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

    return net, end_points
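The builder above stops at the flattened pre-logits tensor. A hedged sketch of how a classification head could be attached to the two returned values; the variable names and the 1001-class count are illustrative assumptions, not part of the original snippet:

# Hedged sketch: assume the two values returned above were captured as
#   prelogits, end_points = <builder>(images, ...)
num_classes = 1001  # arbitrary choice for illustration
logits = slim.fully_connected(prelogits, num_classes, activation_fn=None,
                              scope='Logits')
end_points['Logits'] = logits
end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')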
Пример #42
0
def vgg(inputs, is_train_phase, drop_prob=1.0):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(
                            0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):

        net = slim.conv2d(inputs, 64, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.3)

        net = slim.conv2d(net, 64, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)

        net = slim.max_pool2d(net, [2, 2], stride=[2, 2], padding="SAME")

        net = slim.conv2d(net, 128, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 128, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)

        net = slim.max_pool2d(net, [2, 2], stride=[2, 2], padding="SAME")

        net = slim.conv2d(net, 256, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 256, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 256, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.max_pool2d(net, [2, 2], stride=[2, 2], padding="SAME")

        net = slim.conv2d(net, 512, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 512, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 512, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.max_pool2d(net, [2, 2], stride=[2, 2], padding="SAME")

        net = slim.conv2d(net, 512, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 512, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.4)

        net = slim.conv2d(net, 512, [3, 3], stride=[1, 1], padding="SAME")
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.max_pool2d(net, [2, 2], stride=[2, 2], padding="SAME")
        net = slim.dropout(net, 0.5)

        net = slim.fully_connected(slim.flatten(net), 512)
        net = tf.contrib.layers.batch_norm(net,
                                           center=True,
                                           scale=True,
                                           is_training=is_train_phase)
        net = slim.dropout(net, 0.5)
        net = slim.fully_connected(net, 10, activation_fn=None)
    return net
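For reference, a minimal sketch of wiring this vgg builder into a training graph; the 32x32x3 input shape, the CIFAR-10-style labels and the learning rate are assumptions, not part of the original example:

images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
labels = tf.placeholder(tf.int64, [None], name='labels')
is_train_phase = tf.placeholder(tf.bool, [], name='train_flag')

logits = vgg(images, is_train_phase)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

# tf.contrib.layers.batch_norm registers its moving-average updates in
# UPDATE_OPS; they have to run alongside the train step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)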
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether the model is being trained.
      dropout_keep_prob: float, the fraction to keep before the final layer.
      bottleneck_layer_size: size of the bottleneck (embedding) layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      net: the bottleneck layer output of the model.
      cam_conv: the Mixed_6b feature map, returned before Reduction-B.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):

                # 149 x 149 x 32
                net = slim.conv2d(inputs,
                                  32,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net,
                                  32,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net,
                                      3,
                                      stride=2,
                                      padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net,
                                  80,
                                  1,
                                  padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net,
                                  192,
                                  3,
                                  padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net,
                                  256,
                                  3,
                                  stride=2,
                                  padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                end_points['Mixed_6b'] = net

                cam_conv = net

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net

                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')

                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, cam_conv, end_points
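A hedged usage sketch for the builder above; the 160x160 input size is an assumption (FaceNet-style pipelines commonly use it), and block35/block17/block8/reduction_a/reduction_b are expected to be defined elsewhere in the same module, since the function references them:

images = tf.placeholder(tf.float32, [None, 160, 160, 3], name='input')
prelogits, cam_conv, end_points = inception_resnet_v1(
    images, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128)
# L2-normalize the bottleneck to obtain unit-length embeddings.
embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')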
Пример #44
0
def phoc_prediction(features,
                    phoc_dim,
                    scope,
                    reuse=None,
                    L2_reg=0.0,
                    act_func=tf.nn.relu,
                    large_topology=False,
                    dropout=0.0):

    with slim.arg_scope(_args_scope(act_func, L2_reg)):
        with tf.variable_scope(scope, scope, [features], reuse=reuse) as sc:
            end_points_collection = sc.name + '_end_points'
            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                    outputs_collections=end_points_collection):
                if large_topology:
                    phoc = slim.conv2d(features,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc4_phoc')
                    phoc = slim.conv2d(phoc,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc5_phoc')
                    phoc = slim.conv2d(phoc,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc6_phoc')
                    phoc = slim.conv2d(phoc,
                                       phoc_dim, [1, 1],
                                       stride=1,
                                       activation_fn=None,
                                       padding='VALID',
                                       scope='fc7_phoc')
                else:
                    phoc = slim.conv2d(features,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc1')
                    phoc = slim.dropout(phoc,
                                        keep_prob=1 - dropout,
                                        is_training=Kb.learning_phase(),
                                        scope='dropout_phoc1')
                    phoc = slim.conv2d(phoc,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc2')
                    phoc = slim.dropout(phoc,
                                        keep_prob=1 - dropout,
                                        is_training=Kb.learning_phase(),
                                        scope='dropout_phoc2')
                    phoc = slim.conv2d(phoc,
                                       phoc_dim, [1, 1],
                                       stride=1,
                                       activation_fn=None,
                                       padding='VALID',
                                       scope='linear')
                phoc = tf.squeeze(phoc, name='phoc_embd')

    return phoc
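A minimal call sketch for phoc_prediction; the feature-map shape and the PHOC dimensionality are assumptions made for illustration, and _args_scope plus Kb (the Keras backend) must already be defined as in the original module:

features = tf.placeholder(tf.float32, [None, 1, 1, 512], name='features')
phoc_logits = phoc_prediction(features, phoc_dim=604, scope='phoc',
                              act_func=tf.nn.relu, dropout=0.5)
phoc_probs = tf.sigmoid(phoc_logits)  # PHOC attributes are multi-label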
Пример #45
0
    # conv3: 18*18*64 -> 8*8*128
    conv3 = slim.conv2d(pool2,384,[2,2],stride=2,scope='conv3')

    # MNIST conv1: 8*8*128 -> 6*6*256 -> 4*4*256
    # no group because I only have 1 GPU
    conv4 = slim.conv2d(conv3,256,[2,2],stride=1,scope='conv4')
    pool4 = slim.max_pool2d(conv4,[2,2],stride=1,scope='pool4')

    # MNIST fc6: 4*4*256 -> 1*1*1024 -> 1024
    conv5 = slim.conv2d(pool4,1024,[4,4],stride=1,scope='conv5')
    fc = slim.flatten(conv5)

    # MNIST fc6: 1024 -> 1024
    fc1 = slim.fully_connected(fc,1024,scope='fc1')
    drop1 = slim.dropout(fc1,keep_prob)

    # MNIST fc7: 1024 -> 1024
    fc2 = slim.fully_connected(drop1,1024,scope='fc2')
    drop2 = slim.dropout(fc2,keep_prob)

    # fc8: 1024 -> 10
    y_conv = slim.fully_connected(drop2,10,scope='fc3')
    

loss = slim.losses.softmax_cross_entropy(y_conv, y)
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_conv,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
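The fragment above starts mid-graph, so the tensors it relies on (pool2, y, keep_prob) are defined before the excerpt. A hypothetical minimal version of the missing placeholders, with names and shapes inferred purely from how they are used in the fragment, could look like:

y = tf.placeholder(tf.float32, [None, 10], name='labels')   # one-hot labels, as expected by softmax_cross_entropy and tf.argmax(y, 1)
keep_prob = tf.placeholder(tf.float32, name='keep_prob')    # dropout keep probability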
 
Пример #46
0
    def create_model(self,
                     model_input,
                     vocab_size,
                     is_training,
                     num_mixtures=None,
                     l2_penalty=1e-8,
                     **unused_params):
        """Creates a Mixture of (Logistic) Experts model.
     It also supports gating of the output probabilities.
     The model consists of a per-class softmax distribution over a
     configurable number of logistic classifiers. One of the classifiers in the
     mixture is not trained, and always predicts 0.
    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
    Returns:
      A dictionary with a tensor containing the probability predictions of the
      model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
        num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
        low_rank_gating = FLAGS.moe_low_rank_gating
        l2_penalty = FLAGS.moe_l2
        gating_probabilities = FLAGS.moe_prob_gating
        gating_input = FLAGS.moe_prob_gating_input

        input_size = model_input.get_shape().as_list()[1]
        remove_diag = FLAGS.gating_remove_diag

        if low_rank_gating == -1:
            gate_activations = slim.fully_connected(
                model_input,
                vocab_size * (num_mixtures + 1),
                activation_fn=None,
                biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="gates")
        else:
            gate_activations1 = slim.fully_connected(
                model_input,
                low_rank_gating,
                activation_fn=None,
                biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="gates1")
            gate_activations = slim.fully_connected(
                gate_activations1,
                vocab_size * (num_mixtures + 1),
                activation_fn=None,
                biases_initializer=None,
                weights_regularizer=slim.l2_regularizer(l2_penalty),
                scope="gates2")

        expert_activations_hidden = slim.fully_connected(
            model_input,
            input_size,
            activation_fn=tf.sigmoid,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts_hidden")

        expert_activations_hidden = slim.batch_norm(expert_activations_hidden,
                                                    center=True,
                                                    scale=True,
                                                    is_training=is_training,
                                                    scope="experts_hidden_bn")
        expert_activations_hidden = slim.dropout(expert_activations_hidden,
                                                 0.5,
                                                 scope='dropout')

        expert_activations_hidden = expert_activations_hidden + model_input
        expert_activations = slim.fully_connected(
            expert_activations_hidden,
            vocab_size * num_mixtures,
            activation_fn=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="experts")

        gating_distribution = tf.nn.softmax(
            tf.reshape(gate_activations,
                       [-1, num_mixtures + 1
                        ]))  # (Batch * #Labels) x (num_mixtures + 1)
        expert_distribution = tf.nn.sigmoid(
            tf.reshape(expert_activations,
                       [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

        probabilities_by_class_and_batch = tf.reduce_sum(
            gating_distribution[:, :num_mixtures] * expert_distribution, 1)
        probabilities = tf.reshape(probabilities_by_class_and_batch,
                                   [-1, vocab_size])

        if gating_probabilities:
            if gating_input == 'prob':
                gating_weights = tf.get_variable(
                    "gating_prob_weights", [vocab_size, vocab_size],
                    initializer=tf.random_normal_initializer(
                        stddev=1 / math.sqrt(vocab_size)))
                gates = tf.matmul(probabilities, gating_weights)
            else:
                gating_weights = tf.get_variable(
                    "gating_prob_weights", [input_size, vocab_size],
                    initializer=tf.random_normal_initializer(
                        stddev=1 / math.sqrt(vocab_size)))

                gates = tf.matmul(model_input, gating_weights)

            if remove_diag:
                # removes diagonals coefficients
                diagonals = tf.matrix_diag_part(gating_weights)
                gates = gates - tf.multiply(diagonals, probabilities)

            gates = slim.batch_norm(gates,
                                    center=True,
                                    scale=True,
                                    is_training=is_training,
                                    scope="gating_prob_bn")

            gates = tf.sigmoid(gates)

            probabilities = tf.multiply(probabilities, gates)

        return {"predictions": probabilities}
Пример #47
0
    def mobilenet_v2(self,
                     input_x,
                     is_training=True,
                     reuse=False,
                     keep_prob=0.8,
                     scope='mobilenet_v2'):
        # batch_norm parameters
        # bn_parameters = {'is_training': is_training, 'center':True, 'scale':True, 'decay':0.997}

        self.num_block = 0
        with tf.variable_scope(scope) as scope:
            if reuse:
                scope.reuse_variables()

            with slim.arg_scope(
                    [slim.conv2d, slim.separable_conv2d],
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
                    normalizer_fn=slim.batch_norm,
                    # normalizer_params=bn_parameters,
                    activation_fn=tf.nn.relu6), \
                    slim.arg_scope([slim.dropout], keep_prob=keep_prob) as s:

                conv0 = slim.conv2d(input_x, 32, 3, stride=1, scope='conv0')

                # bottleneck_residual_block
                bottleneck_1_1 = self.inverted_bottleneck_block(
                    conv0, 1, 16, False, is_training=is_training)
                bottleneck_2_1 = self.inverted_bottleneck_block(
                    bottleneck_1_1,
                    self.width_multiplier,
                    24,
                    False,
                    is_training=is_training)
                bottleneck_2_2 = self.inverted_bottleneck_block(
                    bottleneck_2_1,
                    self.width_multiplier,
                    24,
                    False,
                    is_training=is_training)
                bottleneck_3_1 = self.inverted_bottleneck_block(
                    bottleneck_2_2,
                    self.width_multiplier,
                    32,
                    True,
                    is_training=is_training)
                bottleneck_3_2 = self.inverted_bottleneck_block(
                    bottleneck_3_1,
                    self.width_multiplier,
                    32,
                    False,
                    is_training=is_training)
                bottleneck_3_3 = self.inverted_bottleneck_block(
                    bottleneck_3_2,
                    self.width_multiplier,
                    32,
                    False,
                    is_training=is_training)
                bottleneck_4_1 = self.inverted_bottleneck_block(
                    bottleneck_3_3,
                    self.width_multiplier,
                    64,
                    True,
                    is_training=is_training)
                bottleneck_4_2 = self.inverted_bottleneck_block(
                    bottleneck_4_1,
                    self.width_multiplier,
                    64,
                    False,
                    is_training=is_training)
                bottleneck_4_3 = self.inverted_bottleneck_block(
                    bottleneck_4_2,
                    self.width_multiplier,
                    64,
                    False,
                    is_training=is_training)
                bottleneck_4_4 = self.inverted_bottleneck_block(
                    bottleneck_4_3,
                    self.width_multiplier,
                    64,
                    False,
                    is_training=is_training)
                bottleneck_5_1 = self.inverted_bottleneck_block(
                    bottleneck_4_4,
                    self.width_multiplier,
                    96,
                    False,
                    is_training=is_training)
                bottleneck_5_2 = self.inverted_bottleneck_block(
                    bottleneck_5_1,
                    self.width_multiplier,
                    96,
                    False,
                    is_training=is_training)
                bottleneck_5_3 = self.inverted_bottleneck_block(
                    bottleneck_5_2,
                    self.width_multiplier,
                    96,
                    False,
                    is_training=is_training)
                bottleneck_6_1 = self.inverted_bottleneck_block(
                    bottleneck_5_3,
                    self.width_multiplier,
                    160,
                    True,
                    is_training=is_training)
                bottleneck_6_2 = self.inverted_bottleneck_block(
                    bottleneck_6_1,
                    self.width_multiplier,
                    160,
                    False,
                    is_training=is_training)
                bottleneck_6_3 = self.inverted_bottleneck_block(
                    bottleneck_6_2,
                    self.width_multiplier,
                    160,
                    False,
                    is_training=is_training)
                bottleneck_7_1 = self.inverted_bottleneck_block(
                    bottleneck_6_3,
                    self.width_multiplier,
                    320,
                    False,
                    is_training=is_training)

                conv8 = slim.conv2d(bottleneck_7_1,
                                    1280,
                                    3,
                                    stride=1,
                                    scope='conv8')

                # global average pooling
                filter_size = [
                    conv8.get_shape().as_list()[1],
                    conv8.get_shape().as_list()[2]
                ]
                avgpool = slim.avg_pool2d(conv8, filter_size, scope='avgpool')
                dropout = slim.dropout(avgpool)

                logits = tf.squeeze(
                    slim.conv2d(dropout,
                                self.num_class,
                                1,
                                stride=1,
                                activation_fn=None,
                                normalizer_fn=None))
            return logits
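A hedged call sketch for the method above; the 224x224 input size is an assumption, and model stands for an instance of the enclosing class, which is expected to provide width_multiplier, num_class and inverted_bottleneck_block:

input_x = tf.placeholder(tf.float32, [None, 224, 224, 3], name='input_x')
logits = model.mobilenet_v2(input_x, is_training=True, keep_prob=0.8)
probs = tf.nn.softmax(logits)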
Пример #48
0
    def build_network(self, sess, is_training=True):
        with tf.variable_scope('vgg_16', 'vgg_16'):
            # select initializers
            if cfg.TRAIN.TRUNCATED:
                initializer = tf.truncated_normal_initializer(mean=0.0,
                                                              stddev=0.01)
                initializer_bbox = tf.truncated_normal_initializer(
                    mean=0.0, stddev=0.001)
            else:
                initializer = tf.random_normal_initializer(mean=0.0,
                                                           stddev=0.01)
                initializer_bbox = tf.random_normal_initializer(mean=0.0,
                                                                stddev=0.001)

            net = slim.repeat(self._image,
                              2,
                              slim.conv2d,
                              64, [3, 3],
                              trainable=False,
                              scope='conv1')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              128, [3, 3],
                              trainable=False,
                              scope='conv2')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
            net = slim.repeat(net,
                              3,
                              slim.conv2d,
                              256, [3, 3],
                              trainable=is_training,
                              scope='conv3')

            #continue conv4
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              512, [3, 3],
                              trainable=is_training,
                              scope='conv4')

            #store conv4_2
            self.endpoint['conv4_2'] = net

            #continue conv4/conv4_3
            net = slim.conv2d(net,
                              512, [3, 3],
                              trainable=is_training,
                              scope='conv4/conv4_3')

            #store conv4_3
            self.endpoint['conv4_3'] = net

            #continue conv5
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
            net = slim.repeat(net,
                              2,
                              slim.conv2d,
                              512, [3, 3],
                              trainable=is_training,
                              scope='conv5')

            #store conv5_2
            self.endpoint['conv5_2'] = net

            #continue conv5/conv5_3
            net = slim.conv2d(net,
                              512, [3, 3],
                              trainable=is_training,
                              scope='conv5/conv5_3')

            #store conv5_3
            self.endpoint['conv5_3'] = net
            self._layers['head'] = self.endpoint['conv5_3']

            # build the anchors for the image
            self._anchor_component()

            ###############################################RPN START####################################################################
            #-----------------------------------------------rpn 3------------------------------------------------------------#
            conv3_resize = slim.avg_pool2d(self.endpoint['conv4_2'], [2, 2],
                                           padding='SAME',
                                           scope='conv3_resize')

            # rpn 3
            rpn3 = slim.conv2d(conv3_resize,
                               512, [3, 3],
                               trainable=is_training,
                               weights_initializer=initializer,
                               scope="rpn3_conv/3x3")

            #combine
            scale_rpn3 = tf.Variable(tf.cast(1, tf.float32),
                                     trainable=is_training,
                                     name='scale_rpn3')
            rpn3 = tf.scalar_mul(scale_rpn3, rpn3)

            self._act_summaries.append(rpn3)
            rpn3_cls_score = slim.conv2d(rpn3,
                                         self._num_anchors * 2, [1, 1],
                                         trainable=is_training,
                                         weights_initializer=initializer,
                                         padding='VALID',
                                         activation_fn=None,
                                         scope='rpn3_cls_score')
            # change it so that the score has 2 as its channel size
            rpn3_cls_score_reshape = self._reshape_layer(
                rpn3_cls_score, 2, 'rpn3_cls_score_reshape')
            rpn3_cls_prob_reshape = self._softmax_layer(
                rpn3_cls_score_reshape, "rpn3_cls_prob_reshape")
            rpn3_cls_prob = self._reshape_layer(rpn3_cls_prob_reshape,
                                                self._num_anchors * 2,
                                                "rpn3_cls_prob")

            rpn3_cls_prob_reshape = tf.reshape(rpn3_cls_prob, [-1, 2])
            rpn3_reject_inds = tf.where(
                tf.greater(rpn3_cls_prob_reshape[:, 0], rpn3_reject))

            if is_training:
                #compute anchor1 loss
                rpn3_labels = self._anchor_target_layer(
                    rpn3_cls_score, "anchor3", [], rpn_batch3, OHEM3)

            #store3 rpn values
            self._predictions[
                "rpn3_cls_score_reshape"] = rpn3_cls_score_reshape

            #-----------------------------------------------rpn 2------------------------------------------------------------##
            conv4_resize = slim.avg_pool2d(self.endpoint['conv4_3'], [2, 2],
                                           padding='SAME',
                                           scope='conv4_3_resize')
            rpn2 = slim.conv2d(conv4_resize,
                               512, [3, 3],
                               trainable=is_training,
                               weights_initializer=initializer,
                               scope="rpn2_conv/3x3")

            #combine
            scale_rpn2 = tf.Variable(tf.cast(1, tf.float32),
                                     trainable=is_training,
                                     name='scale_rpn2')
            rpn2 = tf.scalar_mul(scale_rpn2, rpn2)
            rpn2 = self._score_add_up(rpn3, rpn2, factor1, factor2, 'rpn2')

            self._act_summaries.append(rpn2)

            rpn2_cls_score = slim.conv2d(rpn2,
                                         self._num_anchors * 2, [1, 1],
                                         trainable=is_training,
                                         weights_initializer=initializer,
                                         padding='VALID',
                                         activation_fn=None,
                                         scope='rpn2_cls_score_pre')

            #add up 2 scores rpn1 and rpn
            rpn2_cls_score = self._score_add_up(rpn3_cls_score, rpn2_cls_score,
                                                factor1, factor2,
                                                'rpn2_cls_score')

            #used added up score
            rpn2_cls_score_reshape = self._reshape_layer(
                rpn2_cls_score, 2, 'rpn2_cls_score_reshape')
            rpn2_cls_prob_reshape = self._softmax_layer(
                rpn2_cls_score_reshape, "rpn2_cls_prob_reshape")
            rpn2_cls_prob = self._reshape_layer(rpn2_cls_prob_reshape,
                                                self._num_anchors * 2,
                                                "rpn2_cls_prob")

            rpn2_cls_prob_reshape = tf.reshape(rpn2_cls_prob, [-1, 2])
            rpn2_reject_inds = tf.where(
                tf.greater(rpn2_cls_prob_reshape[:, 0], rpn2_reject))
            rpn2_reject_inds = tf.concat([rpn3_reject_inds, rpn2_reject_inds],
                                         0)

            if is_training:
                #compute anchor1 loss
                rpn2_labels = self._anchor_target_layer(
                    rpn2_cls_score, "anchor2", rpn3_reject_inds, rpn_batch2,
                    OHEM2)

            #store2 rpn values
            self._predictions[
                "rpn2_cls_score_reshape"] = rpn2_cls_score_reshape

            ##-----------------------------------------------rpn 1------------------------------------------------------------##
            # rpn 1
            rpn1 = slim.conv2d(self.endpoint['conv5_2'],
                               512, [3, 3],
                               trainable=is_training,
                               weights_initializer=initializer,
                               scope="rpn1_conv/3x3")

            scale_rpn1 = tf.Variable(tf.cast(1, tf.float32),
                                     trainable=is_training,
                                     name='scale_rpn1')
            rpn1 = tf.scalar_mul(scale_rpn1, rpn1)
            rpn1 = self._score_add_up(rpn2, rpn1, factor1, factor2, 'rpn1')

            self._act_summaries.append(rpn1)
            rpn1_cls_score = slim.conv2d(rpn1,
                                         self._num_anchors * 2, [1, 1],
                                         trainable=is_training,
                                         weights_initializer=initializer,
                                         padding='VALID',
                                         activation_fn=None,
                                         scope='rpn1_cls_score_pre')

            rpn1_cls_score = self._score_add_up(rpn2_cls_score, rpn1_cls_score,
                                                factor1, factor2,
                                                'rpn1_cls_score')

            # change it so that the score has 2 as its channel size
            rpn1_cls_score_reshape = self._reshape_layer(
                rpn1_cls_score, 2, 'rpn1_cls_score_reshape')
            rpn1_cls_prob_reshape = self._softmax_layer(
                rpn1_cls_score_reshape, "rpn1_cls_prob_reshape")
            rpn1_cls_prob = self._reshape_layer(rpn1_cls_prob_reshape,
                                                self._num_anchors * 2,
                                                "rpn1_cls_prob")

            rpn1_cls_prob_reshape = tf.reshape(rpn1_cls_prob, [-1, 2])
            rpn1_reject_inds = tf.where(
                tf.greater(rpn1_cls_prob_reshape[:, 0], rpn1_reject))
            rpn1_reject_inds = tf.concat([rpn2_reject_inds, rpn1_reject_inds],
                                         0)

            if is_training:
                #compute anchor1 loss
                rpn1_labels = self._anchor_target_layer(
                    rpn1_cls_score, "anchor1", rpn2_reject_inds, rpn_batch2,
                    OHEM1)

            #store1 rpn values
            self._predictions[
                "rpn1_cls_score_reshape"] = rpn1_cls_score_reshape

            ##---------------------------------------------rpn 1 done------------------------------------------------------------##

            ##-----------------------------------------------rpn-----------------------------------------------------------------##
            rpn = slim.conv2d(self.endpoint['conv5_3'],
                              512, [3, 3],
                              trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3")

            scale_rpn = tf.Variable(tf.cast(1, tf.float32),
                                    trainable=is_training,
                                    name='scale_rpn')
            rpn = tf.scalar_mul(scale_rpn, rpn)
            rpn = self._score_add_up(rpn1, rpn, factor1, factor2, 'rpn')

            self._act_summaries.append(rpn)

            rpn_cls_score = slim.conv2d(rpn,
                                        self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_cls_score_pre')

            rpn_bbox_pred = slim.conv2d(rpn,
                                        self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID',
                                        activation_fn=None,
                                        scope='rpn_bbox_pred')

            #add up 2 scores rpn1 and rpn
            rpn_cls_score = self._score_add_up(rpn1_cls_score, rpn_cls_score,
                                               factor1, factor2,
                                               'rpn_cls_score')

            #used added up score
            rpn_cls_score_reshape = self._reshape_layer(
                rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                       "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                               self._num_anchors * 2,
                                               "rpn_cls_prob")

            if is_training:
                #compute anchor loss
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor",
                                                       rpn1_reject_inds,
                                                       rpn_batch1, OHEM)

            ######################################################RPN DONE##################################################################

            #---------------------------------------------------porposal is made here------------------------------------------------------#

            if is_training:
                # #compute anchor loss
                # rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor", rpn1_reject_inds)
                rois, roi_scores = self._proposal_layer(
                    rpn_cls_prob, rpn_bbox_pred, "rois", rpn1_reject_inds)

                # with tf.control_dependencies([rpn_labels]):
                #   rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")

            else:
                if cfg.TEST.MODE == 'nms':
                    rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                                   "rois", rpn1_reject_inds)
                elif cfg.TEST.MODE == 'top':
                    rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                       rpn_bbox_pred, "rois",
                                                       rpn1_reject_inds)
                else:
                    raise NotImplementedError

            #----------------------------------------------------------finish proposal-----------------------------------------------------#

            #############################################################RCNN START###############################################################

            #------------------------------------------------------rcnn 3----------------------------------------------------#
            # rcnn
            # generate target
            if is_training:
                with tf.control_dependencies([rpn_labels]):
                    rois, _, passinds3 = self._proposal_target_layer(
                        rois, roi_scores, "rpn3_rois", batch3)

            if cfg.POOLING_MODE == 'crop':
                pool31 = self._crop_pool_layer(conv3_resize, rois, "pool31")
            else:
                raise NotImplementedError

            pool31_conv = slim.conv2d(pool31,
                                      256, [1, 1],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      scope="pool31_conv")
            pool31_avg = slim.avg_pool2d(pool31_conv, [7, 7],
                                         padding='SAME',
                                         scope='pool31_avg',
                                         stride=1)
            pool31_flat = slim.flatten(pool31_avg, scope='flatten31')

            fc3_2 = slim.fully_connected(
                pool31_flat,
                512,
                scope='fc3_2',
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                trainable=is_training)

            # if is_training:
            #   fc3_2 = slim.dropout(fc3_2, keep_prob=0.5, is_training=True, scope='fc3_2')

            #combine
            scale3_2 = tf.Variable(tf.cast(1, tf.float32),
                                   trainable=is_training,
                                   name='scale3_2')
            fc_combine3_2 = tf.scalar_mul(scale3_2, fc3_2)

            cls3_score = slim.fully_connected(fc_combine3_2,
                                              self._num_classes,
                                              weights_initializer=initializer,
                                              trainable=is_training,
                                              activation_fn=None,
                                              scope='cls3_score')
            #store RCNN3
            self._predictions["cls3_score"] = cls3_score

            cls3_prob = self._softmax_layer(cls3_score, "cls3_prob")

            #reject
            cls3_inds = tf.reshape(tf.where(tf.less(cls3_prob[:, 0], reject3)),
                                   [-1])
            rois = tf.gather(rois, tf.reshape(cls3_inds, [-1]))
            #fc_combine3_2 = tf.gather(fc_combine3_2, tf.reshape(cls3_inds,[-1]))
            cls3_score = tf.gather(cls3_score, tf.reshape(cls3_inds, [-1]))

            self._act_summaries.append(conv3_resize)

            #------------------------------------------------------rcnn 2----------------------------------------------------#
            #generate target
            if is_training:
                with tf.control_dependencies([rpn_labels]):
                    roi_scores = tf.gather(roi_scores,
                                           tf.reshape(cls3_inds, [-1]))
                    rois, _, passinds4 = self._proposal_target_layer(
                        rois, roi_scores, "rpn2_rois", batch2)
                    cls3_score = tf.gather(cls3_score,
                                           tf.reshape(passinds4, [-1]))
                    #fc_combine3_2 = tf.gather(fc_combine3_2, tf.reshape(passinds4,[-1]))

            if cfg.POOLING_MODE == 'crop':
                pool41 = self._crop_pool_layer(conv4_resize, rois, "pool41")
            else:
                raise NotImplementedError

            pool41_conv = slim.conv2d(pool41,
                                      256, [1, 1],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      scope="pool41_conv")
            pool41_avg = slim.avg_pool2d(pool41_conv, [7, 7],
                                         padding='SAME',
                                         scope='pool41_avg',
                                         stride=1)
            pool41_flat = slim.flatten(pool41_avg, scope='flatten41')

            fc4_2 = slim.fully_connected(
                pool41_flat,
                512,
                scope='fc4_2',
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                trainable=is_training)

            # if is_training:
            #   fc4_2 = slim.dropout(fc4_2, keep_prob=0.5, is_training=True, scope='fc4_2')

            #fc4_2 = self._score_add_up(fc_combine3_2, fc4_2, factor1, factor2, 'fc_42_comb')

            #combine
            scale4_2 = tf.Variable(tf.cast(1, tf.float32),
                                   trainable=is_training,
                                   name='scale4_2')

            fc_combine4_2 = tf.scalar_mul(scale4_2, fc4_2)

            cls4_score = slim.fully_connected(fc_combine4_2,
                                              self._num_classes,
                                              weights_initializer=initializer,
                                              trainable=is_training,
                                              activation_fn=None,
                                              scope='cls4_score')

            #cls4_score = self._score_add_up(cls3_score, cls4_score, factor1, factor2, 'cls4_score')

            #store RCNN2
            self._predictions["cls2_score"] = cls4_score

            cls4_prob = self._softmax_layer(cls4_score, "cls4_prob")

            #reject
            cls4_inds = tf.reshape(tf.where(tf.less(cls4_prob[:, 0], reject2)),
                                   [-1])
            rois = tf.gather(rois, tf.reshape(cls4_inds, [-1]))
            #fc_combine4_2 = tf.gather(fc_combine4_2, tf.reshape(cls4_inds,[-1]))
            cls4_score = tf.gather(cls4_score, tf.reshape(cls4_inds, [-1]))
            cls3_score = tf.gather(cls3_score, tf.reshape(cls4_inds, [-1]))
            self._act_summaries.append(conv4_resize)

            # #---------------------------------------------------------rcnn 1---------------------------------------------------------------#
            #generate target
            if is_training:
                with tf.control_dependencies([rpn_labels]):
                    roi_scores = tf.gather(roi_scores,
                                           tf.reshape(cls4_inds, [-1]))
                    rois, _, passinds5 = self._proposal_target_layer(
                        rois, roi_scores, "rpn1_rois", batch1)
                    cls4_score = tf.gather(cls4_score,
                                           tf.reshape(passinds5, [-1]))
                    cls3_score = tf.gather(cls3_score,
                                           tf.reshape(passinds5, [-1]))
                    #fc_combine4_2 = tf.gather(fc_combine4_2, tf.reshape(passinds5,[-1]))

            if cfg.POOLING_MODE == 'crop':
                pool51 = self._crop_pool_layer(self.endpoint['conv5_2'], rois,
                                               "pool51")
            else:
                raise NotImplementedError

            pool51_conv = slim.conv2d(pool51,
                                      512, [1, 1],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      scope="pool51_conv")
            pool51_avg = slim.avg_pool2d(pool51_conv, [7, 7],
                                         padding='SAME',
                                         scope='pool51_avg',
                                         stride=1)
            pool51_flat = slim.flatten(pool51_avg, scope='flatten51')

            fc5_2 = slim.fully_connected(
                pool51_flat,
                512,
                scope='fc5_2',
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                trainable=is_training)

            # if is_training:
            #   fc5_2 = slim.dropout(fc5_2, keep_prob=0.5, is_training=True, scope='fc5_2')

            #fc5_2 = self._score_add_up(fc_combine4_2, fc5_2, factor1, factor2, 'fc_52_comb')

            #combine
            scale5_2 = tf.Variable(tf.cast(1, tf.float32),
                                   trainable=is_training,
                                   name='scale5_2')

            fc_combine5_2 = tf.scalar_mul(scale5_2, fc5_2)

            cls5_score = slim.fully_connected(fc_combine5_2,
                                              self._num_classes,
                                              weights_initializer=initializer,
                                              trainable=is_training,
                                              activation_fn=None,
                                              scope='cls5_score')

            #cls5_score = self._score_add_up(cls4_score, cls5_score, factor1, factor2, 'cls5_score')

            #store RCNN2
            self._predictions["cls1_score"] = cls5_score

            cls5_prob = self._softmax_layer(cls5_score, "cls5_prob")

            #reject
            cls5_inds = tf.reshape(tf.where(tf.less(cls5_prob[:, 0], reject1)),
                                   [-1])
            rois = tf.gather(rois, tf.reshape(cls5_inds, [-1]))
            cls5_score = tf.gather(cls5_score, tf.reshape(cls5_inds, [-1]))
            cls4_score = tf.gather(cls4_score, tf.reshape(cls5_inds, [-1]))
            cls3_score = tf.gather(cls3_score, tf.reshape(cls5_inds, [-1]))
            self._act_summaries.append(self.endpoint['conv5_2'])

            #-------------------------------------------------------rcnn -------------------------------------------------------#
            #generate target
            if is_training:
                with tf.control_dependencies([rpn_labels]):
                    roi_scores = tf.gather(roi_scores,
                                           tf.reshape(cls5_inds, [-1]))
                    rois, _, passinds = self._proposal_target_layer(
                        rois, roi_scores, "rpn_rois", batch)
                    cls5_score = tf.gather(cls5_score,
                                           tf.reshape(passinds, [-1]))
                    cls4_score = tf.gather(cls4_score,
                                           tf.reshape(passinds, [-1]))
                    cls3_score = tf.gather(cls3_score,
                                           tf.reshape(passinds, [-1]))

            if cfg.POOLING_MODE == 'crop':
                pool5 = self._crop_pool_layer(self.endpoint['conv5_3'], rois,
                                              "pool5")
                self.endpoint['pool5'] = pool5
            else:
                raise NotImplementedError

            pool5_flat = slim.flatten(pool5, scope='flatten')
            self._predictions['p5f'] = pool5_flat

            fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
            if is_training:
                fc6 = slim.dropout(fc6,
                                   keep_prob=0.5,
                                   is_training=True,
                                   scope='dropout6')
            fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
            if is_training:
                fc7 = slim.dropout(fc7,
                                   keep_prob=0.5,
                                   is_training=True,
                                   scope='dropout7')
            cls0_score = slim.fully_connected(fc7,
                                              self._num_classes,
                                              weights_initializer=initializer,
                                              trainable=is_training,
                                              activation_fn=None,
                                              scope='cls_score_pre')

            self._predictions["cls0_score"] = cls0_score

            # cls3_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls3_score_scale')
            # cls2_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls2_score_scale')
            # cls1_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls1_score_scale')
            # cls0_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls0_score_scale')

            cls_score = cls3_score * 0.25 + cls4_score * 0.25 + cls5_score * 0.25 + cls0_score * 0.25

            cls_prob = self._softmax_layer(cls_score, "cls_prob")
            bbox_pred = slim.fully_connected(
                fc7,
                self._num_classes * 4,
                weights_initializer=initializer_bbox,
                trainable=is_training,
                activation_fn=None,
                scope='bbox_pred')


            self._act_summaries.append(self.endpoint['conv5_3'])
            ###########################################################RCNN DONE############################################################

            #store rpn values
            self._predictions["rpn_cls_score"] = rpn_cls_score
            self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
            self._predictions["rpn_cls_prob"] = rpn_cls_prob
            self._predictions["rpn_bbox_pred"] = rpn_bbox_pred

            #store RCNN
            self._predictions["cls_score"] = cls_score
            self._predictions["cls_prob"] = cls_prob
            self._predictions["bbox_pred"] = bbox_pred
            self._predictions["rois"] = rois
            #####only for training######

            self._score_summaries.update(self._predictions)

            return rois, cls_prob, bbox_pred
Пример #49
0
def STbaseline(inputs, outputs, loss_weight, labels):
    """
    Spatial stream based on VGG16
    Temporal stream based on Flownet simple
    """

    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.elu):       # the original uses leaky ReLU; here we use ELU
        # Contracting part
        Tconv1   = slim.conv2d(tf.concat([inputs, outputs], axis=3), 64, [7, 7], stride=2, scope='Tconv1')
        Tconv2   = slim.conv2d(Tconv1, 128, [5, 5], stride=2, scope='Tconv2')
        Tconv3_1 = slim.conv2d(Tconv2, 256, [5, 5], stride=2, scope='Tconv3_1')
        Tconv3_2 = slim.conv2d(Tconv3_1, 256, [3, 3], scope='Tconv3_2')
        Tconv4_1 = slim.conv2d(Tconv3_2, 512, [3, 3], stride=2, scope='Tconv4_1')
        Tconv4_2 = slim.conv2d(Tconv4_1, 512, [3, 3], scope='Tconv4_2')
        Tconv5_1 = slim.conv2d(Tconv4_2, 512, [3, 3], stride=2, scope='Tconv5_1')
        Tconv5_2 = slim.conv2d(Tconv5_1, 512, [3, 3], scope='Tconv5_2')
        Tconv6_1 = slim.conv2d(Tconv5_2, 1024, [3, 3], stride=2, scope='Tconv6_1')
        Tconv6_2 = slim.conv2d(Tconv6_1, 1024, [3, 3], scope='Tconv6_2')

        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001 
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
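        # FlowDeltaWeights below is assumed to hold 3x3 finite-difference kernels
        # (2 input / 2 output channels) used by loss_interp for the flow smoothness term.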
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2       # for deconvolution

        # Expanding part
        pr6 = slim.conv2d(Tconv6_2, 2, [3, 3], activation_fn=None, scope='pr6')
        h6 = pr6.get_shape()[1].value
        w6 = pr6.get_shape()[2].value
        pr6_input = tf.image.resize_bilinear(inputs_norm, [h6, w6])
        pr6_output = tf.image.resize_bilinear(outputs_norm, [h6, w6])
        flow_scale_6 = 0.3125    # (*20/64)
        loss6, _ = loss_interp(pr6, pr6_input, pr6_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_6, FlowDeltaWeights)
        upconv5 = slim.conv2d_transpose(Tconv6_2, 512, [2*scale, 2*scale], stride=scale, scope='upconv5')
        pr6to5 = slim.conv2d_transpose(pr6, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr6to5')
        concat5 = tf.concat([Tconv5_2, upconv5, pr6to5], axis=3)

        pr5 = slim.conv2d(concat5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625    # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_5, FlowDeltaWeights)
        upconv4 = slim.conv2d_transpose(concat5, 256, [2*scale, 2*scale], stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat([Tconv4_2, upconv4, pr5to4], axis=3)

        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25    # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_4, FlowDeltaWeights)
        upconv3 = slim.conv2d_transpose(concat4, 128, [2*scale, 2*scale], stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat([Tconv3_2, upconv3, pr4to3], axis=3)

        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5    # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_3, FlowDeltaWeights)
        upconv2 = slim.conv2d_transpose(concat3, 64, [2*scale, 2*scale], stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat([Tconv2, upconv2, pr3to2], axis=3)

        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0    # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_2, FlowDeltaWeights)
        upconv1 = slim.conv2d_transpose(concat2, 32, [2*scale, 2*scale], stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat([Tconv1, upconv1, pr2to1], axis=3)

        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0    # (*20/2) 
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_1, FlowDeltaWeights)
        
    with slim.arg_scope([slim.conv2d, slim.fully_connected], 
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):

        # conv1_1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [3, 3], scope='conv1_1')
        conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')

        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')

        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')

        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')

        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')

        # Incorporate temporal feature
        concatST = tf.concat([pool5, Tconv5_2], axis=3)
        poolST = slim.max_pool2d(concatST, [2, 2])
        # print poolST.get_shape()
        concat2ST = tf.concat([poolST, Tconv6_2], axis=3)
        # print concat2ST.get_shape()
        concatDR = slim.conv2d(concat2ST, 512, [1, 1])
        # print concatDR.get_shape()

        flatten5 = slim.flatten(concatDR, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc8, labels=labels)
        actionLoss = tf.reduce_mean(cross_entropy)

        # Adding intermediate losses
        all_loss = loss_weight[0]*loss1["total"] + loss_weight[1]*loss2["total"] + loss_weight[2]*loss3["total"] + \
                    loss_weight[3]*loss4["total"] + loss_weight[4]*loss5["total"] + loss_weight[5]*loss6["total"] + \
                    loss_weight[0]*actionLoss
        slim.losses.add_loss(all_loss)

        losses = [loss1, loss2, loss3, loss4, loss5, loss6, actionLoss]
        # pr1 = tf.mul(tf.constant(20.0), pr1)
        flows_all = [pr1*flow_scale_1, pr2*flow_scale_2, pr3*flow_scale_3, pr4*flow_scale_4, pr5*flow_scale_5, pr6*flow_scale_6]
        
        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
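A minimal usage sketch, assuming the loss_interp helper referenced above is available and that TF 1.x with tf.contrib.slim is in use; the placeholder shapes and the six per-scale loss weights are illustrative only.

import tensorflow as tf

# Illustrative placeholders: a pair of consecutive RGB frames plus action labels.
frame_t = tf.placeholder(tf.float32, shape=[None, 384, 512, 3], name='frame_t')
frame_t1 = tf.placeholder(tf.float32, shape=[None, 384, 512, 3], name='frame_t1')
action_labels = tf.placeholder(tf.int64, shape=[None], name='action_labels')

# One weight per intermediate photometric/smoothness loss (pr1 .. pr6); values are illustrative.
loss_weight = [0.32, 0.08, 0.02, 0.01, 0.005, 0.005]

losses, flows_all, predictions = STbaseline(frame_t, frame_t1, loss_weight, action_labels)
total_loss = tf.losses.get_total_loss()  # picks up the loss added via slim.losses.add_loss
train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss)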
Пример #50
0
def inception_resnet_v2(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.
    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      bottleneck_layer_size: number of units in the bottleneck (embedding) layer.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.
    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1,
                    padding='SAME'):

                # stem for input of Inception-Resnet-v2
                with tf.variable_scope('stem'):
                    net = stem(inputs)
                end_points['stem'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 256, 256, 384, 384)
                end_points['Mixed_6a'] = net

                # channel = int(np.shape(net)[-1])
                # net = Squeeze_excitation_layer(net, out_dim=channel, ratio=reduction_ratio, layer_name='SE_1')

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                end_points['Mixed_6b'] = net

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # channel = int(np.shape(net)[-1])
                # net = Squeeze_excitation_layer(net, out_dim=channel, ratio=reduction_ratio, layer_name='SE_2')

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                # net = block8(net, activation_fn=None)
                # end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net,
                                          net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, end_points
Пример #51
0
def mnist_model_dropout(inputs,
                        is_training=True,
                        emb_size=128,
                        l2_weight=1e-3,
                        batch_norm_decay=None,
                        img_shape=None,
                        new_shape=None,
                        dropout_keep_prob=0.8,
                        augmentation_function=None,
                        image_summary=False):  # pylint: disable=unused-argument
    """Construct the image-to-embedding vector model."""

    inputs = tf.cast(inputs, tf.float32)  # / 255.0
    if new_shape is not None:
        shape = new_shape
        inputs = tf.image.resize_images(inputs,
                                        tf.constant(new_shape[:2]),
                                        method=tf.image.ResizeMethod.BILINEAR)
    else:
        shape = img_shape
    if is_training and augmentation_function is not None:
        inputs = tf.map_fn(lambda frame: augmentation_function(frame), inputs)

    if augmentation_function is not None:
        inputs = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame),
                           inputs)

    net = inputs

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        weights_regularizer=slim.l2_regularizer(l2_weight)):
        with slim.arg_scope([slim.dropout], is_training=is_training):

            net = slim.conv2d(net, 32, [3, 3], scope='conv1_1')
            net = slim.conv2d(net, 32, [3, 3], scope='conv1_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 14
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout1')

            net = slim.conv2d(net, 64, [3, 3], scope='conv2_1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 7
            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout2')

            net = slim.conv2d(net, 128, [3, 3], scope='conv3_1')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 3

            net = slim.flatten(net, scope='flatten')

            net = slim.dropout(net,
                               dropout_keep_prob,
                               is_training=is_training,
                               scope='dropout3')

            emb = slim.fully_connected(net, emb_size, scope='fc1')

    return emb
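For reference, a minimal sketch of calling mnist_model_dropout on a batch of MNIST-style images; the placeholder shape and the embedding size are illustrative.

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='images')
embeddings = mnist_model_dropout(images,
                                 is_training=True,
                                 emb_size=128,
                                 img_shape=(28, 28, 1))
# embeddings: [batch_size, 128], ready for a classifier head or a metric/association loss.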
Пример #52
0
def GoogLeNetv1(x, keep_prob):
    def inception_module(x, in_f, f_1, f_2_1, f_2_2, f_3_1, f_3_2, f_4_2):
        x1 = slim.conv2d(x,
                         f_1, [1, 1],
                         stride=1,
                         padding='SAME',
                         activation_fn=tf.nn.relu)

        x2_1 = slim.conv2d(x,
                           f_2_1, [1, 1],
                           stride=1,
                           padding='SAME',
                           activation_fn=tf.nn.relu)
        x2_2 = slim.conv2d(x2_1,
                           f_2_2, [3, 3],
                           stride=1,
                           padding='SAME',
                           activation_fn=tf.nn.relu)

        x3_1 = slim.conv2d(x,
                           f_3_1, [1, 1],
                           stride=1,
                           padding='SAME',
                           activation_fn=tf.nn.relu)
        x3_2 = slim.conv2d(x3_1,
                           f_3_2, [5, 5],
                           stride=1,
                           padding='SAME',
                           activation_fn=tf.nn.relu)

        x4_1 = slim.max_pool2d(x, [3, 3], stride=1, padding='SAME')
        x4_2 = slim.conv2d(x4_1,
                           f_4_2, [1, 1],
                           stride=1,
                           padding='SAME',
                           activation_fn=tf.nn.relu)
        x = tf.concat([x1, x2_2, x3_2, x4_2], axis=-1)

        return x

    x = slim.conv2d(x,
                    64, [7, 7],
                    stride=2,
                    padding="VALID",
                    activation_fn=tf.nn.relu)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    x = tf.nn.local_response_normalization(x)

    x = slim.conv2d(x,
                    64, [1, 1],
                    stride=1,
                    padding='SAME',
                    activation_fn=tf.nn.relu)
    x = slim.conv2d(x,
                    192, [3, 3],
                    stride=1,
                    padding='SAME',
                    activation_fn=tf.nn.relu)
    x = tf.nn.local_response_normalization(x)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')

    # inception 3a, 3b
    x = inception_module(x, 194, 64, 96, 128, 16, 32, 32)
    x = inception_module(x, 256, 128, 128, 192, 32, 96, 64)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')

    # inception 4a
    x = inception_module(x, 480, 192, 96, 208, 16, 48, 64)

    # auxiliary loss1
    x_aux1 = slim.avg_pool2d(x, 5, padding='SAME', stride=1)
    x_aux1 = slim.conv2d(x_aux1,
                         128, [1, 1],
                         stride=1,
                         padding='SAME',
                         activation_fn=tf.nn.relu)
    mb, h, w, c = x_aux1.get_shape().as_list()
    x_aux1 = tf.reshape(x_aux1, [-1, h * w * c])
    x_aux1 = slim.fully_connected(x_aux1, 1024, activation_fn=tf.nn.relu)
    x_aux1 = slim.dropout(x_aux1, keep_prob=keep_prob)
    x_aux1 = slim.fully_connected(x_aux1, num_classes)

    # inception 4b, 4c, 4d
    x = inception_module(x, 512, 160, 112, 224, 24, 64, 64)
    x = inception_module(x, 512, 128, 128, 256, 24, 64, 64)
    x = inception_module(x, 512, 112, 144, 288, 32, 64, 64)

    # auxiliary loss2
    x_aux2 = slim.avg_pool2d(x, 5, padding='SAME', stride=1)
    x_aux2 = slim.conv2d(x_aux2,
                         128, [1, 1],
                         stride=1,
                         padding='SAME',
                         activation_fn=tf.nn.relu)
    mb, h, w, c = x_aux2.get_shape().as_list()
    x_aux2 = tf.reshape(x_aux2, [-1, h * w * c])
    x_aux2 = slim.fully_connected(x_aux2, 1024, activation_fn=tf.nn.relu)
    x_aux2 = slim.dropout(x_aux2, keep_prob=keep_prob)
    x_aux2 = slim.fully_connected(x_aux2, num_classes)

    # inception 4e, 5a, 5b
    x = inception_module(x, 528, 256, 160, 320, 32, 128, 128)
    x = slim.max_pool2d(x, 3, padding='SAME', stride=2)
    x = inception_module(x, 832, 256, 160, 320, 32, 128, 128)
    x = inception_module(x, 832, 384, 192, 384, 48, 128, 128)

    #x = slim.avg_pool2d(x, 7, stride=1, padding='SAME')
    #mb, h, w, c = x.get_shape().as_list()
    #x = tf.reshape(x, [-1, h * w * c])
    x = tf.reduce_mean(x, axis=[1, 2])
    x = slim.fully_connected(x, num_classes)

    return x, x_aux1, x_aux2
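A minimal sketch of building the graph, assuming num_classes is defined at module level (GoogLeNetv1 reads it from the enclosing scope) and 224x224 RGB inputs; both values are illustrative.

import tensorflow as tf

num_classes = 10  # assumed module-level constant referenced inside GoogLeNetv1
images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='images')
keep_prob = tf.placeholder(tf.float32, shape=[], name='keep_prob')

logits, aux1_logits, aux2_logits = GoogLeNetv1(images, keep_prob)
# GoogLeNet-style training typically sums the main loss with down-weighted auxiliary losses.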
Пример #53
0
def classification(layer, keep_prob, num_class, scope=None):
    with tf.variable_scope(scope, 'classification', [layer]):
        drop = slim.dropout(layer, keep_prob=keep_prob)
        conv = slim.conv2d(drop, num_class, kernel_size=1)
        output = Upsampling(conv, 8)
    return output
Пример #54
0
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32,
                            shape=[None, 64, 64, 1],
                            name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/gpu:5'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images,
                                  64, [3, 3],
                                  1,
                                  padding='SAME',
                                  scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1,
                                  128, [3, 3],
                                  padding='SAME',
                                  scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2,
                                  256, [3, 3],
                                  padding='SAME',
                                  scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3,
                                  512, [3, 3],
                                  padding='SAME',
                                  scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4,
                                  512, [3, 3],
                                  padding='SAME',
                                  scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2],
                                         padding='SAME',
                                         scope='pool4')

            flatten = slim.flatten(max_pool_4)
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob),
                                       1024,
                                       activation_fn=tf.nn.relu,
                                       scope='fc1')
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob),
                                          FLAGS.charset_size,
                                          activation_fn=None,
                                          scope='fc2')
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=labels))
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss,
                                                 optimizer,
                                                 global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities,
                                                                 k=top_k)
        accuracy_in_top_k = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {
        'images': images,
        'labels': labels,
        'keep_prob': keep_prob,
        'top_k': top_k,
        'global_step': global_step,
        'train_op': train_op,
        'loss': loss,
        'is_training': is_training,
        'accuracy': accuracy,
        'accuracy_top_k': accuracy_in_top_k,
        'merged_summary_op': merged_summary_op,
        'predicted_distribution': probabilities,
        'predicted_index_top_k': predicted_index_top_k,
        'predicted_val_top_k': predicted_val_top_k
    }
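A minimal sketch of driving the returned graph in a session; the dummy batch is illustrative, FLAGS.charset_size is assumed to be configured elsewhere, and allow_soft_placement covers the hard-coded /gpu:5 device.

import numpy as np
import tensorflow as tf

graph = build_graph(top_k=3)
image_batch = np.zeros((32, 64, 64, 1), dtype=np.float32)  # stand-in for a real input pipeline
label_batch = np.zeros((32,), dtype=np.int64)

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    _, step, loss_val = sess.run(
        [graph['train_op'], graph['global_step'], graph['loss']],
        feed_dict={graph['images']: image_batch,
                   graph['labels']: label_batch,
                   graph['keep_prob']: 0.8,
                   graph['is_training']: True})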
Пример #55
0
def _create_network(incoming,
                    num_classes,
                    reuse=None,
                    l2_normalize=True,
                    create_summaries=True,
                    weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = incoming
    network = slim.conv2d(network,
                          32, [3, 3],
                          stride=1,
                          activation_fn=nonlinearity,
                          padding="SAME",
                          normalizer_fn=batch_norm_fn,
                          scope="conv1_1",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)
        tf.summary.image("conv1_1/weights",
                         tf.transpose(
                             slim.get_variables("conv1_1/weights:0")[0],
                             [3, 0, 1, 2]),
                         max_images=128)
    network = slim.conv2d(network,
                          32, [3, 3],
                          stride=1,
                          activation_fn=nonlinearity,
                          padding="SAME",
                          normalizer_fn=batch_norm_fn,
                          scope="conv1_2",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)

    # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
    # architecture in Table 1 of the paper. Information on how this affects
    # performance on MOT 16 training sequences can be found in
    # issue 10 https://github.com/nwojke/deep_sort/issues/10
    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

    network = residual_block(network,
                             "conv2_1",
                             nonlinearity,
                             conv_weight_init,
                             conv_bias_init,
                             conv_regularizer,
                             increase_dim=False,
                             is_first=True,
                             summarize_activations=create_summaries)
    network = residual_block(network,
                             "conv2_3",
                             nonlinearity,
                             conv_weight_init,
                             conv_bias_init,
                             conv_regularizer,
                             increase_dim=False,
                             summarize_activations=create_summaries)

    network = residual_block(network,
                             "conv3_1",
                             nonlinearity,
                             conv_weight_init,
                             conv_bias_init,
                             conv_regularizer,
                             increase_dim=True,
                             summarize_activations=create_summaries)
    network = residual_block(network,
                             "conv3_3",
                             nonlinearity,
                             conv_weight_init,
                             conv_bias_init,
                             conv_regularizer,
                             increase_dim=False,
                             summarize_activations=create_summaries)

    network = residual_block(network,
                             "conv4_1",
                             nonlinearity,
                             conv_weight_init,
                             conv_bias_init,
                             conv_regularizer,
                             increase_dim=True,
                             summarize_activations=create_summaries)
    network = residual_block(network,
                             "conv4_3",
                             nonlinearity,
                             conv_weight_init,
                             conv_bias_init,
                             conv_regularizer,
                             increase_dim=False,
                             summarize_activations=create_summaries)

    feature_dim = network.get_shape().as_list()[-1]
    print("feature dimensionality: ", feature_dim)
    network = slim.flatten(network)

    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(network,
                                   feature_dim,
                                   activation_fn=nonlinearity,
                                   normalizer_fn=batch_norm_fn,
                                   weights_regularizer=fc_regularizer,
                                   scope="fc1",
                                   weights_initializer=fc_weight_init,
                                   biases_initializer=fc_bias_init)

    features = network

    if l2_normalize:
        # Features in rows, normalize axis 1.
        features = slim.batch_norm(features, scope="ball", reuse=reuse)
        feature_norm = tf.sqrt(
            tf.constant(1e-8, tf.float32) +
            tf.reduce_sum(tf.square(features), [1], keep_dims=True))
        features = features / feature_norm

        with slim.variable_scope.variable_scope("ball", reuse=reuse):
            weights = slim.model_variable(
                "mean_vectors", (feature_dim, num_classes),
                initializer=tf.truncated_normal_initializer(stddev=1e-3),
                regularizer=None)
            scale = slim.model_variable("scale", (num_classes, ),
                                        tf.float32,
                                        tf.constant_initializer(
                                            0., tf.float32),
                                        regularizer=None)
            if create_summaries:
                tf.summary.histogram("scale", scale)
            # scale = slim.model_variable(
            #     "scale", (), tf.float32,
            #     initializer=tf.constant_initializer(0., tf.float32),
            #     regularizer=slim.l2_regularizer(1e-2))
            # if create_summaries:
            #     tf.scalar_summary("scale", scale)
            scale = tf.nn.softplus(scale)

        # Each mean vector in columns, normalize axis 0.
        weight_norm = tf.sqrt(
            tf.constant(1e-8, tf.float32) +
            tf.reduce_sum(tf.square(weights), [0], keep_dims=True))
        logits = scale * tf.matmul(features, weights / weight_norm)

    else:
        logits = slim.fully_connected(features,
                                      num_classes,
                                      activation_fn=None,
                                      normalizer_fn=None,
                                      weights_regularizer=fc_regularizer,
                                      scope="softmax",
                                      weights_initializer=fc_weight_init,
                                      biases_initializer=fc_bias_init)

    return features, logits
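A minimal training-side sketch, assuming the residual_block helper referenced above is defined and that person crops of 128x64 pixels are used; the class count is illustrative.

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 128, 64, 3], name='images')
labels = tf.placeholder(tf.int64, shape=[None], name='labels')

features, logits = _create_network(images, num_classes=1000)
xent = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
total_loss = xent + tf.add_n(tf.losses.get_regularization_losses())
# At test time only `features` (the appearance embedding) is needed.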
Пример #56
0
  def build_network(self, sess, is_training=True):
    #  pyramid network scales changes at different levels of pyramid
    self._anchor_scales = {}
 
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 101:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 3 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    elif self._num_layers == 152:
      blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 7 + [(512, 128, 2)]),
        # Use stride-1 for the last conv4 layer
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
      ]
    else:
      # other numbers are not supported
      raise NotImplementedError
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 4)
    if cfg.RESNET.FIXED_BLOCKS == 4:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                           blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = self.build_base()
        net, endpoints = resnet_v1.resnet_v1(net,
                                     blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                           blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net = self.build_base()
        net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                           blocks[0:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           scope=self._resnet_scope)
    pyramid = self.build_pyramid(endpoints)
    self._layers['head'] = net_conv4  # not sure what to do with this
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
      for i in range(5, 1, -1):
        p = i
        self._act_summaries.append(pyramid[p])
        self._feat_stride[p] = [2 ** i]
        shape = tf.shape(pyramid[p])
        h, w = shape[1], shape[2]
        
        #  In the paper they use only one anchor per pyramid level, but with that setting we frequently got no overlaps in anchor_target_proposal(...).
        self._anchor_scales[p] = [2**(i-j) for j in range(self._num_scales-1, -1, -1)]
        self._anchor_component(p, h, w)

        # build the anchors for the image
        # rpn
        rpn = slim.conv2d(pyramid[p], 256, [3, 3], trainable=is_training, weights_initializer=initializer, scope="rpn_conv/3x3", stride=1)
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                  weights_initializer=initializer,
                                  padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
      
        if is_training:
          rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
          rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor", p)
          # Try to have a deterministic order for the computation graph, for reproducibility
          with tf.control_dependencies([rpn_labels]):
            rois, roi_scores = self._proposal_target_layer(rois, roi_scores, "rpn_rois", p)
        else:
          if cfg.TEST.MODE == 'nms':
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
          elif cfg.TEST.MODE == 'top':
            rois, roi_scores = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
          else:
            raise NotImplementedError
        self._predictions[p]['rois'] = rois
        self._predictions[p]['roi_scores'] = roi_scores
        self._predictions[p]['rpn_cls_score'] = rpn_cls_score 
        self._predictions[p]['rpn_cls_score_reshape'] = rpn_cls_score_reshape
        self._predictions[p]['rpn_cls_prob'] = rpn_cls_prob
        self._predictions[p]['rpn_bbox_pred'] = rpn_bbox_pred
    
    all_roi_scores = tf.concat(values=[self._predictions[p]['roi_scores'] for p in pyramid], axis=0)
    all_rois = tf.concat(values=[self._predictions[p]['rois'] for p in pyramid], axis=0)
    p_vals = [tf.fill([tf.shape(self._predictions[p]['roi_scores'])[0], 1], float(p)) for p in pyramid]
    p_roi = tf.concat(values=[tf.concat(p_vals, axis=0), all_rois], axis=1)
    
    if is_training:
      all_proposal_target_labels = tf.concat(values=[self._proposal_targets[p]['labels'] for p in pyramid], axis=0)
      all_proposal_target_bbox = tf.concat(values=[self._proposal_targets[p]['bbox_targets'] for p in pyramid], axis=0)
      all_proposal_target_inside_w = tf.concat(values=[self._proposal_targets[p]['bbox_inside_weights'] for p in pyramid], axis=0)
      all_proposal_target_outside_w = tf.concat(values=[self._proposal_targets[p]['bbox_outside_weights'] for p in pyramid], axis=0)

    cfg_key = self._mode
    if type(cfg_key) == bytes:
      cfg_key = cfg_key.decode('utf-8')
    nms_top_n = all_roi_scores.shape[0] \
                    if all_roi_scores.shape[0] < cfg[cfg_key].RPN_POST_NMS_TOP_N \
                    else cfg[cfg_key].RPN_POST_NMS_TOP_N
    _, top_indices = tf.nn.top_k(tf.reshape(all_roi_scores, [-1]), k=nms_top_n)
    p_roi = tf.gather(p_roi, top_indices)
    
    [assigned_rois, _, _] = \
                assign_boxes(all_rois, [all_rois, top_indices], [2, 3, 4, 5], 'assign_boxes')

    for p in range(5, 1, -1):
      splitted_rois = assigned_rois[p-2]

      # rcnn 
      if cfg.POOLING_MODE == 'crop':
        cropped_roi = self._crop_pool_layer(pyramid[p], splitted_rois, "cropped_roi", p) 
        self._predictions[p]['cropped_roi'] = cropped_roi
      else:
        raise NotImplementedError
    cropped_rois = [self._predictions[p_layer]['cropped_roi'] for p_layer in self._predictions]
    cropped_rois = tf.concat(values=cropped_rois, axis=0)


    cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
    refine = slim.flatten(cropped_regions)
    refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
    refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
    refine = slim.fully_connected(refine,  1024, activation_fn=tf.nn.relu)
    refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
    cls_score = slim.fully_connected(refine, self._num_classes, activation_fn=None, 
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    bbox_pred = slim.fully_connected(refine, self._num_classes*4, activation_fn=my_sigmoid, 
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001))

    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = tf.gather(all_rois, top_indices)
    
    if is_training:
      self._proposal_targets['labels'] = all_proposal_target_labels 
      self._proposal_targets['bbox_targets'] = all_proposal_target_bbox
      self._proposal_targets['bbox_inside_weights'] = all_proposal_target_inside_w
      self._proposal_targets['bbox_outside_weights'] = all_proposal_target_outside_w

    #self._score_summaries.update(self._predictions) #  score summaries not compatible w/ dict
    return self._predictions["rois"], cls_prob, bbox_pred
Пример #57
0
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """
    创建Inception Resnet V1模型
    :param inputs: [batch_size,height,width,3]的四维张量
    :param is_training: 是否训练
    :param dropout_keep_prob: float,在最后一layer前保存的分数
    :param bottleneck_layer_size: bottleneck layer的size
    :param reuse: 是否重用网络及其变量
    :param scope: 可选的variable_scope
    :return: net: 模型的logits输出
    :return end_points: 初始模型中的一组end_points
    """

    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'):

                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net

                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net

                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net

                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net

                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net

                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net

                # 35 x 35 x 256
                net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-Resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net

                    net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net,
                                           bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck',
                                           reuse=False)

    return net, end_points
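A minimal inference sketch for computing L2-normalized embeddings with this network, assuming the block35/block17/block8/reduction helpers it references are defined; the 160x160 input size and 128-d bottleneck are illustrative.

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 160, 160, 3], name='input')
prelogits, _ = inception_resnet_v1(images,
                                   is_training=False,
                                   dropout_keep_prob=1.0,
                                   bottleneck_layer_size=128)
embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')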
Пример #58
0
    def __init__(self, flags, is_training=True):
        self.is_training = is_training

        # None = batch_size
        self.image_ph = tf.placeholder(tf.float32,
                                       shape=(None, flags.feature_size))
        self.text_ph = tf.placeholder(tf.int64, shape=(None, None))
        self.hard_label_ph = tf.placeholder(tf.float32,
                                            shape=(None, flags.num_label))
        self.soft_logit_ph = tf.placeholder(tf.float32,
                                            shape=(None, flags.num_label))

        # None = batch_size * sample_size
        self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
        self.reward_ph = tf.placeholder(tf.float32, shape=(None, ))

        self.tch_scope = tch_scope = 'tch'
        model_scope = nets_factory.arg_scopes_map[flags.image_model]
        vocab_size = utils.get_vocab_size(flags.dataset)
        with tf.variable_scope(tch_scope) as scope:
            with slim.arg_scope(
                    model_scope(weight_decay=flags.image_weight_decay)):
                iembed = self.image_ph
                iembed = slim.dropout(iembed,
                                      flags.image_keep_prob,
                                      is_training=is_training)

            with slim.arg_scope([slim.fully_connected],
                                weights_regularizer=slim.l2_regularizer(
                                    flags.text_weight_decay)):
                wembed = slim.variable(
                    'wembed',
                    shape=[vocab_size, flags.embedding_size],
                    initializer=tf.random_uniform_initializer(-0.1, 0.1))
                tembed = tf.nn.embedding_lookup(wembed, self.text_ph)
                tembed = tf.reduce_mean(tembed, axis=-2)

            with slim.arg_scope([slim.fully_connected],
                                weights_regularizer=slim.l2_regularizer(
                                    flags.tch_weight_decay),
                                biases_initializer=tf.zeros_initializer()):
                # cembed = tf.concat([tembed], 1)
                cembed = tf.concat([iembed, tembed], 1)
                self.logits = slim.fully_connected(cembed,
                                                   flags.num_label,
                                                   activation_fn=None)

            self.labels = tf.nn.softmax(self.logits)

            if not is_training:
                return

            save_dict, var_list = {}, []
            for variable in tf.trainable_variables():
                if not variable.name.startswith(tch_scope):
                    continue
                print('%-50s added to TCH saver' % variable.name)
                save_dict[variable.name] = variable
                var_list.append(variable)
            self.saver = tf.train.Saver(save_dict)

            self.global_step = global_step = tf.Variable(0, trainable=False)
            tn_size = utils.get_tn_size(flags.dataset)
            learning_rate = flags.tch_learning_rate
            self.learning_rate = utils.get_lr(flags, tn_size, global_step,
                                              learning_rate, tch_scope)

            # pre train
            pre_losses = self.get_pre_losses()
            self.pre_loss = tf.add_n(pre_losses,
                                     name='%s_pre_loss' % tch_scope)
            pre_losses.extend(self.get_regularization_losses())
            print('#pre_losses wt regularization=%d' % (len(pre_losses)))
            pre_optimizer = utils.get_opt(flags, self.learning_rate)
            self.pre_update = pre_optimizer.minimize(self.pre_loss,
                                                     global_step=global_step)

            # kdgan train
            kdgan_losses = self.get_kdgan_losses(flags)
            self.kdgan_loss = tf.add_n(kdgan_losses,
                                       name='%s_kdgan_loss' % tch_scope)
            kdgan_optimizer = utils.get_opt(flags, self.learning_rate)
            # self.kdgan_update = kdgan_optimizer.minimize(self.kdgan_loss, global_step=global_step)
            gvs = kdgan_optimizer.compute_gradients(self.kdgan_loss, var_list)
            cgvs = [(tf.clip_by_norm(gv[0], config.max_norm), gv[1])
                    for gv in gvs]
            self.kdgan_update = kdgan_optimizer.apply_gradients(
                cgvs, global_step=global_step)
Пример #59
0
 def __dropout(self, net):
     net_shape = net.get_shape().as_list()
     noise_shape = [net_shape[0], 1, 1, net_shape[-1]]
     return slim.dropout(net, noise_shape=noise_shape)
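The noise_shape of [batch, 1, 1, channels] makes the dropout mask constant across the spatial dimensions, so whole feature maps are kept or dropped together (spatial dropout). A standalone sketch of the same idea for any NHWC tensor:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def spatial_dropout(net, keep_prob=0.5, is_training=True):
    # One Bernoulli draw per (example, channel), broadcast over height and width.
    shape = tf.shape(net)
    noise_shape = tf.stack([shape[0], 1, 1, shape[3]])
    return slim.dropout(net, keep_prob=keep_prob,
                        noise_shape=noise_shape, is_training=is_training)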
Пример #60
0
    def base_pooling(self, x, b):
        reuse = self.get_reuse(self._roi_pool_call)
        self._roi_pool_call += 1
        scope = self.scope
        L2_reg = self.args.box_filter_L2_reg
        dropout = self.args.dropout

        def _args_scope():
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                    activation_fn=self.act_func,
                    weights_regularizer=slim.l2_regularizer(L2_reg)):
                with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
                    return arg_sc

        with slim.arg_scope(_args_scope()):
            with tf.variable_scope(scope, scope, [x, b], reuse=reuse) as sc:
                end_points_collection = sc.name + '_end_points'
                # Collect outputs for conv2d, fully_connected and max_pool2d.
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
                    boxes_input = tf.identity(b[:, 1:], name='boxes')
                    batch_idx = tf.cast(b[:, 0],
                                        dtype=tf.int32,
                                        name='batch_idx')
                    pooled_features = tf.image.crop_and_resize(
                        x, boxes_input, batch_idx, crop_size=self.output_shape)
                    net = slim.conv2d(pooled_features,
                                      1024,
                                      self.output_shape,
                                      stride=[1, 1],
                                      padding='VALID',
                                      scope='conv1_phoc')
                    net = slim.conv2d(net,
                                      1024, [1, 1],
                                      stride=[1, 1],
                                      padding='VALID',
                                      scope='conv2_phoc')
                    # TODO: remove the flags
                    if not self.args.tiny_phoc:
                        net = slim.dropout(net,
                                           keep_prob=1 - dropout,
                                           is_training=Kb.learning_phase(),
                                           scope='dropout_phoc1')
                        net = slim.conv2d(net,
                                          1024, [1, 1],
                                          stride=[1, 1],
                                          padding='VALID',
                                          scope='conv3_phoc')
                    if not self.args.tiny_phoc and not self.args.bigger_phoc:
                        net = slim.dropout(net,
                                           keep_prob=1 - dropout,
                                           is_training=Kb.learning_phase(),
                                           scope='dropout_phoc2')
                        net = slim.conv2d(net,
                                          1024, [1, 1],
                                          stride=[1, 1],
                                          padding='VALID',
                                          scope='conv4_phoc')
                    net = slim.dropout(net,
                                       keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(),
                                       scope='dropout_phoc3')
                    net = slim.conv2d(net,
                                      1024, [1, 1],
                                      stride=1,
                                      scope='phoc_feature')
        return net
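The pooling above is built on tf.image.crop_and_resize, which expects box coordinates normalized to [0, 1] and a per-box batch index. A minimal standalone sketch of that RoI-cropping step, with illustrative shapes:

import tensorflow as tf

feature_map = tf.placeholder(tf.float32, shape=[None, 38, 50, 512])
boxes = tf.placeholder(tf.float32, shape=[None, 4])   # [y1, x1, y2, x2] in [0, 1]
box_idx = tf.placeholder(tf.int32, shape=[None])      # which image each box belongs to

rois = tf.image.crop_and_resize(feature_map, boxes, box_idx, crop_size=[7, 7])
# rois: [num_boxes, 7, 7, 512], ready for further convolution or pooling.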