Example #1
File: delf_v1.py  Project: bkj/tf-models
    def GetAttentionPrelogit(
            self,
            images,
            weight_decay=0.0001,
            attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
            attention_type=_SUPPORTED_ATTENTION_TYPES[0],
            kernel=1,
            training_resnet=False,
            training_attention=False,
            reuse=False,
            use_batch_norm=True):
        """Constructs attention model on resnet_v1_50.

        Args:
          images: A tensor of size [batch, height, width, channels].
          weight_decay: The parameters for weight_decay regularizer.
          attention_nonlinear: Type of non-linearity on top of the attention
            function.
          attention_type: Type of the attention structure.
          kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
          training_resnet: Whether or not the Resnet blocks from the model are in
            training mode.
          training_attention: Whether or not the attention part of the model is
            in training mode.
          reuse: Whether or not the layer and its variables should be reused.
          use_batch_norm: Whether or not to use batch normalization.

        Returns:
          prelogits: A tensor of size [batch, 1, 1, channels].
          attention_prob: Attention score after the non-linearity.
          attention_score: Attention score before the non-linearity.
          feature_map: Features extracted from the model, which are not
            l2-normalized.
          end_points: Set of activations for external use.
        """

        # Construct Resnet50 features.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
            _, end_points = self.GetResnet50Subnetwork(
                images, is_training=training_resnet, reuse=reuse)

        feature_map = end_points[self._target_layer_type]

        # Construct attention subnetwork on top of features.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay,
                                           use_batch_norm=use_batch_norm)):
            with slim.arg_scope([slim.batch_norm],
                                is_training=training_attention):
                (prelogits, attention_prob, attention_score,
                 end_points) = self._GetAttentionSubnetwork(
                     feature_map,
                     end_points,
                     attention_nonlinear=attention_nonlinear,
                     attention_type=attention_type,
                     kernel=kernel,
                     reuse=reuse)

        return prelogits, attention_prob, attention_score, feature_map, end_points
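For reference, a minimal, hypothetical sketch of how this method might be called (the DelfV1 constructor appears below in Example #18; the import path and input shape are assumptions):

# Hypothetical usage sketch; import path and shapes are illustrative.
import tensorflow as tf
from delf import delf_v1  # assumed import path

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
model = delf_v1.DelfV1()
prelogits, attention_prob, attention_score, feature_map, end_points = (
    model.GetAttentionPrelogit(images, training_attention=True))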
Example #2
    def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):

        """Create the graph of the resnetv1_50 model.
        """

        # Parse input arguments into class variables
        if weights_path == 'DEFAULT':
            self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_50.ckpt"
        else:
            self.WEIGHTS_PATH = weights_path
        self.train_layers = train_layers

        with tf.variable_scope("input"):
            self.image_size = resnet_v1.resnet_v1_50.default_image_size
            self.x_input = tf.placeholder(tf.float32, [None, self.image_size, self.image_size, 3], name="x_input")
            self.y_input = tf.placeholder(tf.float32, [None, num_classes], name="y_input")
            self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

        # train
        with arg_scope(resnet_v1.resnet_arg_scope()):
            self.logits, _ = resnet_v1.resnet_v1_50(self.x_input,
                                                    num_classes=num_classes,
                                                    is_training=True,
                                                    reuse=tf.AUTO_REUSE
                                                    )

        # validation
        with arg_scope(resnet_v1.resnet_arg_scope()):
            self.logits_val, _ = resnet_v1.resnet_v1_50(self.x_input,
                                                        num_classes=num_classes,
                                                        is_training=False,
                                                        reuse=tf.AUTO_REUSE
                                                        )

        with tf.name_scope("loss"):
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.y_input))
            self.loss_val = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits_val, labels=self.y_input))

        with tf.name_scope("train"):

            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            train_layers = train_layers or []
            var_list = [v for v in tf.trainable_variables()
                        if v.name.split('/')[-2] in train_layers
                        or v.name.split('/')[-3] in train_layers]
            gradients = tf.gradients(self.loss, var_list)
            self.grads_and_vars = list(zip(gradients, var_list))
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.apply_gradients(grads_and_vars=self.grads_and_vars, global_step=self.global_step)

        with tf.name_scope("probability"):
            self.probability = tf.nn.softmax(self.logits_val, name="probability")

        with tf.name_scope("prediction"):
            self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(self.prediction, tf.argmax(self.y_input, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
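A hypothetical training-step sketch for the class above; the class name is not shown in the snippet, so ResNetModel is a stand-in, and the batches are dummies:

# Hypothetical usage; `ResNetModel` stands in for the (unshown) class name.
import numpy as np
import tensorflow as tf

x_batch = np.zeros((4, 224, 224, 3), np.float32)  # dummy image batch
y_batch = np.zeros((4, 10), np.float32)           # dummy one-hot labels

model = ResNetModel(num_classes=10, train_layers=['logits'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, step, loss = sess.run(
        [model.train_op, model.global_step, model.loss],
        feed_dict={model.x_input: x_batch,
                   model.y_input: y_batch,
                   model.learning_rate: 1e-3})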
Example #3
  def GetAttentionPrelogit(
      self,
      images,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False,
      use_batch_norm=True):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.
      use_batch_norm: Whether or not to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Construct Resnet50 features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)):
      _, end_points = self.GetResnet50Subnetwork(
          images, is_training=training_resnet, reuse=reuse)

    feature_map = end_points[self._target_layer_type]

    # Construct attention subnetwork on top of features.
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, use_batch_norm=use_batch_norm)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        (prelogits, attention_prob, attention_score,
         end_points) = self._GetAttentionSubnetwork(
             feature_map,
             end_points,
             attention_nonlinear=attention_nonlinear,
             attention_type=attention_type,
             kernel=kernel,
             reuse=reuse)

    return prelogits, attention_prob, attention_score, feature_map, end_points
Example #4
def build_train_op(image_tensor, label_tensor, is_training):
    resnet_argscope = resnet_arg_scope(weight_decay=FLAGS.weight_decay)
    global_step = tf.get_variable(name="global_step",
                                  shape=[],
                                  dtype=tf.int32,
                                  trainable=False)
    with slim.arg_scope(resnet_argscope):
        logits, end_points = resnet_v1_50(image_tensor,
                                          is_training=is_training,
                                          num_classes=100)
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=label_tensor))
    accuracy = tf.reduce_sum(
        tf.cast(
            tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), label_tensor),
            tf.int32))
    end_points['loss'], end_points['accuracy'] = loss, accuracy
    if is_training:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op, end_points
    else:
        return None, end_points
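A hedged sketch of driving build_train_op above; FLAGS is assumed to be defined elsewhere in the project, and the constant tensors stand in for a real input pipeline:

# Hypothetical driver for build_train_op; stand-in tensors replace a real pipeline.
import tensorflow as tf

image_batch = tf.zeros([8, 224, 224, 3], tf.float32)  # stand-in images
label_batch = tf.zeros([8], tf.int32)                 # stand-in labels
train_op, end_points = build_train_op(image_batch, label_batch, is_training=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10):
        _, loss = sess.run([train_op, end_points['loss']])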
Example #5
  def fully_connected(self, input_, num_outputs, is_training, initializer, layer_name):
    if layer_name not in self._net_desc:
      return super(resnetv1_sep, self).fully_connected(input_, num_outputs, is_training, 
                                                       initializer, layer_name)
    K = self._net_desc[layer_name]
    layer1_name = LayerName(layer_name + '_sep_K'+str(K))
    with arg_scope(
      [slim.fully_connected],
      trainable=False,
      normalizer_fn=None,
      normalizer_params=None,
      biases_initializer=None,
      biases_regularizer=None):  # make the first layer clean: no BN, no biases, no activation fn
     
      net = slim.fully_connected(input_, K, weights_initializer=initializer,
                                trainable=is_training, activation_fn=None, scope=layer1_name)

    layer2_name = LayerName(layer_name)
#     layer2_name = LayerName(layer_name + '_K'+str(K))
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
      with arg_scope(
        [slim.fully_connected],
        trainable=False,
        normalizer_fn=None,
        normalizer_params=None):  # make the second layer without BN, but with biases
        net = slim.fully_connected(net, num_outputs, weights_initializer=initializer,
                                trainable=is_training, scope=layer2_name)
    return net
Example #6
  def rpn_convolution(self, net_conv4, is_training, initializer):
    layer_name = 'rpn_conv/3x3'

    if layer_name not in self._net_desc:
      return super(resnetv1_sep, self).rpn_convolution(net_conv4, is_training, initializer)

    K = self._net_desc[layer_name]
    layer1_name = LayerName(layer_name + '_sep_K'+str(K))
    with arg_scope(
      [slim.conv2d],
      trainable=False,
      normalizer_fn=None,
      normalizer_params=None,
      biases_initializer=None,
      biases_regularizer=None):  # make the first layer clean: no BN, no biases, no activation fn
     
      net = slim.conv2d(net_conv4, K, [3, 1], trainable=is_training, weights_initializer=initializer,
                        scope=layer1_name)

    layer2_name = LayerName(layer_name)
#       layer2_name = LayerName(layer_name + '_K'+str(K))
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
      with arg_scope(
        [slim.conv2d],
        trainable=False,
        normalizer_fn=None,
        normalizer_params=None):  # make the second layer without BN, but with biases
        net = slim.conv2d(net, 512, [1, 3], trainable=is_training, weights_initializer=initializer,
                          scope=layer2_name)
    return net
Example #7
def test_resnet_v1_50(img_dir):
    """
    Test ResNet-V1-50 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v1_50(inputs, 1000, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v1_50.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'resnet_v1_50/SpatialSqueeze:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]

        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred + 1]

    print('Result of ResNet-V1-50:', name, prob)
    return name, prob
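label_dict above is a module-level mapping not shown in the snippet; a hypothetical stand-in and call, for illustration only:

# Hypothetical stand-in for the module-level label_dict used above; the real
# mapping would come from an ImageNet label file (ids appear to be 1-based).
label_dict = {i: 'class_{}'.format(i) for i in range(1, 1001)}

name, prob = test_resnet_v1_50('./test.jpg')  # image path is illustrative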
Example #8
  def rpn_convolution(self, net_conv4, is_training, initializer):
    layer_name = 'rpn_conv/3x3'

    if layer_name not in self._comp_weights_dict.keys():
      return slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training, weights_initializer=initializer,
                        scope=layer_name)

    K = self._K_by_layer_dict[layer_name]
    layer1_name = LayerName(layer_name.replace('conv', 'convsep'))
    with arg_scope(
      [slim.conv2d],
      trainable=False,
      normalizer_fn=None,
      normalizer_params=None,
      biases_initializer=None,
      biases_regularizer=None):  # make the first layer clean: no BN, no biases, no activation fn
     
      net = slim.conv2d(net_conv4, K, [3, 1], trainable=is_training, weights_initializer=initializer,
                        scope=layer1_name)

    with slim.arg_scope(resnet_arg_scope(is_training=False)):
      with arg_scope(
        [slim.conv2d],
        trainable=False,
        normalizer_fn=None,
        normalizer_params=None):  # make the second layer without BN, but with biases
        net = slim.conv2d(net, 512, [1, 3], trainable=is_training, weights_initializer=initializer,
                          scope=layer_name)
    return net
Example #9
def network_entire(images):
	'''
	A tensorflow operation that extracts features for a batch of images.

	Args:
		images: Numpy array of shape (n, h, w, 3).

	Returns:
		embedding: Tensor of shape (n, 128).
	'''
	# Normalization.
	images = images - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

	# Travel through the network and get the embedding.
	with slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
		_, endpoints = resnet_v1_50(images, num_classes=None, is_training=False, global_pool=True)

	endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
		endpoints['resnet_v1_50/block4'], [1, 2], name='pool5', keep_dims=False)

	with tf.name_scope('head'):
		endpoints = head(endpoints, embedding_dim, is_training=False)

	embedding = endpoints['emb']

	return embedding
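The names _RGB_MEAN, embedding_dim, and head above come from the surrounding module (head is a separate network head defined elsewhere); a sketch of plausible definitions, where the mean values are an assumption and the 128-dim embedding is stated in the docstring:

# Assumed module-level definitions referenced by network_entire above.
_RGB_MEAN = [123.68, 116.78, 103.94]  # common ImageNet per-channel RGB mean
embedding_dim = 128                   # matches the (n, 128) shape in the docstring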
Example #10
    def __init__(self):
        from nets import resnet_v1

        self.image_size = 224
        self.num_classes = 1000
        self.predictions_is_correct = False
        self.use_larger_step_size = False
        self.use_smoothed_grad = False

        # For dataprior attacks. gamma = A^2 * D / d in the paper
        self.gamma = 2.7

        batch_shape = [None, self.image_size, self.image_size, 3]
        self.x_input = tf.placeholder(tf.float32, shape=batch_shape)
        self.target_label = tf.placeholder(tf.int32, shape=[None])
        target_onehot = tf.one_hot(self.target_label, self.num_classes)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(
                self.x_input, num_classes=self.num_classes, is_training=False)

        self.predicted_labels = tf.argmax(end_points['predictions'], 1)
        #logits -= tf.reduce_min(logits)
        #real = tf.reduce_max(logits * target_onehot, 1)
        #other = tf.reduce_max(logits * (1 - target_onehot), 1)
        #self.loss = other - real
        self.loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=target_onehot, logits=logits)
        self.grad = 255.0 * tf.gradients(self.loss, self.x_input)[0]

        saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v1'))
        self.sess = tf.get_default_session()
        saver.restore(self.sess, 'resnet_v1_50.ckpt')
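A hypothetical sketch of querying the gradient defined above; the wrapping class name is not shown in the snippet, so Model is a stand-in:

# Hypothetical usage; tf.get_default_session() in __init__ requires running
# inside a session context, hence the as_default() block.
import numpy as np
import tensorflow as tf

with tf.Session().as_default():
    model = Model()  # builds the graph and restores resnet_v1_50.ckpt
    grad = model.sess.run(
        model.grad,
        feed_dict={model.x_input: np.zeros((1, 224, 224, 3), np.float32),
                   model.target_label: np.array([0], np.int32)})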
Example #11
 def _resnet_v2_50(self,
                   X,
                   num_classes,
                   dropout_keep_prob=0.8,
                   is_train=False):
     arg_scope = resnet_arg_scope()
     with slim.arg_scope(arg_scope):
         net, end_points = resnet_v2_50(X, is_training=is_train)
     with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                         stride=1,
                         padding='SAME'):
         with tf.variable_scope('Logits_out'):
             net = slim.conv2d(net,
                               1000, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='Logits_out0')
             net = slim.dropout(net,
                                dropout_keep_prob,
                                scope='Dropout_1b_out0')
             net = slim.conv2d(net,
                               200, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='Logits_out1')
             net = slim.dropout(net,
                                dropout_keep_prob,
                                scope='Dropout_1b_out1')
             net = slim.conv2d(net,
                               num_classes, [1, 1],
                               activation_fn=None,
                               normalizer_fn=None,
                               scope='Logits_out2')
             net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
     return net
Example #12
def generate_graph(output_root):
    os.makedirs(output_root, exist_ok=True)

    slim_dir = os.path.join(output_root, "models/slim")
    if not os.path.exists(slim_dir):
        clone_slim(output_root)

    sys.path.append(slim_dir)
    from nets import resnet_v1
    image_size = resnet_v1.resnet_v1.default_image_size

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        x = tf.placeholder(tf.float32, [1, image_size, image_size, 3])
        logits, _ = resnet_v1.resnet_v1_50(x,
                                           num_classes=1000,
                                           is_training=False)
        y = tf.nn.softmax(logits)

    model_path = download_model(output_root)
    sess = tf.Session()
    slim.assign_from_checkpoint_fn(model_path,
                                   slim.get_model_variables())(sess)

    graph = TensorFlowConverter(sess, batch_size=1).convert([x], [y])
    return sess, x, y, graph
Example #13
        def build_layer(K):
            with arg_scope(
                [slim.conv2d],
                    trainable=False,
                    normalizer_fn=None,
                    normalizer_params=None,
                    biases_initializer=None,
                    biases_regularizer=None
            ):  # make the first layer clean: no BN, no biases, no activation fn

                layer1_name = LayerName(layer_name + '_sep_K' + str(K))
                net = slim.conv2d(net_conv4,
                                  K, [3, 1],
                                  trainable=is_training,
                                  weights_initializer=initializer,
                                  scope=layer1_name)

                layer2_name = LayerName(layer_name + '_K' + str(K))
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                with arg_scope([slim.conv2d],
                               trainable=False,
                               normalizer_fn=None,
                               normalizer_params=None
                               ):  # make the second layer without BN, but with biases
                    net = slim.conv2d(net,
                                      512, [1, 3],
                                      trainable=is_training,
                                      weights_initializer=initializer,
                                      scope=layer2_name)
            return net
Example #14
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              min_base_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)
          image_features = self._filter_features(image_features)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope(self._fpn_scope_name,
                               reuse=self._reuse_weights):
          base_fpn_max_level = min(self._fpn_max_level, 5)
          feature_block_list = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_block_list.append('block{}'.format(level - 1))
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key]) for key in feature_block_list],
              depth=depth_fn(self._additional_layer_depth))
          feature_maps = []
          for level in range(self._fpn_min_level, base_fpn_max_level + 1):
            feature_maps.append(
                fpn_features['top_down_block{}'.format(level - 1)])
          last_feature_map = fpn_features['top_down_block{}'.format(
              base_fpn_max_level - 1)]
          # Construct coarse features
          for i in range(base_fpn_max_level, self._fpn_max_level):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=depth_fn(self._additional_layer_depth),
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_block{}'.format(i))
            feature_maps.append(last_feature_map)
    return feature_maps
Example #15
        def build_layer(K):
            with arg_scope(
                [slim.conv2d],
                    weights_regularizer=None,
                    weights_initializer=None,
                    trainable=False,
                    activation_fn=None,
                    normalizer_fn=None,
                    normalizer_params=None,
                    biases_initializer=None
            ):  # make the first layer clean: no BN, no biases, no activation fn

                layer1_name = LayerName(layer_name + '_sep_K' + str(K))
                net = conv2d_same(inputs,
                                  K,
                                  kernel_size=(kernel_size, 1),
                                  stride=[stride, 1],
                                  scope=layer1_name)

                layer2_name = LayerName(layer_name + '_K' + str(K))
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                net = conv2d_same(net,
                                  num_output_channels,
                                  kernel_size=(1, kernel_size),
                                  stride=[1, stride],
                                  scope=layer2_name)
            return net
Example #16
        def build_layer(K):
            with arg_scope(
                [slim.fully_connected],
                    trainable=False,
                    normalizer_fn=None,
                    normalizer_params=None,
                    biases_initializer=None,
                    biases_regularizer=None
            ):  # make the first layer clean: no BN, no biases, no activation fn

                layer1_name = LayerName(layer_name + '_sep_K' + str(K))
                net = slim.fully_connected(input_,
                                           K,
                                           weights_initializer=initializer,
                                           trainable=is_training,
                                           activation_fn=None,
                                           scope=layer1_name)

                layer2_name = LayerName(layer_name + '_K' + str(K))
            with slim.arg_scope(resnet_arg_scope(is_training=False)):
                with arg_scope([slim.fully_connected],
                               trainable=False,
                               normalizer_fn=None,
                               normalizer_params=None
                               ):  # make the second layer without BN, but with biases
                    net = slim.fully_connected(net,
                                               num_outputs,
                                               weights_initializer=initializer,
                                               trainable=is_training,
                                               scope=layer2_name)
            return net
Example #17
def trans_conv_3(inputs, kp_num=1):
    depth = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    with tf.variable_scope('keypoints_trans_conv_3', reuse=tf.AUTO_REUSE):
        with tf.contrib.slim.arg_scope(resnet_arg_scope()):
            net = slim.conv2d_transpose(inputs, 64, [3, 3], stride=2, padding='SAME', activation_fn=None,
                                        scope='trans_conv_3')
    return net
Example #18
def build_model(images, num_classes, is_training=True, reuse=None):
    model = delf_v1.DelfV1()
    net, end_points = model.GetResnet50Subnetwork(images,
                                                  global_pool=True,
                                                  is_training=is_training,
                                                  reuse=reuse)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=0.0001,
                                       batch_norm_scale=True)):
        with slim.arg_scope([slim.batch_norm], is_training=True):
            feature_map = end_points['resnet_v1_50/block3']
            feature_map = slim.conv2d(feature_map,
                                      512,
                                      1,
                                      rate=1,
                                      activation_fn=tf.nn.relu,
                                      scope='conv1')
            feature_map = tf.reduce_mean(feature_map, [1, 2])
            feature_map = tf.expand_dims(tf.expand_dims(feature_map, 1), 2)
            logits = slim.conv2d(feature_map,
                                 num_classes, [1, 1],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 scope='logits')
            logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits
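A minimal sketch of calling build_model above (shapes and class count are illustrative):

# Hypothetical usage of build_model defined above.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits = build_model(images, num_classes=1000, is_training=True)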
Example #19
    def build(self, weight_path, sess, input_type=InputType.BASE64_JPEG):
        self.input_tensor = None
        self.session = sess
        if input_type == InputType.TENSOR:
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, 224, 224, 3],
                                        name="input")
            self.input_tensor = self.input
        elif input_type == InputType.BASE64_JPEG:
            self.input = tf.placeholder(tf.string,
                                        shape=(None, ),
                                        name='input')
            self.input_tensor = load_base64_tensor(self.input)
        else:
            raise ValueError('invalid input type')

        # only load inference model
        with arg_scope(
                resnet_v1.resnet_arg_scope(activation_fn=tf.nn.relu,
                                           weight_decay=0.0001)):
            self.logits_val, end_points = resnet_v1.resnet_v1_152(
                self.input_tensor,
                num_classes=self.num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)
        # self.predictions = tf.nn.softmax(self.logits_val, name='Softmax')
        self.predictions = end_points['predictions']
        self.output = tf.identity(self.predictions, name='outputs')

        if weight_path is not None:
            self.load_trained_weights(weight_path)
Example #20
def mag(inputs,
        num_classes=3,
        num_channels=1000,
        is_training=True,
        global_pool=False,
        output_stride=16,
        upsample_ratio=2,
        spatial_squeeze=False,
        reuse=tf.AUTO_REUSE,
        scope='graspnet'):
    with tf.variable_scope(scope, 'graspnet', [inputs], reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(inputs=inputs,
                                                     num_classes=num_channels,
                                                     is_training=is_training,
                                                     global_pool=global_pool,
                                                     output_stride=output_stride,
                                                     spatial_squeeze=spatial_squeeze,
                                                     scope='feature_extractor')
        with tf.variable_scope('prediction', [net]) as sc:
            end_points_collection = sc.original_name_scope + '_end_points'
            # to do: add batch normalization to the following conv layers.
            with slim.arg_scope([slim.conv2d],
                                outputs_collections=end_points_collection):
                net = slim.conv2d(net, 512, [1, 1], scope='conv1')
                net = slim.conv2d(net, 128, [1, 1], scope='conv2')
                net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
                height, width = net.get_shape().as_list()[1:3]
                net = tf.image.resize_bilinear(net,
                                               [height * upsample_ratio, width * upsample_ratio],
                                               name='resize_bilinear')
                end_points.update(slim.utils.convert_collection_to_dict(end_points_collection))
    end_points['logits'] = net
    return net, end_points
Example #21
def single_tower(colors,
                 depths,
                 num_classes=3,
                 num_channels=1000,
                 is_training=True,
                 global_pool=False,
                 output_stride=16,
                 spatial_squeeze=False,
                 scope='arcnet'):
    inputs = tf.concat([colors, depths], axis=3)
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(
            inputs=inputs,
            num_classes=num_channels,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
            spatial_squeeze=spatial_squeeze,
            scope=scope + '_tower')
    with tf.variable_scope(scope, 'arcnet', [net]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # to do: add batch normalization to the following conv layers.
        with slim.arg_scope([slim.conv2d],
                            outputs_collections=end_points_collection):
            net = slim.conv2d(net, 512, [1, 1], scope='conv1')
            net = slim.conv2d(net, 128, [1, 1], scope='conv2')
            net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
            height, width = net.get_shape().as_list()[1:3]
            net = tf.image.resize_bilinear(net, [height * 2, width * 2],
                                           name='resize_bilinear')
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
    end_points['logits'] = net
    return net, end_points
Example #22
  def _GetAttentionModel(
      self,
      images,
      num_classes,
      weight_decay=0.0001,
      attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
      attention_type=_SUPPORTED_ATTENTION_TYPES[0],
      kernel=1,
      training_resnet=False,
      training_attention=False,
      reuse=False):
    """Constructs attention model on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels]
      num_classes: The number of output classes.
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are in
        training mode.
      training_attention: Whether or not the attention part of the model is in
        training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """

    attention_feat, attention_prob, attention_score, feature_map, _ = (
        self.GetAttentionPrelogit(
            images,
            weight_decay,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            training_resnet=training_resnet,
            training_attention=training_attention,
            reuse=reuse))
    with slim.arg_scope(
        resnet_v1.resnet_arg_scope(
            weight_decay=weight_decay, batch_norm_scale=True)):
      with slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
            _ATTENTION_VARIABLE_SCOPE, values=[attention_feat], reuse=reuse):
          logits = slim.conv2d(
              attention_feat,
              num_classes, [1, 1],
              activation_fn=None,
              normalizer_fn=None,
              scope='logits')
          logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map
Example #23
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          _, image_features = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)
          image_features = self._filter_features(image_features)
      with slim.arg_scope(self._conv_hyperparams_fn()):
        with tf.variable_scope(self._fpn_scope_name,
                               reuse=self._reuse_weights):
          fpn_features = feature_map_generators.fpn_top_down_feature_maps(
              [(key, image_features[key])
               for key in ['block2', 'block3', 'block4']],
              depth=256)
          last_feature_map = fpn_features['top_down_block4']
          coarse_features = {}
          for i in range(5, 7):
            last_feature_map = slim.conv2d(
                last_feature_map,
                num_outputs=256,
                kernel_size=[3, 3],
                stride=2,
                padding='SAME',
                scope='bottom_up_block{}'.format(i))
            coarse_features['bottom_up_block{}'.format(i)] = last_feature_map
    return [fpn_features['top_down_block2'],
            fpn_features['top_down_block3'],
            fpn_features['top_down_block4'],
            coarse_features['bottom_up_block5'],
            coarse_features['bottom_up_block6']]
Example #25
def model(images, weight_decay=1e-5, is_training=True, eval=False):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images, eval)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
           
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                   # logging.info(i)
                   # logging.info(g[i-1].get_shape().as_list())
                    #logging.info(f[i].get_shape().as_list())
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))
            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)
            '''
            F_score = slim.conv2d(end_points['pool2'], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            geo_map = slim.conv2d(end_points['pool2'], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(end_points['pool2'], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            '''
            
    return F_score, F_geometry
Example #26
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]

        Raises:
          ValueError: depth multiplier is not supported.
        """
        if self._depth_multiplier != 1.0:
            raise ValueError('Depth multiplier not supported.')

        preprocessed_inputs = shape_utils.check_min_image_dim(
            129, preprocessed_inputs)

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        sparsity_type=self._sparsity_type,
                        sparse_dense_branch=self._sparse_dense_branch,
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        include_root_block=self._include_root_block,
                        depthwise_convolution=self._depthwise_convolution,
                        max_pool_subsample=self._max_pool_subsample,
                        root_downsampling_rate=self._root_downsampling_rate,
                        store_non_strided_activations=self._store_non_strided_activations,
                        scope=scope)
                    image_features = self._filter_features(image_features)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                with tf.variable_scope('multiscale_feature_extractor',
                                       reuse=self._reuse_weights):
                    feature_block_list = []
                    for level in range(2, 6):
                        feature_block_list.append('block{}'.format(level - 1))

                    multiscale_features = feature_map_generators.multiscale_fusion_feature_maps(
                        [(key, image_features[key])
                         for key in feature_block_list],
                        depth=256)

                    feature_maps = [multiscale_features['feature_map']]

        return feature_maps
Example #27
    def extract_features(self, preprocessed_inputs):
        """Extract features from preprocessed inputs.

        Args:
          preprocessed_inputs: a [batch, height, width, channels] float tensor
            representing a batch of images.

        Returns:
          feature_maps: a list of tensors where the ith tensor has shape
            [batch, height_i, width_i, depth_i]
        """
        preprocessed_inputs = shape_utils.check_min_image_dim(
            33, preprocessed_inputs)

        feature_map_layout = {
            'from_layer': [
                'FeatureExtractor/{}/block3'.format(self._resnet_scope_name),
                'FeatureExtractor/{}/block4'.format(self._resnet_scope_name),
                '', '', '', ''
            ],
            'layer_depth': [-1, -1, 512, 256, 256, 128],
            'use_depthwise':
            self._use_depthwise,
            'use_explicit_padding':
            self._use_explicit_padding,
        }

        if self._num_layers == 7:
            feature_map_layout['from_layer'] += ['']
            feature_map_layout['layer_depth'] += [64]

        with tf.variable_scope(self._resnet_scope_name,
                               reuse=self._reuse_weights) as scope:
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                with (slim.arg_scope(self._conv_hyperparams_fn())
                      if self._override_base_feature_extractor_hyperparams else
                      context_manager.IdentityContextManager()):
                    _, image_features = self._resnet_base_fn(
                        inputs=ops.pad_to_multiple(preprocessed_inputs,
                                                   self._pad_to_multiple),
                        num_classes=None,
                        is_training=None,
                        global_pool=False,
                        output_stride=None,
                        store_non_strided_activations=True,
                        min_base_depth=self._min_depth,
                        depth_multiplier=self._depth_multiplier,
                        scope=scope)
            with slim.arg_scope(self._conv_hyperparams_fn()):
                feature_maps = feature_map_generators.multi_resolution_feature_maps(
                    feature_map_layout=feature_map_layout,
                    depth_multiplier=self._depth_multiplier,
                    min_depth=self._min_depth,
                    insert_1x1_conv=True,
                    image_features=image_features)

        return feature_maps.values()
Example #28
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None, None]
            h = [None, None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    ################ Modified by Xiaolong March. 9th ####################
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))
            h[4] = GlobalAveragePooling2D()(g[3])
            # the predicted class score is mapped back to the previous convolutional
            # layer to generate the class activation map (CAM); the CAM highlights
            # the discriminative image regions
            h4_tile = tf.tile(tf.reshape(h[4],[-1, 1, num_outputs[3], 1]), [1, tf.shape(g[3])[1], 1, 1])
            ram = tf.matmul(g[3],h4_tile)
            g[4] = slim.conv2d(ram, num_outputs[3], 3)
            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[4], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, ram
Example #29
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['pool5'], end_points['pool4'], end_points['pool3'],
                end_points['pool2']
            ]
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
            F_score = slim.conv2d(g[3],
                                  1,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
Example #30
def hmnet_layer_2(inputs, kp_num=1):
    depth = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    with tf.variable_scope('heatmap_layer_2'):
        with tf.contrib.slim.arg_scope(resnet_arg_scope()):
            net = slim.conv2d_transpose(inputs,
                                        64, [3, 3],
                                        stride=2,
                                        padding='SAME',
                                        scope='deconv3')
    return net
Example #31
def get_network_fn(num_classes, weight_decay=0.0):
    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=weight_decay)
    func = resnet_v1.resnet_v1_50
    @functools.wraps(func)
    def network_fn(images):
        with slim.arg_scope(arg_scope):
            return func(images, num_classes)
    if hasattr(func, 'default_image_size'):
        network_fn.default_image_size = func.default_image_size
    return network_fn
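A short usage sketch of the factory above; default_image_size is forwarded from resnet_v1_50 by the wrapper:

# Hypothetical usage of get_network_fn.
network_fn = get_network_fn(num_classes=1000, weight_decay=1e-4)
size = network_fn.default_image_size  # 224 for resnet_v1_50
images = tf.placeholder(tf.float32, [None, size, size, 3])
logits, end_points = network_fn(images)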
Example #32
def ResNet50Model(input_tensor, weight_decay=1e-5, is_training=True):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        input_tensor = tf.image.resize_images(input_tensor, [224, 224])
        logits, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')
        feature = tf.reduce_mean(logits, reduction_indices=[1, 2])
        fc1 = tf.contrib.layers.fully_connected(feature, num_outputs=512)
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=10)
    return fc2
Example #33
def _resnet_rf(csv_writer=None):
  """Computes RF and associated parameters for resnet models.

  The computed values are written to stdout.

  Args:
    csv_writer: A CSV writer for RF parameters, which is used if it is not None.
  """
  for model_type in _SUPPORTED_RESNET_VARIANTS:
    arg_sc = resnet_v1.resnet_arg_scope()
    _process_model_rf(model_type, csv_writer, arg_sc)
Example #34
File: model.py  Project: ausk/EAST_ICPR
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model; we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # Here we use a slightly different approach for the regression
            # part: a sigmoid first limits the regression range; the same is
            # done for the angle map.
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2  # angle is in [-45, 45] degrees
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
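
The model above relies on two helpers that are not defined in this snippet. In the EAST reference implementation they are roughly the following (a sketch; the channel means are the usual slim ImageNet values):

def unpool(inputs):
    # Double the spatial resolution with bilinear resizing.
    return tf.image.resize_bilinear(
        inputs, size=[tf.shape(inputs)[1] * 2, tf.shape(inputs)[2] * 2])

def mean_image_subtraction(images, means=(123.68, 116.78, 103.94)):
    # Subtract per-channel means from a [batch, H, W, 3] float image batch.
    channels = tf.split(images, num_or_size_splits=3, axis=3)
    for i in range(3):
        channels[i] -= means[i]
    return tf.concat(channels, axis=3)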
  def extract_features(self, preprocessed_inputs):
    """Extract features from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
      raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
        self._resnet_scope_name, reuse=self._reuse_weights) as scope:
      with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with (slim.arg_scope(self._conv_hyperparams_fn())
              if self._override_base_feature_extractor_hyperparams else
              context_manager.IdentityContextManager()):
          with slim.arg_scope(
              [resnet_v1.bottleneck],
              use_bounded_activations=self._use_bounded_activations):
            _, activations = self._resnet_base_fn(
                inputs=ops.pad_to_multiple(preprocessed_inputs,
                                           self._pad_to_multiple),
                num_classes=None,
                is_training=None,
                global_pool=False,
                output_stride=None,
                store_non_strided_activations=True,
                scope=scope)

      with slim.arg_scope(self._conv_hyperparams_fn()):
        feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
            base_feature_map_depth=self._base_feature_map_depth,
            num_layers=self._num_layers,
            image_features={
                'image_features': self._filter_features(activations)['block3']
            })
    return feature_maps.values()
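
For reference, ops.pad_to_multiple from the TF Object Detection API zero-pads the batch so its height and width become multiples of self._pad_to_multiple; a minimal standalone sketch of that behaviour:

def pad_to_multiple(tensor, multiple):
    # Zero-pad the bottom/right of a [batch, height, width, channels]
    # tensor so that height and width are multiples of `multiple`.
    shape = tf.shape(tensor)
    pad_h = (multiple - shape[1] % multiple) % multiple
    pad_w = (multiple - shape[2] % multiple) % multiple
    return tf.pad(tensor, [[0, 0], [0, pad_h], [0, pad_w], [0, 0]])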
def resnet_v1_50_16s(image_batch_tensor,
                      number_of_classes,
                      is_training):
    """Returns the resnet_v1_50_16s model definition.
    The function returns the model definition of a network that was described
    in 'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al.
    The network subsamples the input by a factor of 16 and uses a bilinear
    upsampling kernel to upsample the prediction by a factor of 16. This means
    that if the image size is not a multiple of 16, a prediction of a different
    size will be delivered. To adapt the network to inputs of any size, use
    adapt_network_for_any_size_input(resnet_v1_50_16s, 16). Note: the upsampling
    kernel is fixed in this model definition, because learning it did not give
    significant improvements according to the aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful: the output can be of a different size than the input;
        use adapt_network_for_any_size_input to adapt the network for any
        input size. Otherwise, the input image sizes should be a multiple
        of 16.
    resnet_v1_50_16s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_50_16s model's variables to
        resnet_v1_50 checkpoint variable names. We need this to initialize
        the weights of the resnet_v1_50_16s model from a resnet_v1_50
        checkpoint file. Look at the ipython notebook for examples.
    """
    
    with tf.variable_scope("resnet_v1_50_16s") as resnet_v1_50_16s:

        upsample_factor = 16

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make a pull request to get this custom feature accepted
        # upstream, to avoid depending on a custom slim repo.

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(mean_centered_image_batch,
                                                number_of_classes,
                                                is_training=is_training,
                                                global_pool=False,
                                                output_stride=16)
        

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        upsampled_logits_shape = tf.stack([
            downsampled_logits_shape[0],
            downsampled_logits_shape[1] * upsample_factor,
            downsampled_logits_shape[2] * upsample_factor,
            downsampled_logits_shape[3]
        ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original resnet_v1_50 variable names to the variables in
        # our model. This is done to make it possible to use
        # assign_from_checkpoint_fn() while providing this mapping.
        # TODO: make it cleaner
        resnet_v1_50_16s_variables_mapping = {}

        resnet_v1_50_16s_variables = slim.get_variables(resnet_v1_50_16s)

        for variable in resnet_v1_50_16s_variables:

            # Remove the scope prefix (and the trailing ':0') from the
            # variable name to recover the original checkpoint name.
            original_resnet_v1_50_checkpoint_string = variable.name[len(resnet_v1_50_16s.original_name_scope):-2]
            resnet_v1_50_16s_variables_mapping[original_resnet_v1_50_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_50_16s_variables_mapping
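
The bilinear_upsample_weights helper is not shown above; a standard FCN-style definition that builds a fixed per-class bilinear kernel (a sketch consistent with the call sites in this snippet) is:

import numpy as np

def bilinear_upsample_weights(factor, number_of_classes):
    # Build a [size, size, num_classes, num_classes] transposed-conv filter
    # that performs per-class bilinear upsampling by `factor`.
    filter_size = 2 * factor - factor % 2
    center = factor - 1 if filter_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    kernel = ((1 - abs(og[0] - center) / factor) *
              (1 - abs(og[1] - center) / factor))
    weights = np.zeros((filter_size, filter_size,
                        number_of_classes, number_of_classes),
                       dtype=np.float32)
    for i in range(number_of_classes):
        weights[:, :, i, i] = kernel
    return weights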