def GetAttentionPrelogit(
        self,
        images,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False,
        use_batch_norm=True):
    """Builds the ResNet-50 trunk and stacks the attention subnetwork on it.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: Weight-decay value handed to the ResNet arg scope used
        for the attention layers.
      attention_nonlinear: Type of non-linearity applied on top of the
        attention function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether the ResNet blocks are built in training mode.
      training_attention: Whether batch norm in the attention part is built
        in training mode.
      reuse: Whether the layers and their variables should be reused.
      use_batch_norm: Whether to use batch normalization.

    Returns:
      prelogits: A tensor of size [batch, 1, 1, channels].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
      end_points: Set of activations for external use.
    """
    # Stage 1: backbone activations; the feature map is picked by the
    # configured target layer.
    backbone_scope = resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)
    with slim.arg_scope(backbone_scope):
        _, end_points = self.GetResnet50Subnetwork(
            images, is_training=training_resnet, reuse=reuse)
    feature_map = end_points[self._target_layer_type]

    # Stage 2: attention layers, with their own weight decay and their own
    # batch-norm training flag.
    attention_scope = resnet_v1.resnet_arg_scope(
        weight_decay=weight_decay, use_batch_norm=use_batch_norm)
    with slim.arg_scope(attention_scope), \
            slim.arg_scope([slim.batch_norm], is_training=training_attention):
        attention_outputs = self._GetAttentionSubnetwork(
            feature_map,
            end_points,
            attention_nonlinear=attention_nonlinear,
            attention_type=attention_type,
            kernel=kernel,
            reuse=reuse)

    prelogits, attention_prob, attention_score, end_points = attention_outputs
    return prelogits, attention_prob, attention_score, feature_map, end_points
def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
    """Create the graph of the resnetv1_50 model.

    Two towers sharing the same variables are built on the same input
    placeholder: one in training mode (`self.logits`) and one in inference
    mode (`self.logits_val`). Loss, train op, probability, prediction and
    accuracy ops are attached to them.

    Args:
      num_classes: Number of output classes; also the width of the one-hot
        label placeholder `y_input`.
      train_layers: Collection of scope names. A trainable variable is
        optimised when the second-to-last or third-to-last component of its
        '/'-separated name is in this collection. `None` (the default)
        optimises every trainable variable.
      weights_path: Checkpoint path stored in `self.WEIGHTS_PATH`; the
        literal 'DEFAULT' selects the bundled default path.
    """
    # Parse input arguments into class variables.
    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_50.ckpt"
    else:
        self.WEIGHTS_PATH = weights_path
    self.train_layers = train_layers

    with tf.variable_scope("input"):
        self.image_size = resnet_v1.resnet_v1_50.default_image_size
        self.x_input = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name="x_input")
        self.y_input = tf.placeholder(
            tf.float32, [None, num_classes], name="y_input")
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # Training tower.
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits, _ = resnet_v1.resnet_v1_50(
            self.x_input,
            num_classes=num_classes,
            is_training=True,
            reuse=tf.AUTO_REUSE)

    # Validation tower. The keyword must be `reuse` (the original spelled it
    # `euse`, which raises TypeError) so that it shares the training
    # tower's variables.
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits_val, _ = resnet_v1.resnet_v1_50(
            self.x_input,
            num_classes=num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits, labels=self.y_input))
        self.loss_val = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.logits_val, labels=self.y_input))

    with tf.name_scope("train"):
        self.global_step = tf.Variable(
            0, name="global_step", trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        trainable = tf.trainable_variables()
        if train_layers is None:
            # No filter requested: optimise everything instead of failing on
            # `in None`.
            var_list = trainable
        else:
            # parts[-3:-1] is (third-to-last, second-to-last) and simply gets
            # shorter for short names, so no IndexError is possible.
            var_list = [
                v for v in trainable
                if any(part in train_layers
                       for part in v.name.split('/')[-3:-1])
            ]

        gradients = tf.gradients(self.loss, var_list)
        self.grads_and_vars = list(zip(gradients, var_list))
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

        # Run the collected update ops together with every training step.
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(
                grads_and_vars=self.grads_and_vars,
                global_step=self.global_step)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val, name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(
            self.prediction, tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, "float"), name="accuracy")
def GetAttentionPrelogit(
        self,
        images,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False,
        use_batch_norm=True):
    """Constructs the attention model (up to the prelogits) on resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels].
      weight_decay: Weight-decay value for the attention layers' arg scope.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether the ResNet blocks are in training mode.
      training_attention: Whether the attention part is in training mode.
      reuse: Whether the layers and their variables should be reused.
      use_batch_norm: Whether to use batch normalization.

    Returns:
      A 5-tuple `(prelogits, attention_prob, attention_score, feature_map,
      end_points)`: the [batch, 1, 1, channels] prelogits, the attention
      score after and before the non-linearity, the (not l2-normalized)
      feature map taken from the backbone, and the activations dictionary
      returned by the attention subnetwork.
    """
    # ResNet-50 features.
    resnet_scope = resnet_v1.resnet_arg_scope(use_batch_norm=use_batch_norm)
    with slim.arg_scope(resnet_scope):
        _, backbone_end_points = self.GetResnet50Subnetwork(
            images, is_training=training_resnet, reuse=reuse)
    feature_map = backbone_end_points[self._target_layer_type]

    # Attention subnetwork on top of those features.
    subnetwork_kwargs = dict(
        attention_nonlinear=attention_nonlinear,
        attention_type=attention_type,
        kernel=kernel,
        reuse=reuse)
    attention_arg_scope = resnet_v1.resnet_arg_scope(
        weight_decay=weight_decay, use_batch_norm=use_batch_norm)
    with slim.arg_scope(attention_arg_scope):
        with slim.arg_scope([slim.batch_norm],
                            is_training=training_attention):
            (prelogits, attention_prob, attention_score,
             end_points) = self._GetAttentionSubnetwork(
                 feature_map, backbone_end_points, **subnetwork_kwargs)

    return prelogits, attention_prob, attention_score, feature_map, end_points
def build_train_op(image_tensor, label_tensor, is_training):
    """Build a 100-class ResNet-v1-50 classifier and, optionally, its train op.

    Args:
      image_tensor: Input images fed to `resnet_v1_50`.
      label_tensor: Integer class labels; compared against the int32 argmax
        of the logits and used as sparse labels for the loss.
      is_training: Python bool. Selects the network mode and whether an
        optimizer step is created.

    Returns:
      `(train_op, end_points)`. `train_op` is `None` when `is_training` is
      false. `end_points` additionally carries:
        'loss': mean sparse softmax cross-entropy.
        'accuracy': the *number* of correct predictions in the batch (an
          int32 sum, not a ratio).
    """
    resnet_argscope = resnet_arg_scope(weight_decay=FLAGS.weight_decay)
    global_step = tf.get_variable(
        name="global_step", shape=[], dtype=tf.int32, trainable=False)

    with slim.arg_scope(resnet_argscope):
        logits, end_points = resnet_v1_50(
            image_tensor, is_training=is_training, num_classes=100)

    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label_tensor))
    predicted = tf.cast(tf.argmax(logits, 1), tf.int32)
    accuracy = tf.reduce_sum(
        tf.cast(tf.equal(predicted, label_tensor), tf.int32))
    end_points['loss'], end_points['accuracy'] = loss, accuracy

    if not is_training:
        return None, end_points

    optimizer = tf.train.AdadeltaOptimizer(learning_rate=FLAGS.learning_rate)
    # Batch-norm layers register their moving-average updates in UPDATE_OPS;
    # unless the train op depends on them the moving statistics never change
    # and inference-mode results stay at their initial values.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # NOTE(review): the weight-decay regularization losses created by the
        # arg scope are not added to `loss`; confirm whether that is intended.
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op, end_points
def fully_connected(self, input_, num_outputs, is_training, initializer,
                    layer_name):
    """Build `layer_name` as two stacked fully connected layers.

    Layers that are not listed in `self._net_desc` are delegated unchanged
    to the parent class. Otherwise the value stored for the layer is used as
    the width of an intermediate layer without biases, normalizer or
    activation, followed by a `num_outputs`-wide layer that has no
    normalizer.

    Args:
      input_: Input tensor.
      num_outputs: Width of the final layer.
      is_training: Passed as `trainable` to both layers.
      initializer: Weights initializer for both layers.
      layer_name: Name of the layer; also the key looked up in
        `self._net_desc`.

    Returns:
      The output tensor of the (possibly factorised) layer.
    """
    if layer_name not in self._net_desc:
        return super(resnetv1_sep, self).fully_connected(
            input_, num_outputs, is_training, initializer, layer_name)

    rank = self._net_desc[layer_name]
    first_scope = LayerName(layer_name + '_sep_K' + str(rank))

    # First layer: a bare linear map (no BN, no biases, no activation).
    bare_defaults = dict(
        trainable=False,
        normalizer_fn=None,
        normalizer_params=None,
        biases_initializer=None,
        biases_regularizer=None)
    with arg_scope([slim.fully_connected], **bare_defaults):
        net = slim.fully_connected(
            input_,
            rank,
            weights_initializer=initializer,
            trainable=is_training,
            activation_fn=None,
            scope=first_scope)

    # Second layer keeps the original layer name; no BN, biases allowed.
    second_scope = LayerName(layer_name)
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            arg_scope([slim.fully_connected],
                      trainable=False,
                      normalizer_fn=None,
                      normalizer_params=None):
        net = slim.fully_connected(
            net,
            num_outputs,
            weights_initializer=initializer,
            trainable=is_training,
            scope=second_scope)
    return net
def rpn_convolution(self, net_conv4, is_training, initializer):
    """Build the 'rpn_conv/3x3' layer, split into 3x1 and 1x3 convolutions.

    When 'rpn_conv/3x3' is absent from `self._net_desc` the parent class
    implementation is used. Otherwise the stored value is the number of
    channels produced by the first (3x1) convolution; the second (1x3)
    convolution outputs 512 channels.

    Args:
      net_conv4: Input feature map.
      is_training: Passed as `trainable` to both convolutions.
      initializer: Weights initializer for both convolutions.

    Returns:
      The output tensor of the RPN convolution.
    """
    layer_name = 'rpn_conv/3x3'
    if layer_name not in self._net_desc:
        return super(resnetv1_sep, self).rpn_convolution(
            net_conv4, is_training, initializer)

    rank = self._net_desc[layer_name]
    first_scope = LayerName(layer_name + '_sep_K' + str(rank))

    # First conv: no BN and no biases.
    # NOTE(review): the original comment also promised "no activation func",
    # but activation_fn is not overridden here (the fully connected
    # counterpart passes activation_fn=None) -- confirm which is intended.
    bare_defaults = dict(
        trainable=False,
        normalizer_fn=None,
        normalizer_params=None,
        biases_initializer=None,
        biases_regularizer=None)
    with arg_scope([slim.conv2d], **bare_defaults):
        net = slim.conv2d(
            net_conv4,
            rank,
            [3, 1],
            trainable=is_training,
            weights_initializer=initializer,
            scope=first_scope)

    # Second conv keeps the original layer name; no BN, biases allowed.
    second_scope = LayerName(layer_name)
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            arg_scope([slim.conv2d],
                      trainable=False,
                      normalizer_fn=None,
                      normalizer_params=None):
        net = slim.conv2d(
            net,
            512,
            [1, 3],
            trainable=is_training,
            weights_initializer=initializer,
            scope=second_scope)
    return net
def test_resnet_v1_50(img_dir):
    """Classify a single image with ResNet-V1-50.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224 and fed to a freshly built 1000-class network restored from
    './models/resnet_v1_50.ckpt'.

    :param img_dir: Path of the image to be classified
    :return: `(name, prob)` -- the `label_dict` entry for the top class and
        its softmax probability
    :raises IOError: if the image cannot be read
    """
    img = cv2.imread(img_dir)
    if img is None:
        # cv2.imread signals failure by returning None; fail here with the
        # path instead of with an opaque assertion inside cvtColor.
        raise IOError('Cannot read image: {}'.format(img_dir))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(
        name='input_images', shape=[None, 224, 224, 3], dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v1_50(inputs, 1000, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v1_50.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'resnet_v1_50/SpatialSqueeze:0')
        # Build the softmax once and derive both the class and its score.
        probabilities = tf.nn.softmax(outputs)
        pred = tf.argmax(probabilities, axis=1)[0]
        prob = tf.reduce_max(probabilities, axis=1)[0]
        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})

    # label_dict is indexed with an offset of one relative to the argmax.
    name = label_dict[pred + 1]
    print('Result of ResNet-V1-50:', name, prob)
    return name, prob
def rpn_convolution(self, net_conv4, is_training, initializer):
    """Build the 'rpn_conv/3x3' layer, optionally as a 3x1 + 1x3 pair.

    If 'rpn_conv/3x3' has no entry in `self._comp_weights_dict`, a regular
    3x3 convolution with 512 outputs is created. Otherwise the layer is
    replaced by a 3x1 convolution with `self._K_by_layer_dict[layer_name]`
    outputs (scope 'rpn_convsep/3x3') followed by a 1x3 convolution with 512
    outputs under the original scope name.

    Args:
      net_conv4: Input feature map.
      is_training: Passed as `trainable` to the convolutions.
      initializer: Weights initializer for the convolutions.

    Returns:
      The output tensor of the RPN convolution.
    """
    layer_name = 'rpn_conv/3x3'
    conv_kwargs = dict(trainable=is_training, weights_initializer=initializer)

    if layer_name not in self._comp_weights_dict:
        return slim.conv2d(
            net_conv4, 512, [3, 3], scope=layer_name, **conv_kwargs)

    rank = self._K_by_layer_dict[layer_name]
    first_scope = LayerName(layer_name.replace('conv', 'convsep'))

    # First conv: no BN and no biases.
    # NOTE(review): the original comment also said "no activation func", but
    # activation_fn is not overridden here -- confirm which is intended.
    with arg_scope([slim.conv2d],
                   trainable=False,
                   normalizer_fn=None,
                   normalizer_params=None,
                   biases_initializer=None,
                   biases_regularizer=None):
        net = slim.conv2d(
            net_conv4, rank, [3, 1], scope=first_scope, **conv_kwargs)

    # Second conv: no BN, biases allowed, original scope name.
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            arg_scope([slim.conv2d],
                      trainable=False,
                      normalizer_fn=None,
                      normalizer_params=None):
        net = slim.conv2d(net, 512, [1, 3], scope=layer_name, **conv_kwargs)
    return net
def network_entire(images):
    '''
    Build the embedding op for a batch of images.

    The RGB mean is subtracted, the batch is pushed through ResNet-v1-50 in
    inference mode, 'resnet_v1_50/block4' is averaged over axes 1 and 2, and
    the result is handed to `head`.

    Args:
        images: Batch of images of shape (n, h, w, 3).

    Returns:
        embedding: The 'emb' entry produced by `head`.
    '''
    # Normalization.
    rgb_mean = tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1, 1, 1, 3))
    images = images - rgb_mean

    # Backbone.
    backbone_scope = resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)
    with slim.arg_scope(backbone_scope):
        _, endpoints = resnet_v1_50(
            images, num_classes=None, is_training=False, global_pool=True)

    # Spatial average of block4, published under both keys.
    pooled = tf.reduce_mean(
        endpoints['resnet_v1_50/block4'], [1, 2], name='pool5',
        keep_dims=False)
    endpoints['model_output'] = pooled
    endpoints['global_pool'] = pooled

    # NOTE(review): the head is assumed to be built outside the ResNet arg
    # scope; the original layout was lost -- confirm.
    with tf.name_scope('head'):
        endpoints = head(endpoints, embedding_dim, is_training=False)

    return endpoints['emb']
def __init__(self):
    """Build a 1000-class ResNet-v1-50 graph with a targeted loss gradient.

    Creates the image and target-label placeholders, the inference-mode
    network, the predicted labels, the softmax cross-entropy towards the
    target label and its gradient with respect to the input (scaled by
    255), then restores 'resnet_v1_50.ckpt' into the default session.
    """
    from nets import resnet_v1

    self.image_size = 224
    self.num_classes = 1000
    self.predictions_is_correct = False
    self.use_larger_step_size = False
    self.use_smoothed_grad = False
    # For dataprior attacks. gamma = A^2 * D / d in the paper
    self.gamma = 2.7

    input_shape = [None, self.image_size, self.image_size, 3]
    self.x_input = tf.placeholder(tf.float32, shape=input_shape)
    self.target_label = tf.placeholder(tf.int32, shape=[None])
    one_hot_target = tf.one_hot(self.target_label, self.num_classes)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, end_points = resnet_v1.resnet_v1_50(
            self.x_input, num_classes=self.num_classes, is_training=False)

    self.predicted_labels = tf.argmax(end_points['predictions'], 1)

    # Cross-entropy towards the target label (a margin loss between the
    # target logit and the best other logit was tried and disabled).
    self.loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=one_hot_target, logits=logits)
    input_gradient = tf.gradients(self.loss, self.x_input)[0]
    self.grad = 255.0 * input_gradient

    model_variables = slim.get_model_variables(scope='resnet_v1')
    saver = tf.train.Saver(model_variables)
    self.sess = tf.get_default_session()
    saver.restore(self.sess, 'resnet_v1_50.ckpt')
def _resnet_v2_50(self, X, num_classes, dropout_keep_prob=0.8,
                  is_train=False):
    """Build ResNet-v2-50 followed by a three-layer 1x1-conv classifier head.

    The head maps the backbone output through 1000 -> 200 -> `num_classes`
    channels with dropout after the first two convolutions, then squeezes
    axes 1 and 2.

    Args:
      X: Input image batch.
      num_classes: Number of output classes.
      dropout_keep_prob: Keep probability for both dropout layers.
      is_train: Whether the backbone and the dropout layers are built in
        training mode.

    Returns:
      The logits tensor after squeezing axes 1 and 2.
    """
    resnet_scope = resnet_arg_scope()
    with slim.arg_scope(resnet_scope):
        net, _ = resnet_v2_50(X, is_training=is_train)

    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
        with tf.variable_scope('Logits_out'):
            net = slim.conv2d(net, 1000, [1, 1], activation_fn=None,
                              normalizer_fn=None, scope='Logits_out0')
            # slim.dropout defaults to is_training=True, so the flag must be
            # forwarded or units keep being dropped at inference time.
            net = slim.dropout(net, dropout_keep_prob, is_training=is_train,
                               scope='Dropout_1b_out0')
            net = slim.conv2d(net, 200, [1, 1], activation_fn=None,
                              normalizer_fn=None, scope='Logits_out1')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_train,
                               scope='Dropout_1b_out1')
            net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                              normalizer_fn=None, scope='Logits_out2')
            net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
    return net
def generate_graph(output_root):
    """Build, load and convert a ResNet-v1-50 classifier.

    Ensures `output_root` exists, clones the slim sources into it when
    'models/slim' is missing, builds a batch-size-1 inference graph with a
    softmax output, loads the downloaded checkpoint and converts the session
    with `TensorFlowConverter`.

    Args:
      output_root: Working directory for the slim checkout and the model.

    Returns:
      `(sess, x, y, graph)`: the session, the input placeholder, the softmax
      output tensor and the converted graph.
    """
    os.makedirs(output_root, exist_ok=True)

    slim_dir = os.path.join(output_root, "models/slim")
    slim_present = os.path.exists(slim_dir)
    if not slim_present:
        clone_slim(output_root)
    # `nets` lives inside the slim checkout, so it must be importable first.
    sys.path.append(slim_dir)
    from nets import resnet_v1

    side = resnet_v1.resnet_v1.default_image_size
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        x = tf.placeholder(tf.float32, [1, side, side, 3])
        logits, _ = resnet_v1.resnet_v1_50(
            x, num_classes=1000, is_training=False)
        y = tf.nn.softmax(logits)

    checkpoint_path = download_model(output_root)
    sess = tf.Session()
    restore_fn = slim.assign_from_checkpoint_fn(
        checkpoint_path, slim.get_model_variables())
    restore_fn(sess)

    converter = TensorFlowConverter(sess, batch_size=1)
    graph = converter.convert([x], [y])
    return sess, x, y, graph
def build_layer(K):
    """Build a 3x1 convolution with K outputs followed by a 1x3 with 512.

    Relies on `layer_name`, `net_conv4`, `is_training` and `initializer`
    from the enclosing scope.

    Args:
      K: Number of output channels of the first (3x1) convolution; also
        embedded in both scope names.

    Returns:
      The output tensor of the second convolution.
    """
    suffix = str(K)

    # First conv: no BN and no biases.
    # NOTE(review): the original comment also said "no activation func", but
    # activation_fn is not overridden here -- confirm which is intended.
    with arg_scope([slim.conv2d],
                   trainable=False,
                   normalizer_fn=None,
                   normalizer_params=None,
                   biases_initializer=None,
                   biases_regularizer=None):
        first_scope = LayerName(layer_name + '_sep_K' + suffix)
        net = slim.conv2d(
            net_conv4,
            K,
            [3, 1],
            trainable=is_training,
            weights_initializer=initializer,
            scope=first_scope)

    # Second conv: no BN, biases allowed.
    second_scope = LayerName(layer_name + '_K' + suffix)
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            arg_scope([slim.conv2d],
                      trainable=False,
                      normalizer_fn=None,
                      normalizer_params=None):
        net = slim.conv2d(
            net,
            512,
            [1, 3],
            trainable=is_training,
            weights_initializer=initializer,
            scope=second_scope)
    return net
def extract_features(self, preprocessed_inputs):
    """Extract FPN feature maps from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]. Top-down FPN maps for levels
        `_fpn_min_level` .. min(`_fpn_max_level`, 5) come first, followed by
        one stride-2 convolution output per level above 5.
    """
    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    def scaled_depth(depth):
        # Apply the depth multiplier, never going below the minimum depth.
        return max(int(depth * self._depth_multiplier), self._min_depth)

    with tf.variable_scope(
            self._resnet_scope_name, reuse=self._reuse_weights) as scope:
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            if self._override_base_feature_extractor_hyperparams:
                hyperparams_ctx = slim.arg_scope(self._conv_hyperparams_fn())
            else:
                hyperparams_ctx = context_manager.IdentityContextManager()
            with hyperparams_ctx:
                _, image_features = self._resnet_base_fn(
                    inputs=ops.pad_to_multiple(preprocessed_inputs,
                                               self._pad_to_multiple),
                    num_classes=None,
                    is_training=None,
                    global_pool=False,
                    output_stride=None,
                    store_non_strided_activations=True,
                    min_base_depth=self._min_depth,
                    depth_multiplier=self._depth_multiplier,
                    scope=scope)
                image_features = self._filter_features(image_features)

        fpn_depth = scaled_depth(self._additional_layer_depth)
        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope(self._fpn_scope_name,
                                   reuse=self._reuse_weights):
                # The backbone provides at most level 5 (block4).
                top_level = min(self._fpn_max_level, 5)
                block_ids = [
                    level - 1
                    for level in range(self._fpn_min_level, top_level + 1)
                ]
                block_names = ['block{}'.format(b) for b in block_ids]
                fpn_features = (
                    feature_map_generators.fpn_top_down_feature_maps(
                        [(name, image_features[name])
                         for name in block_names],
                        depth=fpn_depth))

                feature_maps = [
                    fpn_features['top_down_block{}'.format(b)]
                    for b in block_ids
                ]
                last_feature_map = fpn_features[
                    'top_down_block{}'.format(top_level - 1)]

                # Coarser levels are produced by repeated stride-2 convs.
                for i in range(top_level, self._fpn_max_level):
                    last_feature_map = slim.conv2d(
                        last_feature_map,
                        num_outputs=fpn_depth,
                        kernel_size=[3, 3],
                        stride=2,
                        padding='SAME',
                        scope='bottom_up_block{}'.format(i))
                    feature_maps.append(last_feature_map)
    return feature_maps
def build_layer(K):
    """Build a (kernel_size x 1) conv with K outputs, then a (1 x kernel_size).

    Relies on `layer_name`, `inputs`, `kernel_size`, `stride` and
    `num_output_channels` from the enclosing scope. The stride is applied
    along the first spatial axis by the first convolution and along the
    second by the second convolution.

    Args:
      K: Number of output channels of the first convolution; also embedded
        in both scope names.

    Returns:
      The output tensor of the second convolution.
    """
    suffix = str(K)

    # First conv: no BN, no biases, no activation, no regularizer.
    bare_defaults = dict(
        weights_regularizer=None,
        weights_initializer=None,
        trainable=False,
        activation_fn=None,
        normalizer_fn=None,
        normalizer_params=None,
        biases_initializer=None)
    with arg_scope([slim.conv2d], **bare_defaults):
        first_scope = LayerName(layer_name + '_sep_K' + suffix)
        net = conv2d_same(
            inputs,
            K,
            kernel_size=(kernel_size, 1),
            stride=[stride, 1],
            scope=first_scope)

    # Second conv under the inference-mode ResNet arg scope.
    second_scope = LayerName(layer_name + '_K' + suffix)
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net = conv2d_same(
            net,
            num_output_channels,
            kernel_size=(1, kernel_size),
            stride=[1, stride],
            scope=second_scope)
    return net
def build_layer(K):
    """Build a K-wide fully connected layer followed by the output layer.

    Relies on `layer_name`, `input_`, `num_outputs`, `is_training` and
    `initializer` from the enclosing scope.

    Args:
      K: Width of the intermediate layer; also embedded in both scope names.

    Returns:
      The output tensor of the second fully connected layer.
    """
    suffix = str(K)
    shared_kwargs = dict(
        weights_initializer=initializer, trainable=is_training)

    # First layer: bare linear map (no BN, no biases, no activation).
    with arg_scope([slim.fully_connected],
                   trainable=False,
                   normalizer_fn=None,
                   normalizer_params=None,
                   biases_initializer=None,
                   biases_regularizer=None):
        first_scope = LayerName(layer_name + '_sep_K' + suffix)
        net = slim.fully_connected(
            input_, K, activation_fn=None, scope=first_scope,
            **shared_kwargs)

    # Second layer: no BN, biases allowed.
    second_scope = LayerName(layer_name + '_K' + suffix)
    with slim.arg_scope(resnet_arg_scope(is_training=False)), \
            arg_scope([slim.fully_connected],
                      trainable=False,
                      normalizer_fn=None,
                      normalizer_params=None):
        net = slim.fully_connected(
            net, num_outputs, scope=second_scope, **shared_kwargs)
    return net
def trans_conv_3(inputs, kp_num=1):
    """Apply a stride-2 3x3 transposed convolution with 64 output channels.

    The layer lives in the 'keypoints_trans_conv_3' variable scope (with
    AUTO_REUSE) under the ResNet arg scope and has no activation function.

    Args:
      inputs: Input tensor; its static shape is checked for rank >= 4.
      kp_num: Unused.

    Returns:
      The output tensor of the transposed convolution.
    """
    # The returned depth is not used; the call is kept because it is the
    # only place the input's static shape is checked.
    # NOTE(review): presumably raises when the rank is below 4 -- confirm
    # against the slim utils implementation.
    slim.utils.last_dimension(inputs.get_shape(), min_rank=4)

    with tf.variable_scope('keypoints_trans_conv_3', reuse=tf.AUTO_REUSE), \
            tf.contrib.slim.arg_scope(resnet_arg_scope()):
        upsampled = slim.conv2d_transpose(
            inputs,
            64,
            [3, 3],
            stride=2,
            padding='SAME',
            activation_fn=None,
            scope='trans_conv_3')
    return upsampled
def build_model(images, num_classes, is_training=True, reuse=None):
    """Build a classifier on top of the DELF ResNet-50 'block3' features.

    The 'resnet_v1_50/block3' activations go through a 1x1 convolution with
    512 outputs and ReLU, are averaged over the spatial axes and mapped to
    `num_classes` logits by a 1x1 convolution.

    Args:
      images: Input image batch.
      num_classes: Number of output classes.
      is_training: Whether the backbone and the head's batch norm are built
        in training mode.
      reuse: Variable-reuse flag, applied to the backbone and to the head
        layers.

    Returns:
      logits: The logits with axes 1 and 2 squeezed out.
    """
    model = delf_v1.DelfV1()
    _, end_points = model.GetResnet50Subnetwork(
        images, global_pool=True, is_training=is_training, reuse=reuse)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(
                weight_decay=0.0001, batch_norm_scale=True)):
        # Follow the caller's mode: the original hard-coded is_training=True,
        # so the head's batch norm kept using batch statistics in evaluation.
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            feature_map = end_points['resnet_v1_50/block3']
            # `reuse` is forwarded so a second tower can share the head
            # variables, just like it shares the backbone.
            feature_map = slim.conv2d(
                feature_map,
                512,
                1,
                rate=1,
                activation_fn=tf.nn.relu,
                reuse=reuse,
                scope='conv1')
            # Global average pooling, keeping the result 4-D for the 1x1
            # logits convolution.
            feature_map = tf.reduce_mean(feature_map, [1, 2])
            feature_map = tf.expand_dims(tf.expand_dims(feature_map, 1), 2)
            logits = slim.conv2d(
                feature_map,
                num_classes,
                [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                reuse=reuse,
                scope='logits')
            logits = tf.squeeze(logits, [1, 2], name='spatial_squeeze')
    return logits
def build(self, weight_path, sess, input_type=InputType.BASE64_JPEG):
    """Build the inference-only ResNet-v1-152 graph and load weights.

    Args:
      weight_path: Path handed to `self.load_trained_weights`; `None` skips
        loading.
      sess: Session stored in `self.session`.
      input_type: `InputType.TENSOR` for a float [None, 224, 224, 3]
        placeholder, or `InputType.BASE64_JPEG` for a string placeholder
        decoded by `load_base64_tensor`.

    Raises:
      ValueError: If `input_type` is neither of the supported values.
    """
    self.input_tensor = None
    self.session = sess

    if input_type == InputType.TENSOR:
        self.input = tf.placeholder(
            tf.float32, shape=[None, 224, 224, 3], name="input")
        self.input_tensor = self.input
    elif input_type == InputType.BASE64_JPEG:
        self.input = tf.placeholder(tf.string, shape=(None,), name='input')
        self.input_tensor = load_base64_tensor(self.input)
    else:
        raise ValueError('invalid input type')

    # Only the inference model is built.
    inference_scope = resnet_v1.resnet_arg_scope(
        activation_fn=tf.nn.relu, weight_decay=0.0001)
    with arg_scope(inference_scope):
        self.logits_val, end_points = resnet_v1.resnet_v1_152(
            self.input_tensor,
            num_classes=self.num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    # The network's own 'predictions' end point is exposed as 'outputs'.
    self.predictions = end_points['predictions']
    self.output = tf.identity(self.predictions, name='outputs')

    if weight_path is None:
        return
    self.load_trained_weights(weight_path)
def mag(inputs,
        num_classes=3,
        num_channels=1000,
        is_training=True,
        global_pool=False,
        output_stride=16,
        upsample_ratio=2,
        spatial_squeeze=False,
        reuse=tf.AUTO_REUSE,
        scope='graspnet'):
    """Build a ResNet-v1-50 feature extractor with a 1x1-conv prediction head.

    The backbone output goes through three 1x1 convolutions
    (512 -> 128 -> `num_classes`) and is bilinearly resized by
    `upsample_ratio`.

    Args:
      inputs: Input image batch.
      num_classes: Number of channels of the final prediction.
      num_channels: `num_classes` argument of the backbone.
      is_training: Whether the backbone is built in training mode.
      global_pool: Forwarded to the backbone.
      output_stride: Forwarded to the backbone.
      upsample_ratio: Integer factor applied to the head's static height and
        width.
      spatial_squeeze: Forwarded to the backbone.
      reuse: Reuse flag of the outer variable scope.
      scope: Name of the outer variable scope.

    Returns:
      `(net, end_points)`: the resized prediction and the backbone end
      points extended with the head's conv outputs and a 'logits' entry.
    """
    with tf.variable_scope(scope, 'graspnet', [inputs], reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(
                inputs=inputs,
                num_classes=num_channels,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
                spatial_squeeze=spatial_squeeze,
                scope='feature_extractor')

        # `values` must be passed by keyword: the second positional
        # parameter of tf.variable_scope is `default_name`, not `values`.
        with tf.variable_scope('prediction', values=[net]) as sc:
            end_points_collection = sc.original_name_scope + '_end_points'
            # to do: add batch normalization to the following conv layers.
            with slim.arg_scope([slim.conv2d],
                                outputs_collections=end_points_collection):
                net = slim.conv2d(net, 512, [1, 1], scope='conv1')
                net = slim.conv2d(net, 128, [1, 1], scope='conv2')
                net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
                # Requires statically known spatial dimensions.
                height, width = net.get_shape().as_list()[1:3]
                net = tf.image.resize_bilinear(
                    net,
                    [height * upsample_ratio, width * upsample_ratio],
                    name='resize_bilinear')
            end_points.update(
                slim.utils.convert_collection_to_dict(end_points_collection))
            end_points['logits'] = net
    return net, end_points
def single_tower(colors,
                 depths,
                 num_classes=3,
                 num_channels=1000,
                 is_training=True,
                 global_pool=False,
                 output_stride=16,
                 spatial_squeeze=False,
                 scope='arcnet'):
    """Build a ResNet-v1-101 tower on concatenated color and depth inputs.

    `colors` and `depths` are concatenated along axis 3, passed through the
    backbone, then through three 1x1 convolutions
    (512 -> 128 -> `num_classes`) and bilinearly upsampled by a factor of 2.

    Args:
      colors: Color image batch.
      depths: Depth image batch, concatenated to `colors` on axis 3.
      num_classes: Number of channels of the final prediction.
      num_channels: `num_classes` argument of the backbone.
      is_training: Whether the backbone is built in training mode.
      global_pool: Forwarded to the backbone.
      output_stride: Forwarded to the backbone.
      spatial_squeeze: Forwarded to the backbone.
      scope: Name of the head's variable scope; the backbone uses
        `scope + '_tower'`.

    Returns:
      `(net, end_points)`: the upsampled prediction and the backbone end
      points extended with the head's conv outputs and a 'logits' entry.
    """
    inputs = tf.concat([colors, depths], axis=3)
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(
            inputs=inputs,
            num_classes=num_channels,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
            spatial_squeeze=spatial_squeeze,
            scope=scope + '_tower')

    with tf.variable_scope(scope, 'arcnet', [net]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # to do: add batch normalization to the following conv layers.
        with slim.arg_scope([slim.conv2d],
                            outputs_collections=end_points_collection):
            net = slim.conv2d(net, 512, [1, 1], scope='conv1')
            net = slim.conv2d(net, 128, [1, 1], scope='conv2')
            net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
            # Requires statically known spatial dimensions.
            height, width = net.get_shape().as_list()[1:3]
            net = tf.image.resize_bilinear(
                net, [height * 2, width * 2], name='resize_bilinear')
        # Merge the head's activations into the backbone's end points
        # instead of replacing them, so the backbone activations stay
        # reachable (same convention as `mag`).
        end_points.update(
            slim.utils.convert_collection_to_dict(end_points_collection))
        end_points['logits'] = net
    return net, end_points
def _GetAttentionModel(
        self,
        images,
        num_classes,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False):
    """Builds the attention model on resnet_v1_50, including the logits.

    Args:
      images: A tensor of size [batch, height, width, channels]
      num_classes: The number of output classes.
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are
        in training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      logits: A tensor of size [batch, num_classes].
      attention_prob: Attention score after the non-linearity.
      attention_score: Attention score before the non-linearity.
      feature_map: Features extracted from the model, which are not
        l2-normalized.
    """
    prelogit_outputs = self.GetAttentionPrelogit(
        images,
        weight_decay,
        attention_nonlinear=attention_nonlinear,
        attention_type=attention_type,
        kernel=kernel,
        training_resnet=training_resnet,
        training_attention=training_attention,
        reuse=reuse)
    # The fifth element (end points) is not needed here.
    attention_feat, attention_prob, attention_score, feature_map, _ = (
        prelogit_outputs)

    logits_arg_scope = resnet_v1.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_scale=True)
    with slim.arg_scope(logits_arg_scope), \
            slim.arg_scope([slim.batch_norm], is_training=training_attention):
        with tf.variable_scope(
                _ATTENTION_VARIABLE_SCOPE, values=[attention_feat],
                reuse=reuse):
            # 1x1 convolution as classifier, then drop the spatial axes.
            logits_4d = slim.conv2d(
                attention_feat,
                num_classes, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='logits')
            logits = tf.squeeze(logits_4d, [1, 2], name='spatial_squeeze')
    return logits, attention_prob, attention_score, feature_map
def extract_features(self, preprocessed_inputs):
    """Extract five FPN feature maps from preprocessed inputs.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a list of tensors where the ith tensor has shape
        [batch, height_i, width_i, depth_i]: the top-down maps for block2,
        block3 and block4 followed by two stride-2 convolution outputs.

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
        raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    backbone_blocks = ['block2', 'block3', 'block4']

    with tf.variable_scope(
            self._resnet_scope_name, reuse=self._reuse_weights) as scope:
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            if self._override_base_feature_extractor_hyperparams:
                hyperparams_ctx = slim.arg_scope(self._conv_hyperparams_fn())
            else:
                hyperparams_ctx = context_manager.IdentityContextManager()
            with hyperparams_ctx:
                _, image_features = self._resnet_base_fn(
                    inputs=ops.pad_to_multiple(preprocessed_inputs,
                                               self._pad_to_multiple),
                    num_classes=None,
                    is_training=None,
                    global_pool=False,
                    output_stride=None,
                    store_non_strided_activations=True,
                    scope=scope)
                image_features = self._filter_features(image_features)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope(self._fpn_scope_name,
                                   reuse=self._reuse_weights):
                fpn_features = (
                    feature_map_generators.fpn_top_down_feature_maps(
                        [(name, image_features[name])
                         for name in backbone_blocks],
                        depth=256))

                # Two coarser maps, each a stride-2 conv of the previous one.
                coarse_maps = []
                previous = fpn_features['top_down_block4']
                for i in range(5, 7):
                    previous = slim.conv2d(
                        previous,
                        num_outputs=256,
                        kernel_size=[3, 3],
                        stride=2,
                        padding='SAME',
                        scope='bottom_up_block{}'.format(i))
                    coarse_maps.append(previous)

    top_down_maps = [
        fpn_features['top_down_' + name] for name in backbone_blocks
    ]
    return top_down_maps + coarse_maps
def _GetAttentionModel(
        self,
        images,
        num_classes,
        weight_decay=0.0001,
        attention_nonlinear=_SUPPORTED_ATTENTION_NONLINEARITY[0],
        attention_type=_SUPPORTED_ATTENTION_TYPES[0],
        kernel=1,
        training_resnet=False,
        training_attention=False,
        reuse=False):
    """Constructs the attention classifier on top of resnet_v1_50.

    Args:
      images: A tensor of size [batch, height, width, channels]
      num_classes: The number of output classes.
      weight_decay: The parameters for weight_decay regularizer.
      attention_nonlinear: Type of non-linearity on top of the attention
        function.
      attention_type: Type of the attention structure.
      kernel: Convolutional kernel to use in attention layers (eg, [3, 3]).
      training_resnet: Whether or not the Resnet blocks from the model are
        in training mode.
      training_attention: Whether or not the attention part of the model is
        in training mode.
      reuse: Whether or not the layer and its variables should be reused.

    Returns:
      A 4-tuple `(logits, attention_prob, attention_score, feature_map)`:
      the [batch, num_classes] logits, the attention score after and before
      the non-linearity, and the (not l2-normalized) feature map.
    """
    attention_kwargs = dict(
        attention_nonlinear=attention_nonlinear,
        attention_type=attention_type,
        kernel=kernel,
        training_resnet=training_resnet,
        training_attention=training_attention,
        reuse=reuse)
    (attention_feat, attention_prob, attention_score, feature_map,
     _) = self.GetAttentionPrelogit(images, weight_decay, **attention_kwargs)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(
                weight_decay=weight_decay, batch_norm_scale=True)):
        with slim.arg_scope([slim.batch_norm],
                            is_training=training_attention):
            with tf.variable_scope(
                    _ATTENTION_VARIABLE_SCOPE,
                    values=[attention_feat],
                    reuse=reuse):
                # Linear 1x1 classifier; the two singleton spatial axes are
                # squeezed away afterwards.
                conv_logits = slim.conv2d(
                    attention_feat,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    scope='logits')
                logits = tf.squeeze(
                    conv_logits, [1, 2], name='spatial_squeeze')

    return logits, attention_prob, attention_score, feature_map
def model(images, weight_decay=1e-5, is_training=True, eval=False):
    '''
    Define the model; slim's implementation of ResNet-v1-50 is the backbone.

    The 'pool5' .. 'pool2' end points are merged coarse-to-fine (concat,
    1x1 conv, 3x3 conv, unpool) and three sigmoid 1x1 convolutions predict
    the score map, the box geometry and the rotation angle.

    Args:
        images: Input image batch.
        weight_decay: L2 regularization strength for backbone and fusion
            convolutions.
        is_training: Whether backbone and batch norm are in training mode.
        eval: Forwarded to `mean_image_subtraction`.

    Returns:
        F_score: 1-channel sigmoid score map.
        F_geometry: 5-channel map: 4 box-distance channels scaled by
            `FLAGS.text_scale` and 1 angle channel in [-pi/4, pi/4].
    '''
    images = mean_image_subtraction(images, eval)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(
            images, is_training=is_training, scope='resnet_v1_50')

    # `values` expects the tensors themselves; the original passed the bound
    # method `end_points.values` instead of calling it.
    with tf.variable_scope('feature_fusion',
                           values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(
                                weight_decay)):
            # Backbone features, coarsest first.
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    # Fuse the upsampled coarser map with this level.
                    c1_1 = slim.conv2d(
                        tf.concat([g[i - 1], f[i]], axis=-1),
                        num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for regression part,
            # we first use a sigmoid to limit the regression range, and also
            # this is do with the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            # angle is between [-45, 45]
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

            # NOTE(review): this repeats the concat above with identical
            # inputs. It is kept because removing it would change the
            # auto-generated name of the returned tensor; drop it once no
            # consumer fetches that tensor by name.
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def extract_features(self, preprocessed_inputs):
    """Extract a single fused multiscale feature map.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      feature_maps: a one-element list holding the 'feature_map' output of
        the multiscale fusion of block1 .. block4.

    Raises:
      ValueError: depth multiplier is not supported.
    """
    if self._depth_multiplier != 1.0:
        raise ValueError('Depth multiplier not supported.')

    preprocessed_inputs = shape_utils.check_min_image_dim(
        129, preprocessed_inputs)

    with tf.variable_scope(
            self._resnet_scope_name, reuse=self._reuse_weights) as scope:
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            if self._override_base_feature_extractor_hyperparams:
                hyperparams_ctx = slim.arg_scope(self._conv_hyperparams_fn())
            else:
                hyperparams_ctx = context_manager.IdentityContextManager()
            with hyperparams_ctx:
                _, image_features = self._resnet_base_fn(
                    inputs=ops.pad_to_multiple(preprocessed_inputs,
                                               self._pad_to_multiple),
                    sparsity_type=self._sparsity_type,
                    sparse_dense_branch=self._sparse_dense_branch,
                    num_classes=None,
                    is_training=None,
                    global_pool=False,
                    output_stride=None,
                    include_root_block=self._include_root_block,
                    depthwise_convolution=self._depthwise_convolution,
                    max_pool_subsample=self._max_pool_subsample,
                    root_downsampling_rate=self._root_downsampling_rate,
                    store_non_strided_activations=(
                        self._store_non_strided_activations),
                    scope=scope)
                image_features = self._filter_features(image_features)

        with slim.arg_scope(self._conv_hyperparams_fn()):
            with tf.variable_scope('multiscale_feature_extractor',
                                   reuse=self._reuse_weights):
                # Levels 2..5 correspond to block1..block4.
                block_names = [
                    'block{}'.format(level - 1) for level in range(2, 6)
                ]
                fusion_inputs = [
                    (name, image_features[name]) for name in block_names
                ]
                multiscale_features = (
                    feature_map_generators.multiscale_fusion_feature_maps(
                        fusion_inputs, depth=256))
    return [multiscale_features['feature_map']]
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape [batch,
      height_i, width_i, depth_i]. The list holds the values of the mapping
      returned by `feature_map_generators.multi_resolution_feature_maps`.
  """
  preprocessed_inputs = shape_utils.check_min_image_dim(
      33, preprocessed_inputs)

  # Two maps are taken from existing resnet end points (block3 / block4); the
  # entries with an empty name and an explicit depth are presumably extra
  # layers created by the generator -- confirm against feature_map_generators.
  feature_map_layout = {
      'from_layer': [
          'FeatureExtractor/{}/block3'.format(self._resnet_scope_name),
          'FeatureExtractor/{}/block4'.format(self._resnet_scope_name),
          '', '', '', ''
      ],
      'layer_depth': [-1, -1, 512, 256, 256, 128],
      'use_depthwise': self._use_depthwise,
      'use_explicit_padding': self._use_explicit_padding,
  }
  if self._num_layers == 7:
    # A seven-layer configuration appends one more generated map of depth 64.
    feature_map_layout['from_layer'] += ['']
    feature_map_layout['layer_depth'] += [64]

  with tf.variable_scope(
      self._resnet_scope_name, reuse=self._reuse_weights) as scope:
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        _, image_features = self._resnet_base_fn(
            inputs=ops.pad_to_multiple(preprocessed_inputs,
                                       self._pad_to_multiple),
            num_classes=None,
            is_training=None,
            global_pool=False,
            output_stride=None,
            store_non_strided_activations=True,
            min_base_depth=self._min_depth,
            depth_multiplier=self._depth_multiplier,
            scope=scope)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      feature_maps = feature_map_generators.multi_resolution_feature_maps(
          feature_map_layout=feature_map_layout,
          depth_multiplier=self._depth_multiplier,
          min_depth=self._min_depth,
          insert_1x1_conv=True,
          image_features=image_features)

  # dict.values() is a non-indexable view on Python 3; materialize it so the
  # documented "list of tensors" contract holds on both Python 2 and 3.
  return list(feature_maps.values())
def model(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model; we use slim's implementation of resnet.

    A resnet_v1_50 backbone is followed by a 'feature_fusion' branch that
    merges the 'pool5'..'pool2' end points, builds a class-activation-style
    map from the fused features and predicts score and geometry maps from it.

    Args:
        images: input image tensor; passed through mean_image_subtraction
            before the backbone.
        weight_decay: L2 regularization weight used for the backbone arg scope
            and for the fusion convolutions.
        is_training: forwarded to the backbone and to the batch norm
            parameters of the fusion convolutions.

    Returns:
        F_score: 1-channel sigmoid score map.
        F_geometry: geo_map (4 sigmoid channels scaled by FLAGS.text_scale)
            concatenated with angle_map (1 channel) on the last axis.
        ram: the activation map obtained by multiplying g[3] with the tiled
            globally pooled vector.
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # Only the intermediate end points are consumed; the logits are unused.
        _, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    # Pass the end-point tensors themselves. The previous code passed the
    # bound method `end_points.values` (never called), so no tensor was
    # actually handed to variable_scope.
    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone features ordered from the deepest stage to the shallowest.
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            # Slot 4 of g / h is reserved for the activation-map stage below.
            g = [None, None, None, None, None]
            h = [None, None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    # Merge the previous stage with the next shallower
                    # backbone feature: 1x1 conv, then 3x3 conv.
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # GlobalAveragePooling2D is defined outside this block (presumably
            # the Keras layer -- confirm); assumes h[4] is [batch, 32].
            h[4] = GlobalAveragePooling2D()(g[3])
            # The pooled vector is mapped back onto the last convolutional
            # feature map g[3] to generate a class-activation-style map: it is
            # reshaped to a column vector, tiled along axis 1 to match g[3]
            # and multiplied with g[3].
            h4_tile = tf.tile(tf.reshape(h[4], [-1, 1, num_outputs[3], 1]),
                              [1, tf.shape(g[3])[1], 1, 1])
            ram = tf.matmul(g[3], h4_tile)
            g[4] = slim.conv2d(ram, num_outputs[3], 3)

            # here we use a slightly different way for the regression part:
            # a sigmoid first limits the regression range, and the same is
            # done for the angle map
            F_score = slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[4], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[4], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, ram
def model(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model; we use slim's implementation of resnet.

    A resnet_v1_50 backbone is followed by a 'feature_fusion' branch that
    merges the 'pool5'..'pool2' end points and predicts score and geometry
    maps from the fused features.

    Args:
        images: input image tensor; passed through mean_image_subtraction
            before the backbone.
        weight_decay: L2 regularization weight used for the backbone arg scope
            and for the fusion convolutions.
        is_training: forwarded to the backbone and to the batch norm
            parameters of the fusion convolutions.

    Returns:
        F_score: 1-channel sigmoid score map.
        F_geometry: geo_map (4 sigmoid channels scaled by FLAGS.text_scale)
            concatenated with angle_map (1 channel) on the last axis.
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # Only the intermediate end points are consumed; the logits are unused.
        _, end_points = resnet_v1.resnet_v1_50(
            images, is_training=is_training, scope='resnet_v1_50')

    # Pass the end-point tensors themselves. The previous code passed the
    # bound method `end_points.values` (never called), so no tensor was
    # actually handed to variable_scope.
    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone features ordered from the deepest stage to the shallowest.
            f = [
                end_points['pool5'], end_points['pool4'],
                end_points['pool3'], end_points['pool2']
            ]
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    # Merge the previous stage with the next shallower
                    # backbone feature: 1x1 conv, then 3x3 conv.
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)

            # A sigmoid bounds every regression output before it is rescaled.
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def hmnet_layer_2(inputs, kp_num=1):
    """Apply the 'deconv3' transposed convolution of the second heatmap layer.

    Args:
        inputs: input tensor handed to the transposed convolution.
        kp_num: not used by this function.

    Returns:
        The output of a 3x3, stride-2, 64-filter `slim.conv2d_transpose`
        created under the 'heatmap_layer_2' variable scope.
    """
    # The returned depth is not needed; the call is kept because it presumably
    # validates that `inputs` has rank >= 4 -- confirm against slim.utils.
    slim.utils.last_dimension(inputs.get_shape(), min_rank=4)

    with tf.variable_scope('heatmap_layer_2'), \
            tf.contrib.slim.arg_scope(resnet_arg_scope()):
        return slim.conv2d_transpose(
            inputs,
            64,
            [3, 3],
            stride=2,
            padding='SAME',
            scope='deconv3')
def get_network_fn(num_classes, weight_decay=0.0):
    """Return a callable that runs resnet_v1_50 inside its arg scope.

    Args:
        num_classes: forwarded to resnet_v1_50 as its second positional
            argument on every call.
        weight_decay: weight decay handed to `resnet_v1.resnet_arg_scope`.

    Returns:
        A function `network_fn(images)` wrapping `resnet_v1.resnet_v1_50`.
        If the wrapped function has a `default_image_size` attribute, the
        returned function carries it as well.
    """
    resnet_scope = resnet_v1.resnet_arg_scope(weight_decay=weight_decay)
    base_fn = resnet_v1.resnet_v1_50

    @functools.wraps(base_fn)
    def network_fn(images):
        with slim.arg_scope(resnet_scope):
            outputs = base_fn(images, num_classes)
        return outputs

    if hasattr(base_fn, 'default_image_size'):
        network_fn.default_image_size = base_fn.default_image_size

    return network_fn
def ResNet50Model(input_tensor, weight_decay=1e-5, is_training=True):
    """Build a resnet_v1_50 backbone followed by two fully connected layers.

    Args:
        input_tensor: image tensor; it is resized to 224x224 before the
            backbone is applied.
        weight_decay: weight decay handed to `resnet_v1.resnet_arg_scope`.
        is_training: forwarded to `resnet_v1.resnet_v1_50`.

    Returns:
        The output of the second fully connected layer (10 outputs).
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        resized = tf.image.resize_images(input_tensor, [224, 224])
        # The end points returned next to the backbone output are not used.
        backbone_out, _ = resnet_v1.resnet_v1_50(
            resized, is_training=is_training, scope='resnet_v1_50')

    # Average over axes 1 and 2 (presumably the spatial axes -- confirm).
    pooled = tf.reduce_mean(backbone_out, reduction_indices=[1, 2])

    hidden = tf.contrib.layers.fully_connected(pooled, num_outputs=512)
    # NOTE(review): no activation_fn is passed, so the library default applies
    # to this final 10-way layer too -- confirm that is intended.
    return tf.contrib.layers.fully_connected(hidden, num_outputs=10)
def _resnet_rf(csv_writer=None):
  """Processes RF parameters for every supported resnet variant.

  Each entry of `_SUPPORTED_RESNET_VARIANTS` is handed to `_process_model_rf`
  together with a resnet arg scope.

  Args:
    csv_writer: A CSV writer for RF parameters; passed through unchanged to
      `_process_model_rf` (may be None).
  """
  for variant in _SUPPORTED_RESNET_VARIANTS:
    # A new arg scope is built for each variant rather than shared.
    _process_model_rf(variant, csv_writer, resnet_v1.resnet_arg_scope())
def model(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model; we use slim's implementation of resnet.

    A resnet_v1_50 backbone is followed by a 'feature_fusion' branch that
    merges the 'pool5'..'pool2' end points and predicts score and geometry
    maps from the fused features. Tensor shapes are printed while the graph
    is being built.

    Args:
        images: input image tensor; passed through mean_image_subtraction
            before the backbone.
        weight_decay: L2 regularization weight used for the backbone arg scope
            and for the fusion convolutions.
        is_training: forwarded to the backbone and to the batch norm
            parameters of the fusion convolutions.

    Returns:
        F_score: 1-channel sigmoid score map.
        F_geometry: geo_map (4 sigmoid channels scaled by FLAGS.text_scale)
            concatenated with angle_map (1 channel) on the last axis.
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # Only the intermediate end points are consumed; the logits are unused.
        _, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    # Pass the end-point tensors themselves. The previous code passed the
    # bound method `end_points.values` (never called), so no tensor was
    # actually handed to variable_scope.
    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone features ordered from the deepest stage to the shallowest.
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    # Merge the previous stage with the next shallower
                    # backbone feature: 1x1 conv, then 3x3 conv.
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # a sigmoid first limits the regression range, and the same is
            # done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def extract_features(self, preprocessed_inputs):
  """Extract features from preprocessed inputs.

  Args:
    preprocessed_inputs: a [batch, height, width, channels] float tensor
      representing a batch of images.

  Returns:
    feature_maps: a list of tensors where the ith tensor has shape [batch,
      height_i, width_i, depth_i]. The list holds the values of the mapping
      returned by `feature_map_generators.pooling_pyramid_feature_maps`.

  Raises:
    ValueError: depth multiplier is not supported.
  """
  if self._depth_multiplier != 1.0:
    raise ValueError('Depth multiplier not supported.')

  preprocessed_inputs = shape_utils.check_min_image_dim(
      129, preprocessed_inputs)

  with tf.variable_scope(
      self._resnet_scope_name, reuse=self._reuse_weights) as scope:
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      with (slim.arg_scope(self._conv_hyperparams_fn())
            if self._override_base_feature_extractor_hyperparams else
            context_manager.IdentityContextManager()):
        with slim.arg_scope(
            [resnet_v1.bottleneck],
            use_bounded_activations=self._use_bounded_activations):
          _, activations = self._resnet_base_fn(
              inputs=ops.pad_to_multiple(preprocessed_inputs,
                                         self._pad_to_multiple),
              num_classes=None,
              is_training=None,
              global_pool=False,
              output_stride=None,
              store_non_strided_activations=True,
              scope=scope)

    with slim.arg_scope(self._conv_hyperparams_fn()):
      # Only the filtered 'block3' activation feeds the pooling pyramid.
      feature_maps = feature_map_generators.pooling_pyramid_feature_maps(
          base_feature_map_depth=self._base_feature_map_depth,
          num_layers=self._num_layers,
          image_features={
              'image_features': self._filter_features(activations)['block3']
          })

  # dict.values() is a non-indexable view on Python 3; materialize it so the
  # documented "list of tensors" contract holds on both Python 2 and 3.
  return list(feature_maps.values())
def resnet_v1_50_16s(image_batch_tensor,
                     number_of_classes,
                     is_training):
    """Returns the resnet_v1_50_16s model definition.
    The function returns the model definition of a network that was described
    in 'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al.
    The network subsamples the input by a factor of 16 and uses
    the bilinear upsampling kernel to upsample prediction by
    a factor of 16. This means that if the image size is not of the
    factor 16, the prediction of different size will be delivered.
    To adapt the network for an any size input use
    adapt_network_for_any_size_input(resnet_v1_50_16s, 16).
    Note: the upsampling kernel is fixed in this model definition, because
    it didn't give significant improvements according to aforementioned paper.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.

    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to input,
        use adapt_network_for_any_size_input to adapt network for any input size.
        Otherwise, the input images sizes should be of multiple 16.
    resnet_v1_50_16s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_50_16s model's variables to resnet_v1_50
        checkpoint variables names. We need this to initialize the weights of
        the resnet_v1_50_16s model with resnet_v1_50 from a checkpoint file.
        Look at ipython notebook for examples.
    """

    # tf.pack was renamed to tf.stack and removed in TensorFlow 1.0; prefer
    # the new name and fall back to the old one on older releases.
    stack_fn = tf.stack if hasattr(tf, 'stack') else tf.pack

    with tf.variable_scope("resnet_v1_50_16s") as model_scope:

        upsample_factor = 16

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        # Fixed (non-trainable) bilinear kernel used for the final upsampling.
        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)
        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom resnet feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(mean_centered_image_batch,
                                                        number_of_classes,
                                                        is_training=is_training,
                                                        global_pool=False,
                                                        output_stride=16)

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor: axes 1 and 2
        # grow by the upsample factor, the other two are kept.
        upsampled_logits_shape = stack_fn([
            downsampled_logits_shape[0],
            downsampled_logits_shape[1] * upsample_factor,
            downsampled_logits_shape[2] * upsample_factor,
            downsampled_logits_shape[3]
        ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original resnet_v1_50 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        # The key is the variable name without the leading scope prefix of
        # this model and without the trailing two characters (e.g. ':0').
        scope_prefix_length = len(model_scope.original_name_scope)
        resnet_v1_50_16s_variables_mapping = {}
        for variable in slim.get_variables(model_scope):
            checkpoint_name = variable.name[scope_prefix_length:-2]
            resnet_v1_50_16s_variables_mapping[checkpoint_name] = variable

    return upsampled_logits, resnet_v1_50_16s_variables_mapping