def resnet_tensorboard(log_dir='/Users/alexwang/data/resnet_summary'):
    """Build ResNet-50 and ResNet-101 graphs and write them out for TensorBoard.

    Both networks are attached to one shared float32 image placeholder of
    shape (None, image_size, image_size, 3); ``image_size`` is read from
    the enclosing module. The networks are built only so that their ops
    appear in the session graph; their outputs are not used.

    Args:
        log_dir: Directory the graph summary is written to. Defaults to the
            path that used to be hard-coded, so existing callers are
            unaffected.
    """
    x_input = tf.placeholder(dtype=tf.float32,
                             shape=(None, image_size, image_size, 3))

    networks = (
        (resnet_v1.resnet_v1_50, 'resnet_v1_50'),
        (resnet_v1.resnet_v1_101, 'resnet_v1_101'),
    )
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        for build_network, scope in networks:
            build_network(x_input,
                          num_classes=1000,
                          is_training=False,
                          global_pool=True,
                          output_stride=None,
                          spatial_squeeze=True,
                          store_non_strided_activations=False,
                          reuse=False,
                          scope=scope)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # The writer receives the session graph at construction time and is
        # closed straight away; no further summaries are added.
        summary_writer = tf.summary.FileWriter(log_dir, graph=sess.graph)
        summary_writer.close()
def res_101_fcn_8s(inputs, num_classes, is_training):
    """Build an FCN-8s style segmentation head on top of ResNet-v1-101.

    The backbone is built with ``output_stride=8`` and no global pooling,
    and its output is upsampled by a factor of 8 with a fixed (constant)
    transposed-convolution filter obtained from
    ``upsampling.bilinear_upsample_weights``.

    Args:
        inputs: Image tensor fed to ``resnet_v1.resnet_v1_101``.
        num_classes: Number of classes predicted by the backbone.
        is_training: Forwarded to the backbone as ``is_training``.

    Returns:
        A tuple ``(output, res_variables)``. ``output`` is the upsampled
        prediction tensor. ``res_variables`` maps each variable name, with
        the leading ``'res_101_fcn_8s/'`` prefix and the trailing two
        characters (the ``':0'`` suffix) removed, to the variable itself,
        presumably so the variables can be restored from a checkpoint that
        uses the un-prefixed names.
    """
    scope_name = 'res_101_fcn_8s'
    factor = 8  # backbone output stride and upsampling factor

    with tf.variable_scope(scope_name):
        # Backbone: the ResNet-v1-101 classification network.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, _ = resnet_v1.resnet_v1_101(inputs,
                                             num_classes,
                                             is_training=is_training,
                                             global_pool=False,
                                             output_stride=factor)

        # Recover the resolution lost to the backbone stride with a
        # transposed convolution whose weights are a constant filter.
        kernel = tf.constant(
            upsampling.bilinear_upsample_weights(factor, num_classes))
        net_shape = tf.shape(net)
        upsampled_shape = tf.stack([
            net_shape[0],
            net_shape[1] * factor,
            net_shape[2] * factor,
            net_shape[3],
        ])
        output = tf.nn.conv2d_transpose(net,
                                        kernel,
                                        output_shape=upsampled_shape,
                                        strides=[1, factor, factor, 1])

    # Strip '<scope_name>/' from the front and ':0' from the end of each name.
    prefix_len = len(scope_name) + 1
    res_variables = {
        variable.name[prefix_len:-2]: variable
        for variable in slim.get_variables(scope_name)
    }
    return output, res_variables
def build_FPN(images, config, is_training, backbone='resnet50'):
    """Build a feature pyramid network on top of a ResNet-v1 backbone.

    Args:
        images: Input image tensor, [batch, h, w, channels].
        config: Object exposing ``TOP_DOWN_PYRAMID_SIZE``, the number of
            output channels of every pyramid convolution.
        is_training: Forwarded to the backbone as ``is_training``.
        backbone: Either ``'resnet50'`` or ``'resnet101'``.

    Returns:
        Dict with keys ``'P2'``, ``'P3'``, ``'P4'`` and ``'P5'`` holding the
        feature maps of the different pyramid levels, each
        [batch, height, width, channels].

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
            (Previously a message was printed and the function failed
            later with a KeyError.)
    """
    # Backbone name -> (slim scope / builder name, unit index in block1..4
    # whose bottleneck output is used as C2..C5).
    backbone_specs = {
        'resnet50': ('resnet_v1_50', (2, 3, 5, 3)),
        'resnet101': ('resnet_v1_101', (2, 3, 22, 3)),
    }
    if backbone not in backbone_specs:
        raise ValueError("Unknown backbone : %s" % (backbone,))
    scope, units = backbone_specs[backbone]

    # Build the backbone network; the builder function carries the same
    # name as its scope.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        build_network = getattr(resnet_v1, scope)
        _, end_points = build_network(images,
                                      is_training=is_training,
                                      scope=scope)

    pyramid = {
        'C%d' % (block + 1):
            end_points['%s/block%d/unit_%d/bottleneck_v1'
                       % (scope, block, unit)]
        for block, unit in enumerate(units, start=1)
    }

    # Build the FPN top-down pathway with lateral connections.
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(
                pyramid['C5'], config.TOP_DOWN_PYRAMID_SIZE, 1)
            for i in range(4, 1, -1):
                lateral_input = pyramid['C%d' % i]
                upshape = tf.shape(lateral_input)
                # Resize the coarser level to this level's spatial size.
                u = tf.image.resize_bilinear(
                    pyramid_feature['P%d' % (i + 1)],
                    size=(upshape[1], upshape[2]))
                c = slim.conv2d(lateral_input,
                                config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
def __init__(self, images):
    """Build a ResNet-v1-101 on ``images`` and expose its first four outputs.

    The first value returned by ``resnet_v1.resnet_v1_101`` is stored in
    ``self.nets``; it is used with ``len()`` and integer indexing, so it is
    assumed to be an indexable collection of tensors rather than a single
    tensor (TODO confirm against the ``resnet_v1`` module in use). Its
    length and the static shape of every element are printed, and elements
    0..3 are stored in ``self.layer`` under ``'block1'``..``'block4'``.

    Args:
        images: Input image tensor passed to the network.
    """
    self.layer = {}
    self.images = images

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        self.nets, _ = resnet_v1.resnet_v1_101(self.images,
                                               1000,
                                               is_training=False,
                                               spatial_squeeze=False,
                                               global_pool=False,
                                               output_stride=16)

    # Debug output: number of outputs, then a name and shape for each.
    output_count = len(self.nets)
    print(output_count)
    for position in range(output_count):
        block_output = self.nets[position]
        print("resnet_bolck_%d" % (position + 1))
        print(block_output.get_shape())

    # Expose the first four outputs under the names block1..block4.
    for block_number in range(1, 5):
        self.layer['block%d' % block_number] = self.nets[block_number - 1]
def model(x, H, reuse, is_training=True):
    """Build the slim backbone selected by ``H`` and return two feature maps.

    Args:
        x: Input tensor passed to the backbone.
        H: Hyper-parameter dict. Keys read here:
            ``'slim_basename'`` - ``'resnet_v1_101'`` or ``'InceptionV1'``;
            ``'slim_top_lname'`` - end-point name of the coarse feature map;
            ``'later_feat_channels'`` - number of leading channels kept
            from the coarse feature map;
            ``'slim_attention_lname'`` (optional) - end-point name of the
            early feature map, defaulting to ``'Mixed_3b'``.
        reuse: Forwarded to the backbone as ``reuse``.
        is_training: Forwarded to the backbone as ``is_training``.

    Returns:
        Tuple ``(coarse_feat, early_feat)``.

    Raises:
        ValueError: If ``H['slim_basename']`` names an unsupported backbone.
            (Previously this surfaced as an UnboundLocalError on ``T``.)
    """
    basename = H['slim_basename']
    if basename == 'resnet_v1_101':
        with slim.arg_scope(resnet.resnet_arg_scope()):
            _, T = resnet.resnet_v1_101(x,
                                        is_training=is_training,
                                        num_classes=1000,
                                        reuse=reuse)
    elif basename == 'InceptionV1':
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            _, T = inception.inception_v1(x,
                                          is_training=is_training,
                                          num_classes=1001,
                                          spatial_squeeze=False,
                                          reuse=reuse)
    else:
        raise ValueError(
            "Unsupported slim_basename '%s'; expected 'resnet_v1_101' or "
            "'InceptionV1'" % (basename,))

    # Keep only the first `later_feat_channels` channels of the top layer.
    coarse_feat = T[H['slim_top_lname']][:, :, :, :H['later_feat_channels']]
    # Sanity check: the chosen layer must provide at least that many channels.
    assert coarse_feat.op.outputs[0].get_shape()[3] == H['later_feat_channels']

    # fine feat can be used to reinspect input
    # NOTE(review): the default 'Mixed_3b' looks like an Inception end-point
    # name; with the ResNet backbone 'slim_attention_lname' presumably has to
    # be set explicitly - confirm.
    attention_lname = H.get('slim_attention_lname', 'Mixed_3b')
    early_feat = T[attention_lname]

    return coarse_feat, early_feat
def build_gcn(inputs, num_classes, preset_model='GCN-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"):
    """
    Builds the GCN model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes
      preset_model: Which model you want to use. Select which ResNet model
        to use for feature extraction: 'GCN-Res50', 'GCN-Res101' or
        'GCN-Res152'
      weight_decay: Weight decay passed to the ResNet arg scope
      is_training: Forwarded to the ResNet backbone
      upscaling_method: Accepted for interface compatibility; not used here
      pretrained_dir: Directory containing the '<resnet scope>.ckpt'
        checkpoint of the selected backbone

    Returns:
      (net, init_fn): the logits tensor and a function that restores the
      pre-trained ResNet weights

    Raises:
      ValueError: if preset_model is not one of the supported names. The
        check runs before anything is added to the graph.
    """
    backbone_scopes = {
        'GCN-Res50': 'resnet_v1_50',
        'GCN-Res101': 'resnet_v1_101',
        'GCN-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbone_scopes:
        raise ValueError("Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model))
    scope = backbone_scopes[preset_model]

    inputs = mean_image_subtraction(inputs)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # The builder function carries the same name as its scope.
        build_backbone = getattr(resnet_v1, scope)
        logits, end_points = build_backbone(inputs, is_training=is_training, scope=scope)
        # GCN requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, scope + '.ckpt'), slim.get_model_variables(scope))

    # NOTE(review): 21 filters are used throughout the decoder regardless of
    # num_classes; kept exactly as in the original.
    n_filters = 21
    features = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']]

    # Walk from pool5 to pool2: refine each level, merge it with the
    # upscaled result of the previous level, then upscale by 2. Blocks are
    # created in the same order as the original unrolled code.
    net = None
    for feature in features:
        branch = GlobalConvBlock(feature, n_filters=n_filters, size=3)
        branch = BoundaryRefinementBlock(branch, n_filters=n_filters, kernel_size=[3, 3])
        if net is not None:
            branch = tf.add(branch, net)
            branch = BoundaryRefinementBlock(branch, n_filters=n_filters, kernel_size=[3, 3])
        net = ConvUpscaleBlock(branch, n_filters=n_filters, kernel_size=[3, 3], scale=2)

    net = BoundaryRefinementBlock(net, n_filters=n_filters, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=n_filters, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=n_filters, kernel_size=[3, 3])

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net, init_fn
def build_refinenet(inputs, num_classes, preset_model='RefineNet-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"):
    """
    Builds the RefineNet model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes
      preset_model: Which model you want to use. Select which ResNet model
        to use for feature extraction: 'RefineNet-Res50',
        'RefineNet-Res101' or 'RefineNet-Res152'
      weight_decay: Weight decay passed to the ResNet arg scope
      is_training: Forwarded to the ResNet backbone
      upscaling_method: "conv" upscales with three ConvUpscaleBlock/ConvBlock
        stages; "bilinear" resizes to the spatial size of `inputs`; any
        other value leaves the refined features at their own resolution
      pretrained_dir: Directory containing the '<resnet scope>.ckpt'
        checkpoint of the selected backbone

    Returns:
      (net, init_fn): the logits tensor and a function that restores the
      pre-trained ResNet weights

    Raises:
      ValueError: if preset_model is not one of the supported names. The
        check runs before anything is added to the graph.
    """
    backbone_scopes = {
        'RefineNet-Res50': 'resnet_v1_50',
        'RefineNet-Res101': 'resnet_v1_101',
        'RefineNet-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbone_scopes:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))
    scope = backbone_scopes[preset_model]

    inputs = mean_image_subtraction(inputs)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # The builder function carries the same name as its scope.
        build_backbone = getattr(resnet_v1, scope)
        logits, end_points = build_backbone(
            inputs, is_training=is_training, scope=scope)
        # RefineNet requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(pretrained_dir, scope + '.ckpt'),
            slim.get_model_variables(scope))

    features = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]
    # 1x1 convolutions bring every level to 256 channels (created in the
    # same pool5 -> pool2 order as before).
    reduced = [slim.conv2d(feature, 256, 1) for feature in features]

    # Cascade of refine blocks, from pool5 down to pool2.
    net = RefineBlock(high_inputs=None, low_inputs=reduced[0])
    for lateral in reduced[1:]:
        net = RefineBlock(net, lateral)

    # The original read `net` before assigning it and referenced an
    # undefined `label_size`, so both branches below crashed; the final
    # convolution also ignored their result. `net` now carries the refined
    # features through the chosen upscaling into the logits layer.
    method = upscaling_method.lower()
    if method == "conv":
        # NOTE(review): three x2 stages give x8 overall; whether that matches
        # the stride of 'pool2' depends on the resnet_v1 module in use -
        # confirm against the label resolution.
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif method == "bilinear":
        # Resize straight to the input resolution so that no separate
        # label-size argument is needed.
        input_shape = tf.shape(inputs)
        net = tf.image.resize_bilinear(
            net, size=(input_shape[1], input_shape[2]))

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    return net, init_fn
def res_v1_101_lstm(input_imgs, input_seqs, input_masks, batch_size, embedding_size, vocab_size, is_training, lstm_dropout_keep_prob):
    """Build a ResNet-v1-101 image encoder followed by an LSTM word decoder.

    Two different graphs are produced depending on ``is_training``. Note
    that the check is ``is_training is True``: any value other than the
    bool ``True`` selects the inference graph.

    Args:
        input_imgs: Image tensor fed to ``resnet_v1.resnet_v1_101``.
        input_seqs: Word ids. Looked up in the embedding table as a whole
            when training; used as the first decoder input at inference.
        input_masks: Summed along axis 1 to obtain the sequence lengths
            passed to ``tf.nn.dynamic_rnn`` (training only).
        batch_size: Batch size used for the LSTM zero state.
        embedding_size: Size of word/image embeddings and of the LSTM.
        vocab_size: Number of words in the vocabulary.
        is_training: Selects the training graph when it is ``True``.
        lstm_dropout_keep_prob: Keep probability for the LSTM input and
            output dropout (training only).

    Returns:
        Training: ``(output_logits, res_variables)`` where ``res_variables``
        maps the names of variables containing ``'resnet_v1_101'``, with the
        leading ``'res_v1_101_lstm/'`` and the trailing two characters
        removed, to the variables.
        Inference: a stacked tensor made of ``input_seqs[0]`` followed by
        element 0 of the arg-max word of each of 30 decoding steps.
    """
    root_scope = 'res_v1_101_lstm'
    decode_steps = 30

    with tf.variable_scope(root_scope):
        # Word embedding table.
        with tf.variable_scope("seq_embedding"):
            embedding_map = tf.get_variable(
                name="map",
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(minval=-0.08,
                                                          maxval=0.08))

        # Image encoder. is_training=False keeps the running mean/variance
        # of batch normalization fixed.
        with slim.arg_scope(resnet_v1.resnet_arg_scope(trainable=is_training)):
            image_feature, _ = resnet_v1.resnet_v1_101(input_imgs,
                                                       None,
                                                       is_training=False,
                                                       output_stride=32)

        # Project the image feature into the embedding space.
        image_feature = tf.squeeze(image_feature, axis=[1, 2])
        image_embedding = slim.fully_connected(
            image_feature,
            embedding_size,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(0, 0.01),
            biases_initializer=tf.zeros_initializer,
            weights_regularizer=slim.l2_regularizer(0.0005),
            scope='image_embedding')

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=embedding_size,
                                                 state_is_tuple=True)

        if is_training is not True:
            # Inference: greedy decoding for a fixed number of steps.
            weights = tf.get_variable("logits/weights",
                                      [embedding_size, vocab_size])
            biases = tf.get_variable("logits/biases", [vocab_size])
            with tf.variable_scope(
                    "lstm",
                    initializer=tf.random_uniform_initializer(
                        minval=-0.08, maxval=0.08)) as lstm_scope:
                # Feed the image embedding to set the initial LSTM state.
                zero_state = lstm_cell.zero_state(batch_size=batch_size,
                                                  dtype=tf.float32)
                _, memory_state = lstm_cell(image_embedding, zero_state)
                lstm_scope.reuse_variables()

                current_words = input_seqs
                output_words = [input_seqs[0]]
                # TODO: replace the end condition of the loop with meeting
                # the end word
                for _ in range(decode_steps):
                    step_inputs = tf.nn.embedding_lookup(embedding_map,
                                                         current_words)
                    step_outputs, memory_state = lstm_cell(step_inputs,
                                                           memory_state)
                    step_logits = tf.matmul(step_outputs, weights) + biases
                    current_words = tf.argmax(step_logits, -1)
                    output_words.append(current_words[0])
                output_words = tf.stack(output_words)
            return output_words

        # Training: teacher-forced decoding with dropout around the cell.
        lstm_cell = tf.contrib.rnn.DropoutWrapper(
            lstm_cell,
            input_keep_prob=lstm_dropout_keep_prob,
            output_keep_prob=lstm_dropout_keep_prob)
        seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

        with tf.variable_scope(
                "lstm",
                initializer=tf.random_uniform_initializer(
                    minval=-0.08, maxval=0.08)) as lstm_scope:
            # Feed the image embedding to set the initial LSTM state.
            zero_state = lstm_cell.zero_state(batch_size=batch_size,
                                              dtype=tf.float32)
            _, initial_state = lstm_cell(image_embedding, zero_state)
            lstm_scope.reuse_variables()

            sequence_length = tf.reduce_sum(input_masks, 1)
            lstm_outputs, _ = tf.nn.dynamic_rnn(
                cell=lstm_cell,
                inputs=seq_embeddings,
                sequence_length=sequence_length,
                initial_state=initial_state,
                dtype=tf.float32,
                scope=lstm_scope)

        # Flatten all time steps into rows before the word classifier.
        lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])
        output_logits = slim.fully_connected(
            lstm_outputs,
            vocab_size,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(0, 0.01),
            biases_initializer=tf.zeros_initializer,
            weights_regularizer=slim.l2_regularizer(0.0005),
            scope='logits')

        # Strip '<root_scope>/' from the front and ':0' from the end of the
        # backbone variable names.
        prefix_len = len(root_scope) + 1
        res_variables = {
            variable.name[prefix_len:-2]: variable
            for variable in slim.get_variables(root_scope)
            if 'resnet_v1_101' in variable.name
        }
        return output_logits, res_variables
def build_pspnet(inputs, label_size, num_classes, preset_model='PSPNet-Res50', pooling_type="MAX", weight_decay=1e-5, upscaling_method="bilinear", is_training=True, pretrained_dir="models"):
    """
    Builds the PSPNet model.

    Arguments:
      inputs: The input tensor
      label_size: Size of the final label tensor. We need to know this for
        proper upscaling
      num_classes: Number of classes
      preset_model: Which model you want to use. Select which ResNet model
        to use for feature extraction: 'PSPNet-Res50', 'PSPNet-Res101' or
        'PSPNet-Res152'
      pooling_type: Max or Average pooling
      weight_decay: Weight decay passed to the ResNet arg scope
      upscaling_method: "conv" upscales with three ConvUpscaleBlock/ConvBlock
        stages; "bilinear" calls Upsampling with label_size; any other
        value skips upscaling
      is_training: Forwarded to the ResNet backbone and to dropout
      pretrained_dir: Directory containing the '<resnet scope>.ckpt'
        checkpoint of the selected backbone

    Returns:
      (net, init_fn): the logits tensor and a function that restores the
      pre-trained ResNet weights

    Raises:
      ValueError: if preset_model is not one of the supported names. The
        check runs before anything is added to the graph.
    """
    backbone_scopes = {
        'PSPNet-Res50': 'resnet_v1_50',
        'PSPNet-Res101': 'resnet_v1_101',
        'PSPNet-Res152': 'resnet_v1_152',
    }
    if preset_model not in backbone_scopes:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))
    scope = backbone_scopes[preset_model]

    inputs = mean_image_subtraction(inputs)

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # The builder function carries the same name as its scope.
        build_backbone = getattr(resnet_v1, scope)
        logits, end_points = build_backbone(
            inputs, is_training=is_training, scope=scope)
        # PSPNet requires pre-trained ResNet weights
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(pretrained_dir, scope + '.ckpt'),
            slim.get_model_variables(scope))

    # The pyramid pooling module works on 'pool3'; its feature-map size is
    # taken to be label_size / 8.
    feature_map_shape = [int(x / 8.0) for x in label_size]
    psp = PyramidPoolingModule(end_points['pool3'],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    # NOTE(review): batch_norm is left on its default mode as before;
    # switching it with is_training would require the moving averages to
    # have been updated during training, which cannot be verified here.
    net = slim.batch_norm(net)
    net = tf.nn.relu(net)

    method = upscaling_method.lower()
    if method == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif method == "bilinear":
        net = Upsampling(net, label_size)

    # Dropout must only be active while training; without is_training it
    # stayed on when the model was built for inference.
    net = slim.dropout(net, keep_prob=0.9, is_training=is_training)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')
    return net, init_fn