def denseASPP(inputs, is_training, output_stride, pre_trained_model, classes, keep_prob=1.0):
    # ResNet-101 backbone.
    with tf.contrib.slim.arg_scope(
            resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        logits, end_points = resnet_v2.resnet_v2_101(
            inputs, num_classes=None, is_training=is_training,
            global_pool=False, output_stride=output_stride)

    if is_training:
        # Restore the pre-trained backbone weights, excluding the logits layer
        # and the global step.
        exclude = ['resnet_v2_101/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
        tf.train.init_from_checkpoint(
            pre_trained_model,
            {v.name.split(':')[0]: v for v in variables_to_restore})

    net = end_points['resnet_v2_101/block4']
    with tf.name_scope("denseASPP"):
        net = denseASPP_block(net, is_training, keep_prob)

    with tf.contrib.slim.arg_scope(
            resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        with tf.name_scope("segmentation"):
            net_shape = net.get_shape().as_list()
            net = tf.nn.dropout(net, keep_prob=keep_prob)
            # 1x1 convolution projects the denseASPP features to class scores.
            weight_1 = weight_variable([1, 1, net_shape[-1], classes])
            bias = bias_variable([classes])
            net = tf.nn.conv2d(net, weight_1, [1, 1, 1, 1], padding='SAME') + bias
        with tf.name_scope("upsampling"):
            # Bilinearly upsample the logits back to the input resolution.
            output = tf.image.resize_bilinear(net, tf.shape(inputs)[1:3])
    return output
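Several of these snippets (denseASPP above, and pyramid_pooling / PSPNet / deeplab_v3_plus below) call weight_variable and bias_variable without defining them. A minimal sketch of what such helpers typically look like; the truncated-normal initializer and its stddev are assumptions, not taken from the source:

def weight_variable(shape, name='weight'):
    # Truncated-normal initialization is a common default for conv kernels
    # (assumed here; the original helper is not shown).
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=name)

def bias_variable(shape, name='bias'):
    # Zero-initialized bias.
    return tf.Variable(tf.constant(0.0, shape=shape), name=name)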
def model(inputs, is_training):
    """Constructs the ResNet model given the inputs."""
    if data_format == 'channels_first':
        # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
        # This provides a large performance boost on GPU. See
        # https://www.tensorflow.org/performance/performance_guide#data_formats
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
    # tf.logging.info('net shape: {}'.format(inputs.shape))

    # Encoder: ResNet backbone.
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
        logits, end_points = base_model(inputs,
                                        num_classes=None,
                                        is_training=is_training,
                                        global_pool=False,
                                        output_stride=output_stride)

    if is_training:
        exclude = [base_architecture + '/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
        tf.train.init_from_checkpoint(pre_trained_model,
                                      {v.name.split(':')[0]: v for v in variables_to_restore})

    inputs_size = tf.shape(inputs)[1:3]
    net = end_points[base_architecture + '/block4']
    encoder_output = atrous_spatial_pyramid_pooling(net, output_stride, batch_norm_decay, is_training)
    # encoder_output = lstm(encoder_output)

    with tf.variable_scope("decoder"):
        with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
            with arg_scope([layers.batch_norm], is_training=is_training):
                with tf.variable_scope("low_level_features"):
                    low_level_features = end_points[base_architecture +
                                                    '/block1/unit_3/bottleneck_v2/conv1']
                    low_level_features = layers_lib.conv2d(low_level_features, 48, [1, 1],
                                                           stride=1, scope='conv_1x1')
                    low_level_features_size = tf.shape(low_level_features)[1:3]
                with tf.variable_scope("upsampling_logits"):
                    net = tf.image.resize_bilinear(encoder_output, low_level_features_size,
                                                   name='upsample_1')
                    net = tf.concat([net, low_level_features], axis=3, name='concat')
                    net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_1')
                    net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_2')
                    net = layers_lib.conv2d(net, num_classes, [1, 1],
                                            activation_fn=None, normalizer_fn=None,
                                            scope='conv_1x1')
                    logits = tf.image.resize_bilinear(net, inputs_size, name='upsample_2')
    return logits
def main(_):
    with tf.Graph().as_default():
        url = 'https://upload.wikimedia.org/wikipedia/commons/5/5c/Tigershark3.jpg'
        # urllib.urlopen is Python 2 only; use urllib.request on Python 3.
        image_string = urllib.request.urlopen(url).read()
        image = tf.image.decode_jpeg(image_string, channels=3)
        processed_image = inception_preprocessing.preprocess_image(image,
                                                                   image_size,
                                                                   image_size,
                                                                   is_training=False)
        processed_images = tf.expand_dims(processed_image, 0)

        # Create the model; use the default arg scope to configure the batch
        # norm parameters.
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, _ = resnet_v2.resnet_v2_50(processed_images,
                                               num_classes=1001,
                                               is_training=False)
        probabilities = tf.nn.softmax(logits)

        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v2_50.ckpt'),
            slim.get_model_variables('resnet_v2_50'))

        with tf.Session() as sess:
            init_fn(sess)
            np_image, probabilities = sess.run([image, probabilities])
            probabilities = np.reshape(probabilities, [1001])
            # Class indices sorted by descending probability.
            sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]

        plt.figure()
        plt.imshow(np_image.astype(np.uint8))
        plt.axis('off')
        plt.show()

        names = imagenet.create_readable_names_for_imagenet_labels()
        for i in range(5):
            index = sorted_inds[i]
            # Shift the index of a class name by one.
            print('Probability %0.6f%% => [%s]' % (probabilities[index] * 100, names[index + 1]))
def ASPP(inputs, output_stride, batch_norm_decay, is_training, depth=256):
    if output_stride not in [8, 16]:
        raise ValueError('output_stride must be either 8 or 16.')
    # Rates are (6, 12, 18) for output_stride 16 and doubled for output_stride 8.
    # (This assignment was commented out in the original, leaving atrous_rates
    # undefined below.)
    atrous_rates = [6, 12, 18]
    if output_stride == 8:
        atrous_rates = [2 * rate for rate in atrous_rates]

    # The resnet_v2 arg_scope supplies the weight regularizer and batch-norm
    # parameters to the slim conv layers below.
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
        with arg_scope([layers.batch_norm], is_training=is_training):
            inputs_size = tf.shape(inputs)[1:3]
            conv_11 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1, scope='conv_1x1')
            conv_33_1 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                          rate=atrous_rates[0], scope='conv_3x3_1')
            conv_33_2 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                          rate=atrous_rates[1], scope='conv_3x3_2')
            conv_33_3 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                          rate=atrous_rates[2], scope='conv_3x3_3')

            # Image-level features: global average pooling, 1x1 conv, upsample.
            with tf.variable_scope("image_level_features"):
                image_level_features = tf.reduce_mean(inputs, [1, 2],
                                                      name='global_average_pooling',
                                                      keepdims=True)
                image_level_features = layers_lib.conv2d(image_level_features, depth, [1, 1],
                                                         stride=1, scope='conv_1x1')
                image_level_features = tf.image.resize_bilinear(image_level_features,
                                                                inputs_size, name='upsample')

            net = tf.concat([conv_11, conv_33_1, conv_33_2, conv_33_3, image_level_features],
                            axis=3, name='concat')
            net = layers_lib.conv2d(net, depth, [1, 1], stride=1, scope='conv_1x1_concat')
            return net
def resnet_model(images, is_training, reuse=tf.AUTO_REUSE):
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_fn = RESNET_MODELS[model_name]
        logits, _ = resnet_fn(images, num_classes, is_training=is_training, reuse=reuse)
    logits = tf.reshape(logits, [-1, num_classes])
    return logits
def model(images, filter_type, filter_trainable, weight_decay, batch_size,
          is_training, num_classes=2):
    with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
        # High-pass residuals of the input images are fed to the network.
        inputs = get_residuals(images, filter_type, filter_trainable)
        _, end_points = resnet_small(inputs,
                                     num_classes=None,
                                     is_training=is_training,
                                     global_pool=False,
                                     output_stride=None,
                                     include_root_block=False)
        net = end_points['resnet_small/block4']
        # Two 4x learned transposed convolutions (bilinearly initialized)
        # upsample block4 back to the input resolution.
        net = tf.nn.conv2d_transpose(
            net,
            tf.Variable(bilinear_upsample_weights(4, 64, 1024),
                        dtype=tf.float32, name='bilinear_kernel0'),
            [batch_size,
             tf.shape(end_points['resnet_small/block2'])[1],
             tf.shape(end_points['resnet_small/block2'])[2],
             64],
            strides=[1, 4, 4, 1], padding='SAME')
        end_points['upsample1'] = net
        net = tf.nn.conv2d_transpose(
            net,
            tf.Variable(bilinear_upsample_weights(4, 4, 64),
                        dtype=tf.float32, name='bilinear_kernel1'),
            [batch_size, tf.shape(inputs)[1], tf.shape(inputs)[2], 4],
            strides=[1, 4, 4, 1], padding='SAME')
        end_points['upsample2'] = net
        net = layers.batch_norm(net, activation_fn=tf.nn.relu,
                                is_training=is_training, scope='post_norm')
        logits = slim.conv2d(net, num_classes, [5, 5],
                             activation_fn=None, normalizer_fn=None, scope='logits')
        preds = tf.cast(tf.argmax(logits, 3), tf.int32)
        # Probability map for the positive class.
        preds_map = tf.nn.softmax(logits)[:, :, :, 1]
        return logits, preds, preds_map, net, end_points, inputs
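model() above relies on a bilinear_upsample_weights helper that is not shown. A plausible sketch following the standard FCN bilinear-kernel initializer; the signature (factor, out_channels, in_channels) is inferred from the calls above:

import numpy as np

def bilinear_upsample_weights(factor, out_channels, in_channels):
    # Kernel size for an integer upsampling factor (FCN convention).
    ksize = 2 * factor - factor % 2
    center = factor - 1 if ksize % 2 == 1 else factor - 0.5
    og = np.ogrid[:ksize, :ksize]
    kernel = ((1 - abs(og[0] - center) / factor) *
              (1 - abs(og[1] - center) / factor))
    # conv2d_transpose expects [height, width, out_channels, in_channels];
    # place the bilinear kernel on the channel diagonal.
    weights = np.zeros((ksize, ksize, out_channels, in_channels), dtype=np.float32)
    for i in range(min(out_channels, in_channels)):
        weights[:, :, i, i] = kernel
    return weights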
def load(self, **kwargs):
    session = kwargs["session"]
    assert isinstance(session, tf.Session)

    x_input = tf.placeholder(self.x_dtype, shape=(None,) + self.x_shape)
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_v2.resnet_v2_152(x_input, num_classes=self.n_class,
                                is_training=False, reuse=tf.AUTO_REUSE)

    # Download and extract the pre-trained checkpoint if it is not cached.
    model_path = get_model_path('resnet_v2_152')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
        urllib.request.urlretrieve(
            'http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz',
            os.path.join(model_path, 'resnet_v2_152_2017_04_14.tar.gz'),
            show_progress)
        tar = tarfile.open(os.path.join(model_path, 'resnet_v2_152_2017_04_14.tar.gz'))
        for file_name in tar.getnames():
            tar.extract(file_name, model_path)

    saver = tf.train.Saver(slim.get_model_variables(scope='resnet_v2'))
    saver.restore(session, os.path.join(model_path, 'resnet_v2_152.ckpt'))
def yolonet(images, is_training=True):
    # Note: the original wrapped this body in `with tf.Graph().as_default():`,
    # but the detection head must live in the same graph as `images`, so no
    # new graph is created here.
    tf.logging.set_verbosity(tf.logging.INFO)
    output_depth = NUM_CLASS + 5 * BOX_PER_CELL
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        bottleneck, _ = resnet_v2.resnet_v2_50(images, global_pool=False,
                                               is_training=is_training)
    with arg_scope([layers.batch_norm], is_training=is_training):
        net = bottleneck
        net = layers_lib.conv2d(net, 512, [1, 1], normalizer_fn=layers.batch_norm,
                                scope='yolo_layer1')
        net = layers_lib.conv2d(net, 512, [3, 3], normalizer_fn=layers.batch_norm,
                                scope='yolo_layer2')
        net = layers_lib.conv2d(net, 512, [3, 3], normalizer_fn=layers.batch_norm,
                                scope='yolo_layer3')
        net = layers_lib.conv2d(net, output_depth, [1, 1],
                                activation_fn=None, normalizer_fn=None,
                                scope='yolo_output')
    return net
def after_create_session(self, session, coord=None):
    # When resuming from a checkpoint (global step > 0), re-run the slim
    # init_fn and reset the global step so fine-tuning starts from step 0.
    # (The original referenced an undefined `global_step` and wrapped the
    # assign in a resnet arg_scope, which has no effect on an assign op.)
    global_step = tf.train.get_or_create_global_step()
    if session.run(global_step) > 0:
        self.init_fn(session)
        session.run(global_step.assign(0))
def featureExtractor(input, feature_norm, model='101', reuse=False, scope='resnet_v2_101'):
    with tf.variable_scope(scope, reuse=reuse) as scope:
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            input = scale_RGB(input)
            if '50' in model:
                _, end_points = resnet_v2.resnet_v2_50(input, global_pool=True,
                                                       is_training=False, reuse=reuse)
            elif '101' in model:
                _, end_points = resnet_v2.resnet_v2_101(input, global_pool=True,
                                                        is_training=False, reuse=reuse)
        # f = end_points['stabNet/pathFinder/featureExtractor/resnet_v2_101/block3/unit_23/bottleneck_v2/conv1']  # (18 x 32) x (18 x 32)
        # Note: this endpoint key hardcodes the resnet_v2_101 variant, so the
        # '50' branch above will not find its features here.
        f = end_points['{}/resnet_v2_101/block4/unit_2/bottleneck_v2/conv1'.format(scope.name)]  # (9 x 16) x (9 x 16)
        if feature_norm:
            f = featureL2Norm(f)
        return f
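featureL2Norm is undefined in the snippet; it presumably L2-normalizes the feature map along the channel axis. A minimal sketch under that assumption:

def featureL2Norm(feature, epsilon=1e-6):
    # Normalize each spatial position's channel vector to unit L2 norm.
    # The epsilon guards against division by zero; its value is an assumption.
    return feature / (tf.norm(feature, axis=-1, keepdims=True) + epsilon)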
def CNN(inputs):
    with tf.variable_scope("CNN"):
        # Earlier backbone experiments (kept for reference):
        # layer = slim.conv2d(inputs, 64, [8, 8], [2, 4], normalizer_fn=slim.batch_norm, activation_fn=None)  # [B, H//2, W//4, 64]
        # tf.summary.image('zoom', tf.transpose(layer, [3, 1, 2, 0]), max_outputs=6)
        # layer = utils_nn.resNet50(layer, True, [2, 1])   # [N, H//32, W, 2048]
        # tf.summary.image('2_res50', tf.transpose(layer, [3, 1, 2, 0]), max_outputs=6)
        # with slim.arg_scope(inception.inception_v3_arg_scope()):
        #     with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=True):
        #         layer, _ = inception.inception_v3_base(inputs, final_endpoint="Mixed_5d")
        # layer = utils_nn.resNet101(inputs, True)
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            layer, _ = resnet_v2.resnet_v2_152(inputs, None, is_training=True,
                                               global_pool=False, output_stride=16)
        # Project the features straight down to 256 channels: [N, 1, 256, 256].
        with tf.variable_scope("Normalize"):
            layer = slim.conv2d(layer, 1024, [2, 2], [2, 1],
                                normalizer_fn=slim.batch_norm, activation_fn=None)
            layer = slim.conv2d(layer, 512, [1, 1],
                                normalizer_fn=slim.batch_norm, activation_fn=None)
            layer = slim.conv2d(layer, 256, [1, 1],
                                normalizer_fn=slim.batch_norm, activation_fn=None)
        return layer
def load(self, **kwargs):
    session = kwargs["session"]
    assert isinstance(session, tf.Session)

    x_input = tf.placeholder(tf.float32, shape=(None,) + self.x_shape)
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(x_input, self.n_class,
                                           is_training=False, reuse=tf.AUTO_REUSE)

    # Download and extract the adversarial-logit-pairing checkpoint if needed.
    model_path = get_model_path('alp')
    url = 'http://download.tensorflow.org/models/adversarial_logit_pairing/imagenet64_alp025_2018_06_26.ckpt.tar.gz'
    fname = os.path.join(model_path, url.split('/')[-1])
    if not os.path.exists(fname):
        if not os.path.exists(model_path):
            os.makedirs(model_path)
        from six.moves import urllib
        urllib.request.urlretrieve(url, fname, show_progress)
        import tarfile
        t = tarfile.open(fname)
        t.extractall(model_path)
        print('Extracted model')

    saver = tf.train.Saver()
    saver.restore(session, fname.split('.tar.gz')[0])
def get_resnet(x_tensor, reuse, is_training, x_batch_size):
    with tf.variable_scope('resnet', reuse=reuse):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            resnet, end_points = resnet_v2.resnet_v2_50(x_tensor,
                                                        global_pool=False,
                                                        is_training=is_training,
                                                        reuse=reuse,
                                                        output_stride=32)
        global_pool = tf.reduce_mean(resnet, [1, 2])
        with tf.variable_scope('fc'):
            global_pool = slim.fully_connected(global_pool, 2048, scope='fc/fc_1')
            global_pool = slim.fully_connected(global_pool, 1024, scope='fc/fc_2')
            global_pool = slim.fully_connected(global_pool, 512, scope='fc/fc_3')
        # Predict one (x, y) offset per grid vertex.
        theta = output_layer(global_pool, (grid_h + 1) * (grid_w + 1) * 2)
        with tf.name_scope('gen_theta'):
            # L1 penalty that keeps the predicted warp close to the identity.
            id2_loss = tf.reduce_mean(tf.abs(theta)) * id_mul
    return theta, id2_loss, id2_loss
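output_layer, grid_h, grid_w and id_mul come from the surrounding project and are not defined here. A plausible reading of output_layer is a linear head that regresses the grid offsets; a hedged sketch under that assumption:

def output_layer(inputs, num_outputs):
    # Linear projection (no activation), suitable for regressing offsets.
    # This is a guess at the missing helper, not the project's actual code.
    return slim.fully_connected(inputs, num_outputs, activation_fn=None,
                                scope='output_layer')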
def model(inputs, is_training):
    """Constructs the ResNet model given the inputs."""
    if data_format == 'channels_first':
        # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
        # This provides a large performance boost on GPU. See
        # https://www.tensorflow.org/performance/performance_guide#data_formats
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
        logits, end_points = base_model(inputs,
                                        num_classes=None,
                                        is_training=is_training,
                                        global_pool=False,
                                        output_stride=output_stride)

    variables_to_restore = None
    if is_training:
        exclude = [base_architecture + '/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)

    inputs_size = tf.shape(inputs)[1:3]
    net = end_points[base_architecture + '/block4']
    net = atrous_spatial_pyramid_pooling(net, output_stride, batch_norm_decay, is_training)
    with tf.variable_scope("upsampling_logits"):
        net = layers_lib.conv2d(net, num_classes, [1, 1],
                                activation_fn=None, normalizer_fn=None, scope='conv_1x1')
        logits = tf.image.resize_bilinear(net, inputs_size, name='upsample')
    return logits, variables_to_restore
def _ConvNet2D(self, x, is_training, reuse=False):
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        f, _ = resnet_v2.resnet_v2_50(x, num_classes=None, is_training=is_training,
                                      global_pool=False, reuse=reuse)
    print("resnet.out.shape: %s" % f.get_shape())
    with tf.variable_scope("ConvNet2D", reuse=reuse):
        f = tf.reduce_mean(f, [1, 2], name='global_avg_pooling', keep_dims=True)
        # Two 1x1 convolutions act as fully connected layers on the pooled features.
        z = slim.conv2d(f, 4096, [1, 1], padding='VALID',
                        normalizer_fn=None, scope='f2zfeture')
        z = slim.conv2d(z, self.z_dim, [1, 1], padding='VALID',
                        normalizer_fn=None, scope='z_2d')
        # g_feature = tf.squeeze(g_feature, [1, 2], name='global_spatial_squeeze')
    return tf.expand_dims(z, 1)
def resnet_v2_50(init_weights):
    tf_compat.reset_default_graph()
    image_size = 224
    inputs = tf_compat.random_normal([1, image_size, image_size, 3])
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(inputs, 1000, is_training=False)
    return tf_compat.get_default_graph()
def pyramid_pooling(input, is_training):
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        with tf.name_scope("pyramid_pooling"):
            input_shape = input.get_shape().as_list()
            num_output_features = input_shape[-1] // LEVEL_SIZE
            branch_outputs = []
            # One branch per pooling grid: 1x1, 2x2, 3x3 and 6x6 bins.
            for bin_size in [1, 2, 3, 6]:
                with tf.name_scope("pool_%d" % bin_size):
                    kernel = [1, input_shape[1] // bin_size, input_shape[2] // bin_size, 1]
                    pool_bin = tf.nn.avg_pool(input, kernel, kernel, padding='VALID')
                    weight = weight_variable([1, 1, input_shape[-1], num_output_features])
                    output = tf.nn.conv2d(pool_bin, weight, [1, 1, 1, 1], padding='SAME')
                    output = batch_norm(output, is_training)
                    output = tf.nn.relu(output)
                    # Upsample each branch back to the feature-map resolution.
                    output = tf.image.resize_bilinear(output, [input_shape[1], input_shape[2]])
                    branch_outputs.append(output)
            # Concatenate the input with all pooled branches along channels.
            for output in branch_outputs:
                input = tf.concat([input, output], axis=-1)
    return input
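pyramid_pooling (and the PSPNet / deeplab_v3_plus heads below) call a bare batch_norm(x, is_training) helper that is not defined in these snippets. A minimal sketch of such a wrapper; the momentum value is an assumption:

def batch_norm(inputs, is_training, decay=0.9997):
    # Thin wrapper over the layers API. When training, remember to run the
    # tf.GraphKeys.UPDATE_OPS collection alongside the train op so the
    # moving statistics are updated.
    return tf.layers.batch_normalization(inputs, momentum=decay, training=is_training)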
def create_context_path(input_im):
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        last_layer, end_points = resnet_v2.resnet_v2_101(input_im, is_training=True,
                                                         scope='resnet_v2_101',
                                                         global_pool=False)
        frontend_scope = 'resnet_v2_101'
        init_fn = slim.assign_from_checkpoint_fn(
            model_path=os.path.join('models', 'resnet_v2_101.ckpt'),
            var_list=slim.get_model_variables('resnet_v2_101'),
            ignore_missing_vars=True)

    layer_reduced16 = end_points[frontend_scope + '/block2']
    layer_reduced32 = last_layer
    # Attention refinement modules on the 1/16 and 1/32 feature maps.
    layer_arm16 = arm_module(layer_reduced16, n_filter_maps=512)
    layer_arm32 = arm_module(layer_reduced32, n_filter_maps=2048)
    layer_global_context = tf.reduce_mean(last_layer, axis=[1, 2], keepdims=True,
                                          name='global_context')

    # Combining context features.
    layer_context1 = tf.math.multiply(layer_arm32, layer_global_context)
    layer_context1 = layers.UpSampling2D(size=4, interpolation='bilinear')(layer_context1)
    layer_context2 = layers.UpSampling2D(size=2, interpolation='bilinear')(layer_arm16)
    context_output = tf.concat([layer_context1, layer_context2], axis=-1)
    return context_output, init_fn
def PSPNet(inputs, is_training, output_stride, pre_trained_model, classes):
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        logits, end_points = resnet_v2.resnet_v2_101(inputs,
                                                     num_classes=None,
                                                     is_training=is_training,
                                                     global_pool=False,
                                                     output_stride=output_stride)

    if is_training:
        exclude = ['resnet_v2_101/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
        tf.train.init_from_checkpoint(pre_trained_model,
                                      {v.name.split(':')[0]: v for v in variables_to_restore})

    net = end_points['resnet_v2_101/block4']
    encoder_output = pyramid_pooling(net, is_training)

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        # Auxiliary head on block3, used as a training-time loss.
        with tf.name_scope("auxi_logits"):
            auxi_logits = end_points['resnet_v2_101/block3']
            auxi_shape = auxi_logits.get_shape().as_list()
            weight_3 = weight_variable([3, 3, auxi_shape[-1], auxi_shape[-1] // 4])
            auxi_logits = tf.nn.conv2d(auxi_logits, weight_3, [1, 1, 1, 1], padding='SAME')
            auxi_logits = batch_norm(auxi_logits, is_training)
            auxi_logits = tf.nn.relu(auxi_logits)
            weight_1 = weight_variable([1, 1, auxi_shape[-1] // 4, classes])
            bias = bias_variable([classes])
            auxi_logits = tf.nn.conv2d(auxi_logits, weight_1, [1, 1, 1, 1], padding='SAME') + bias
            auxi_logits = tf.image.resize_bilinear(auxi_logits, tf.shape(inputs)[1:3])

        # Main segmentation head on the pyramid-pooling output.
        with tf.name_scope("segmentation"):
            encoder_output_shape = encoder_output.get_shape().as_list()
            weight_3 = weight_variable([3, 3, encoder_output_shape[-1],
                                        encoder_output_shape[-1] // 4])
            net = tf.nn.conv2d(encoder_output, weight_3, [1, 1, 1, 1], padding='SAME')
            net = batch_norm(net, is_training)
            net = tf.nn.relu(net)
            weight_1 = weight_variable([1, 1, encoder_output_shape[-1] // 4, classes])
            bias = bias_variable([classes])
            net = tf.nn.conv2d(net, weight_1, [1, 1, 1, 1], padding='SAME') + bias
            logits = tf.image.resize_bilinear(net, tf.shape(inputs)[1:3])

    return auxi_logits, logits
def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph: randomly resize, then randomly pad up to image_resize.
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        img_resize_tensor = tf.placeholder(tf.int32, [2])
        x_input_resize = tf.image.resize_images(x_input, img_resize_tensor,
                                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        shape_tensor = tf.placeholder(tf.int32, [3])
        padded_input = padding_layer_iyswim(x_input_resize, shape_tensor)
        # 330 is the last value to keep 8*8 output, 362 is the last value to
        # keep 9*9 output, stride = 32.
        padded_input.set_shape((FLAGS.batch_size, FLAGS.image_resize, FLAGS.image_resize, 3))

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            # Classify the randomized (resized + padded) input. The original
            # passed the raw x_input here, which left the resize/pad pipeline
            # above as dead code.
            net, end_points = resnet_v2.resnet_v2_50(padded_input,
                                                     num_classes=num_classes,
                                                     is_training=False)
        predicted_labels = tf.argmax(end_points['predictions'], 3)

        # Run computation.
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
                for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                    # Random horizontal flip.
                    if np.random.randint(0, 2, size=1) == 1:
                        images = images[:, :, ::-1, :]
                    resize_shape_ = np.random.randint(310, 331)
                    labels = sess.run(
                        predicted_labels,
                        feed_dict={
                            x_input: images,
                            img_resize_tensor: [resize_shape_] * 2,
                            shape_tensor: np.array([
                                random.randint(0, FLAGS.image_resize - resize_shape_),
                                random.randint(0, FLAGS.image_resize - resize_shape_),
                                FLAGS.image_resize])})
                    labels = labels.flatten()
                    for filename, label in zip(filenames, labels):
                        out_file.write('{0},{1}\n'.format(filename, label))
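padding_layer_iyswim is not defined here. Given the feed dict above (shape_tensor = [row_offset, col_offset, canvas_size]), it evidently zero-pads the resized image into a canvas_size x canvas_size canvas at a random offset, as in the input-randomization defense of Xie et al. A sketch under that assumption, for square inputs only:

def padding_layer_iyswim(inputs, shape, name=None):
    # shape = [pad_top, pad_left, out_size]; place the image at the given
    # offset inside an out_size x out_size zero canvas.
    pad_top, pad_left, out_size = shape[0], shape[1], shape[2]
    in_shape = tf.shape(inputs)
    paddings = tf.stack([[0, 0],
                         [pad_top, out_size - pad_top - in_shape[1]],
                         [pad_left, out_size - pad_left - in_shape[2]],
                         [0, 0]])
    return tf.pad(inputs, paddings, name=name)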
def RES(inputs, seq_len, reuse=False):
    with tf.variable_scope("OCR", reuse=reuse):
        print("inputs shape:", inputs.shape)
        # Earlier backbone experiments (kept for reference):
        # layer = utils_nn.resNet101V2(inputs, True)        # [N, H, W/16, 2048]
        # layer = utils_nn.resNet50(inputs, True, [2, 1])   # [N, H/16, W, 2048]
        # layer = utils_nn.resNext50(layer, True, [2, 1])   # [N, H/16, W, 2048]
        # with slim.arg_scope(inception.inception_v3_arg_scope()):
        #     with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=True):
        #         layer, _ = inception.inception_v3_base(inputs, final_endpoint="Mixed_5d")
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            layer, _ = resnet_v2.resnet_v2_152(inputs, None, is_training=True,
                                               global_pool=False, output_stride=16)
        print("ResNet shape:", layer.shape)

        # Project the features straight down to 256 channels: [N, 1, 512, 256].
        with tf.variable_scope("Normalize"):
            layer = slim.conv2d(layer, 1024, [2, 2], [2, 1],
                                normalizer_fn=slim.batch_norm, activation_fn=None)
            layer = slim.conv2d(layer, 512, [1, 1],
                                normalizer_fn=slim.batch_norm, activation_fn=None)
            layer = slim.conv2d(layer, 256, [1, 1],
                                normalizer_fn=slim.batch_norm, activation_fn=None)

        temp_layer = layer
        print("ResNet shape:", layer.shape)

        # Append coordinate information; embedd_size extra channels are added.
        # max_width_height is the maximum width after downscaling; the actual
        # maximum image width is max_width_height * 4.
        with tf.variable_scope("Coordinates"):
            max_width_height = MAX_IMAGE_WIDTH // 8
            embedd_size = 64
            layer = Coordinates(layer, max_width_height, embedd_size)
            print("Coordinates shape:", layer.shape)

        with tf.variable_scope("LSTM"):
            layer = tf.squeeze(layer, axis=[1])
            print("SEQ shape:", layer.shape)
            layer = LSTM(layer, 256 + embedd_size, seq_len)  # [N, W*H, 256]
            print("lstm shape:", layer.shape)

        return layer, temp_layer
def built_network(self, inputs, is_training, dropout_rate):
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, end_points = resnet_v2.resnet_v2_50(inputs, self.opt.num_classes,
                                                 is_training=is_training)
    # Collapse the [batch, 1, 1, num_classes] logits to [batch, num_classes].
    net = tf.squeeze(net, axis=[1, 2])
    return net
def resnet_model(images, is_training, reuse=tf.AUTO_REUSE):
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_fn = resnet_v2.resnet_v2_50
        logits, _ = resnet_fn(images, num_classes, is_training=is_training, reuse=reuse)
    logits = tf.reshape(logits, [-1, num_classes])
    return logits
def deeplab_v3_plus(inputs, is_training, output_stride, pre_trained_model):
    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        logits, end_points = resnet_v2.resnet_v2_101(inputs,
                                                     num_classes=None,
                                                     is_training=is_training,
                                                     global_pool=False,
                                                     output_stride=output_stride)

    if is_training:
        exclude = ['resnet_v2_101/logits', 'global_step']
        variables_to_restore = tf.contrib.slim.get_variables_to_restore(exclude=exclude)
        tf.train.init_from_checkpoint(pre_trained_model,
                                      {v.name.split(':')[0]: v for v in variables_to_restore})

    net = end_points['resnet_v2_101/block4']
    encoder_output = aspp(net, output_stride, is_training)

    with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
        with tf.name_scope('low_level_features'):
            low_level_features = end_points['resnet_v2_101/block1/unit_3/bottleneck_v2/conv1']
            in_channels = low_level_features.get_shape().as_list()[-1]
            low_level_shape = tf.shape(low_level_features)
            # 1x1 conv reduces the low-level features to 48 channels.
            weight_1x1_low_level = weight_variable([1, 1, in_channels, 48],
                                                   name='weight_1x1_low_level')
            conv_1x1_low_level = tf.nn.conv2d(low_level_features, weight_1x1_low_level,
                                              [1, 1, 1, 1], padding='SAME',
                                              name='conv_1x1_low_level')
            conv_1x1_low_level = tf.nn.relu(batch_norm(conv_1x1_low_level, is_training),
                                            name='relu_1x1_low_level')
            low_level_features_size = low_level_shape[1:3]

        with tf.name_scope("upsampling_logits"):
            net = tf.image.resize_bilinear(encoder_output, low_level_features_size,
                                           name='upsample_1')
            # Concatenate with the *reduced* low-level features; the original
            # concatenated the raw features, leaving the 1x1 projection above
            # unused.
            net = tf.concat([net, conv_1x1_low_level], axis=-1, name='concat')
            weight_3x3_upsample_1 = weight_variable([3, 3, net.get_shape().as_list()[-1], 256],
                                                    name='weight_3x3_upsample_1')
            weight_3x3_upsample_2 = weight_variable([3, 3, 256, 256],
                                                    name='weight_3x3_upsample_2')
            weight_1x1_upsample_3 = weight_variable([1, 1, 256, CLASSES],
                                                    name='weight_1x1_upsample_3')
            bias = bias_variable([CLASSES], name='softmax_bias')
            net = tf.nn.conv2d(net, weight_3x3_upsample_1, [1, 1, 1, 1], padding='SAME',
                               name='conv_3x3_upsample_1')
            net = tf.nn.relu(batch_norm(net, is_training), name='conv_3x3_relu_1')
            net = tf.nn.conv2d(net, weight_3x3_upsample_2, [1, 1, 1, 1], padding='SAME',
                               name='conv_3x3_upsample_2')
            net = tf.nn.relu(batch_norm(net, is_training), name='conv_3x3_relu_2')
            net = tf.nn.conv2d(net, weight_1x1_upsample_3, [1, 1, 1, 1], padding='SAME',
                               name='conv_1x1_upsample_3') + bias
            logits = tf.image.resize_bilinear(net, tf.shape(inputs)[1:3], name='upsample_2')

    return logits
def build(self):
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, self.endpoints = resnet_v2.resnet_v2_50(self.inputs['images'],
                                                        num_classes=self.num_classes,
                                                        is_training=self.is_training)
    self.outputs['logits'] = tf.reshape(logits, [-1, self.num_classes])
    self.outputs['argmax'] = tf.argmax(self.outputs['logits'], axis=1,
                                       name='output/predict')
def built_network(self, inputs1, inputs2, is_training=False):
    # Run both inputs through a shared ResNet by stacking them along the
    # batch axis.
    inputs = tf.concat([inputs1, inputs2], axis=0)
    with tf.variable_scope("Seg"):
        with tf.contrib.slim.arg_scope(
                resnet_v2.resnet_arg_scope(batch_norm_decay=_BATCH_NORM_DECAY)):
            logits, end_points = resnet_v2.resnet_v2_50(inputs, is_training=is_training)
    return end_points
def atrous_spatial_pyramid_pooling(self, inputs, output_stride, batch_norm_decay,
                                   is_training, depth=256):
    """Atrous Spatial Pyramid Pooling.

    Args:
      inputs: A tensor of size [batch, height, width, channels].
      output_stride: The ResNet unit's stride. Determines the rates for atrous
        convolution. The rates are (6, 12, 18) when the stride is 16, and
        doubled when it is 8.
      batch_norm_decay: The moving average decay when estimating layer
        activation statistics in batch normalization.
      is_training: A boolean denoting whether the input is for training.
      depth: The depth of the ResNet unit output.

    Returns:
      The atrous spatial pyramid pooling output.
    """
    with tf.variable_scope("aspp"):
        if output_stride not in [8, 16]:
            raise ValueError('output_stride must be either 8 or 16.')
        atrous_rates = [6, 12, 18]
        if output_stride == 8:
            atrous_rates = [2 * rate for rate in atrous_rates]

        with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
            with arg_scope([layers.batch_norm], is_training=is_training):
                inputs_size = tf.shape(inputs)[1:3]
                # (a) One 1x1 convolution and three 3x3 convolutions with
                # rates = (6, 12, 18) when output stride = 16; the rates are
                # doubled when output stride = 8.
                conv_1x1 = layers_lib.conv2d(inputs, depth, [1, 1], stride=1,
                                             scope="conv_1x1")
                conv_3x3_1 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                               rate=atrous_rates[0], scope='conv_3x3_1')
                conv_3x3_2 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                               rate=atrous_rates[1], scope='conv_3x3_2')
                conv_3x3_3 = layers_lib.conv2d(inputs, depth, [3, 3], stride=1,
                                               rate=atrous_rates[2], scope='conv_3x3_3')

                # (b) The image-level features.
                with tf.variable_scope("image_level_features"):
                    # Global average pooling.
                    image_level_features = tf.reduce_mean(inputs, [1, 2],
                                                          name='global_average_pooling',
                                                          keepdims=True)
                    # 1x1 convolution with 256 filters (and batch normalization).
                    image_level_features = layers_lib.conv2d(image_level_features, depth,
                                                             [1, 1], stride=1,
                                                             scope='conv_1x1')
                    # Bilinearly upsample the features.
                    image_level_features = tf.image.resize_bilinear(image_level_features,
                                                                    inputs_size,
                                                                    name='upsample')

                net = tf.concat([conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3,
                                 image_level_features], axis=3, name='concat')
                net = layers_lib.conv2d(net, depth, [1, 1], stride=1,
                                        scope='conv_1x1_concat')
                return net
def get_backbone(self, features):
    if self.flags.model_variant.startswith('xception'):
        assert False, 'not implemented'
    elif self.flags.model_variant == 'resnet_v2_50':
        # inputs has shape [batch, 513, 513, 3]
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(features,
                                                     self.num_classes,
                                                     is_training=False,
                                                     global_pool=False,
                                                     output_stride=self.output_stride)
    elif self.flags.model_variant == 'resnet_v1_50':
        # The key difference of the full preactivation 'v2' variant compared
        # to the 'v1' variant in [1] is the use of batch normalization before
        # every weight layer.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(features,
                                                     self.num_classes,
                                                     is_training=False,
                                                     global_pool=False,
                                                     output_stride=self.output_stride)
    elif self.flags.model_variant == 'resnet_v2_101':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_101(features,
                                                      self.num_classes,
                                                      is_training=False,
                                                      global_pool=False,
                                                      output_stride=self.output_stride)
    elif self.flags.model_variant == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_101(features,
                                                      self.num_classes,
                                                      is_training=False,
                                                      global_pool=False,
                                                      output_stride=self.output_stride)
    else:
        assert False, 'not implemented'
    print(end_points.keys())
    print(net)
    # Return the backbone output so callers can attach a decoder head
    # (the original fell off the end without returning anything).
    return net, end_points
def resnet_model(images, is_training, reuse=tf.AUTO_REUSE):
    with tf.contrib.framework.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_fn = RESNET_MODELS[model_name]
        logits, _ = resnet_fn(images, num_classes, is_training=is_training, reuse=reuse)
    # From [bs x 1 x 1 x num_classes] to [bs x num_classes].
    logits = tf.reshape(logits, [-1, num_classes])
    return logits
def __init__(self, inputs, true_labels, is_train=False, num_classes=None):
    self.true_labels = true_labels
    self.NUM_CLASSES = num_classes
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        self.output, self.features = resnet_v2.resnet_v2_50(inputs=inputs,
                                                            num_classes=None,
                                                            is_training=False)
    # Linear classifier on top of the frozen ResNet features.
    self.classifier, _ = cnn.fc(input=self.forward_pass(),
                                num_outputs=self.NUM_CLASSES,
                                use_relu=False,
                                name='classifier')
def DeepLabNet(input_batch, is_training, num_classes, output_stride=16,
               batch_norm_decay=0.9997, backbone='resnet_v2_101'):
    # DeepLab V3+ with a ResNet backbone.
    inputs_size = tf.shape(input_batch)[1:3]
    with tf.variable_scope('deeplab'):
        # ResNet as the encoder.
        with tf.variable_scope('encoder'):
            if backbone == 'resnet_v2_50':
                base_model = resnet_v2.resnet_v2_50
            else:
                base_model = resnet_v2.resnet_v2_101
            with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
                logits, end_points = base_model(input_batch,
                                                num_classes=None,
                                                is_training=is_training,
                                                global_pool=False,
                                                output_stride=output_stride)

        # ASPP on the encoder output.
        with tf.variable_scope('aspp'):
            net = end_points['deeplab/encoder/' + backbone + '/block4']
            encoder_output = ASPP(net, output_stride, batch_norm_decay, is_training)

        # Decoder.
        with tf.variable_scope('decoder'):
            with tf.contrib.slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=batch_norm_decay)):
                with arg_scope([layers.batch_norm], is_training=is_training):
                    with tf.variable_scope("low_level_features"):
                        low_level_features = end_points['deeplab/encoder/' + backbone +
                                                        '/block1/unit_3/bottleneck_v2/conv1']
                        low_level_features = layers_lib.conv2d(low_level_features, 48, [1, 1],
                                                               stride=1, scope='conv_1x1')
                        low_level_features_size = tf.shape(low_level_features)[1:3]
                    with tf.variable_scope("upsampling_logits"):
                        net = tf.image.resize_bilinear(encoder_output, low_level_features_size,
                                                       name='upsample_1')
                        net = tf.concat([net, low_level_features], axis=3, name='concat')
                        net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_1')
                        net = layers_lib.conv2d(net, 256, [3, 3], stride=1, scope='conv_3x3_2')
                        net = layers_lib.conv2d(net, num_classes, [1, 1],
                                                activation_fn=None, normalizer_fn=None,
                                                scope='conv_1x1')
                        logits = tf.image.resize_bilinear(net, inputs_size, name='upsample_2')
    return logits
def build(self, images):
    """Builds a ResNet50 embedder for the input images.

    It assumes that the range of the pixel values in the images tensor is
    [0, 255] and should be castable to tf.uint8.

    Args:
      images: a tensor that contains the input images which has the shape of
        NxTxHxWxC where N is the batch size, T is the maximum length of the
        sequence, H and W are the height and width of the images and C is the
        number of channels, which must be 3.

    Returns:
      The embedding of the input image with the shape of NxTxL where L is the
      embedding size of the output.

    Raises:
      ValueError: if the shape of the input does not agree with the expected
        shape explained in the Args section.
    """
    shape = images.get_shape().as_list()
    if len(shape) != 5:
        raise ValueError(
            'The tensor shape should have 5 elements, {} is provided'.format(len(shape)))
    if shape[4] != 3:
        raise ValueError('Three channels are expected for the input image')

    images = tf.cast(images, tf.uint8)
    # Fold the time dimension into the batch dimension.
    images = tf.reshape(images, [shape[0] * shape[1], shape[2], shape[3], shape[4]])
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):

        def preprocess_fn(x):
            x = tf.expand_dims(x, 0)
            x = tf.image.resize_bilinear(x, [299, 299], align_corners=False)
            return tf.squeeze(x, [0])

        images = tf.map_fn(preprocess_fn, images, dtype=tf.float32)
        net, _ = resnet_v2.resnet_v2_50(images, is_training=False, global_pool=True)
        # Unfold back to [N, T, embedding_size].
        output = tf.reshape(net, [shape[0], shape[1], -1])
    return output