def forward(self, input_tensor, is_training):
    # inputs has shape [batch, 513, 513, 3]
    input_tensor = tf.image.resize_images(input_tensor, [512, 512])
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training)):
        net, end_points = resnet_v1.resnet_v1_101(
            input_tensor, None, global_pool=False, output_stride=16)
    print(net.get_shape())
    # Decode the stride-16 features back towards input resolution: 4x then 2x.
    h = L.convolution2d_transpose(net, 64, [5, 5], [4, 4], activation_fn=None)
    h = tf.nn.relu(h)
    h = L.dropout(h, keep_prob=0.5, is_training=is_training)
    h = L.convolution2d_transpose(h, 32, [5, 5], [2, 2], activation_fn=None)
    h = tf.nn.relu(h)
    h = L.dropout(h, keep_prob=0.5, is_training=is_training)
    print(h)
    # 1x1 convolution to per-pixel logits; +1 channel for the background class.
    h = L.convolution2d(h, len(self.classes) + 1, [1, 1], [1, 1], activation_fn=None)
    print(h)
    return h
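# Shape walk-through for the decoder above (assuming a 512x512 input and
# output_stride=16, as in the comment):
#   backbone features: 512 / 16 -> 32 x 32 x 2048
#   deconv, stride 4:  32 * 4   -> 128 x 128 x 64
#   deconv, stride 2:  128 * 2  -> 256 x 256 x 32
#   1x1 conv:                   -> 256 x 256 x (len(self.classes) + 1)
# i.e. the returned logits sit at half the input resolution; a caller that
# needs full-resolution predictions would resize them once more.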
def resnet_v1_101_base(input_image):
    # input_image = tf.expand_dims(tf_image_std, axis=0)
    net = input_image
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(
            input_image, num_classes=None, global_pool=False,
            output_stride=16, is_training=True)
    return net
def _build_graph(self, inputs):
    orig_image = inputs[0]
    mean = tf.get_variable('resnet_v1_' + str(args.depth) + '/mean_rgb', shape=[3])
    with tp.symbolic_functions.guided_relu():
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=False)):
            image = tf.expand_dims(orig_image - mean, 0)
            if args.depth == 50:
                logits, _ = resnet_v1.resnet_v1_50(image, 1000)
            elif args.depth == 101:
                logits, _ = resnet_v1.resnet_v1_101(image, 1000)
            else:
                logits, _ = resnet_v1.resnet_v1_152(image, 1000)
        tp.symbolic_functions.saliency_map(logits, orig_image, name="saliency")
def resnet_101_CAM(self, inputs, keep_prob, resnet_mode_flag):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=True)):
        net, end_points = resnet_v1.resnet_v1_101(inputs)
    # Last convolutional block, kept for class-activation-map (CAM) visualization.
    cam_conv = end_points['resnet_v1_101/block4']
    net = self.flatten(net)
    net = slim.fully_connected(net, self.num_classes, activation_fn=None,
                               scope='out_classification')
    return net, cam_conv
def build_model(self, is_training=True, dropout_keep_prob=0.5):
    self.inputs = tf.placeholder(real_type(self.FLAGS),
                                 [self.FLAGS.batch_size, 224, 224, 3])
    self.targets = tf.placeholder(tf.int32, [self.FLAGS.batch_size])
    with slim.arg_scope(resnet_utils.resnet_arg_scope(is_training)):
        logits, endpoints = resnet_v1.resnet_v1_101(
            self.inputs, self.FLAGS.num_classes)
    logits = tf.squeeze(logits)  # [batch, 1, 1, classes] -> [batch, classes]
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.targets)
    self.cost = tf.reduce_sum(loss)
    self.global_step = tf.contrib.framework.get_or_create_global_step()
    # Minimize the summed (scalar) loss rather than the per-example vector.
    self.train_op = tf.train.AdagradOptimizer(0.01).minimize(
        self.cost, global_step=self.global_step)
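# A minimal sketch of driving build_model(): feed one dummy batch and take a
# single training step. `MyModel` and the zero-filled data are assumptions for
# illustration, not part of the snippet above.
import numpy as np
import tensorflow as tf

model = MyModel(FLAGS)  # hypothetical wrapper class holding FLAGS
model.build_model(is_training=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    images = np.zeros([FLAGS.batch_size, 224, 224, 3], dtype=np.float32)
    labels = np.zeros([FLAGS.batch_size], dtype=np.int32)
    _, cost = sess.run([model.train_op, model.cost],
                       feed_dict={model.inputs: images, model.targets: labels})
    print('batch cost:', cost)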
def forward_network(self, input_, scope="resnet101", reuse=False):
    with tf.variable_scope(scope, reuse=reuse) as vs:
        _, end_points = resnet_v1.resnet_v1_101(
            input_, 1000, is_training=self.is_training)
        net = end_points[scope + '/resnet_v1_101/block4']
        # Flatten the block4 feature map into [batch, h * w * c].
        output_ = tf.reshape(net, [
            -1,
            net.get_shape().as_list()[1] * net.get_shape().as_list()[2] *
            net.get_shape().as_list()[3]
        ], name='reshape')
        variables = tf.contrib.framework.get_variables(vs)
    return output_, variables
def encode_with_resnet(self, images, global_pool=False, output_stride=8):
    self.global_pool = global_pool  # needed for atrous convolution
    self.output_stride = output_stride  # needed for atrous convolution
    with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        logits, end_points = resnet_v1.resnet_v1_101(
            images,
            global_pool=self.global_pool,
            output_stride=self.output_stride)
    # size = tf.slice(tf.shape(images), [1], [2])  # TODO: multiply by 0.5
    size = [FLAGS.output_height, FLAGS.output_width]  # TODO: changed to fixed size
    resized_logits = tf.image.resize_images(
        logits, size,
        method=tf.image.ResizeMethod.BILINEAR,
        align_corners=False)
    return resized_logits
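# Rough shape check for the atrous encoder above: a standalone sketch, with
# the 512x512 input size assumed purely for the arithmetic.
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.nets import resnet_v1

images = tf.placeholder(tf.float32, [1, 512, 512, 3])
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    feats, _ = resnet_v1.resnet_v1_101(
        images, num_classes=None, global_pool=False, output_stride=8)
# Atrous (dilated) convolution keeps a denser feature grid: output_stride=8
# yields 512 / 8 = 64x64 features instead of 16x16 at the default stride of 32.
print(feats.get_shape())  # (1, 64, 64, 2048)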
def def_net(self):
    net = FLAGS.net  # assumption: the network name comes from a flag; the
                     # original left `net` undefined in this scope
    if net == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            _, end_points = vgg.vgg_16(self.images,
                                       num_classes=FLAGS.num_classes,
                                       dropout_keep_prob=1.0,
                                       is_training=False)
    elif net == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_101(
                self.images, num_classes=FLAGS.num_classes)
    elif net == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=True)):
            _, end_points = resnet_v1.resnet_v1_50(
                self.images, num_classes=FLAGS.num_classes)
    else:
        raise Exception('No network matched with net %s' % net)
    self.end_points = end_points
def resnet_101(self, inputs, keep_prob, resnet_mode_flag):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=True)):
        net, end_points = resnet_v1.resnet_v1_101(inputs)
    net = slim.dropout(net, keep_prob, scope='net')
    fc_classification = slim.fully_connected(net, 2048,
                                             scope='fc_classification')
    fc_classification = slim.dropout(fc_classification, keep_prob,
                                     scope='dropout_fc_classification')
    out_classification = slim.fully_connected(fc_classification,
                                              self.num_classes,
                                              scope='out_classification',
                                              activation_fn=None)
    return out_classification
def get_backbone(self, features):
    if self.flags.model_variant.startswith('xception'):
        assert False, 'not implemented'
    elif self.flags.model_variant == 'resnet_v2_50':
        # inputs has shape [batch, 513, 513, 3]
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_50(
                features, self.num_classes, is_training=False,
                global_pool=False, output_stride=self.output_stride)
    elif self.flags.model_variant == 'resnet_v1_50':
        # Unlike the full preactivation 'v2' variant, 'v1' does not apply
        # batch normalization before every weight layer.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(
                features, self.num_classes, is_training=False,
                global_pool=False, output_stride=self.output_stride)
    elif self.flags.model_variant == 'resnet_v2_101':
        # inputs has shape [batch, 513, 513, 3]
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, end_points = resnet_v2.resnet_v2_101(
                features, self.num_classes, is_training=False,
                global_pool=False, output_stride=self.output_stride)
    elif self.flags.model_variant == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_101(
                features, self.num_classes, is_training=False,
                global_pool=False, output_stride=self.output_stride)
    else:
        assert False, 'not implemented'
    print(end_points.keys())
    print(net)
def get_network_by_name(
        self,
        net_name,
        inputs,
        num_classes=None,
        is_training=True,
        global_pool=True,
        output_stride=None,
):
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)):
            logits, end_points = resnet_v1.resnet_v1_50(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
            )
        return logits, end_points
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=0.0001)):
            logits, end_points = resnet_v1.resnet_v1_101(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
            )
        return logits, end_points
    if net_name == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0001)):
            logits, end_points = vgg.vgg_19(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
            )
        return logits, end_points
    # Fail loudly instead of silently returning None on an unknown name.
    raise ValueError('unknown network: %s' % net_name)
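# Hypothetical call showing the dispatcher in use (the `model` and `images`
# names are assumptions for illustration, not part of the snippet above):
logits, end_points = model.get_network_by_name(
    'resnet_v1_101', inputs=images, num_classes=1000,
    is_training=False, global_pool=True)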
def resnet_v1_101_fcn(input_image, num_classes, upsample=16, is_training=True):
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        res_logits, end_points = resnet_v1.resnet_v1_101(
            input_image, num_classes,
            is_training=is_training,
            global_pool=False,
            output_stride=upsample)
    upsample_factor = upsample
    # Fixed bilinear kernel that upsamples the coarse logits back to the
    # input resolution.
    filter_16 = tf.constant(
        bilinear_upsample_weights(factor=upsample_factor,
                                  number_of_classes=num_classes))
    l_shape = tf.shape(res_logits)
    output_shape = tf.stack([
        l_shape[0], upsample_factor * l_shape[1],
        upsample_factor * l_shape[2], l_shape[3]
    ])
    tf_logits_4d = tf.nn.conv2d_transpose(
        res_logits, filter_16, output_shape,
        strides=[1, upsample_factor, upsample_factor, 1])
    return tf_logits_4d
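# One way to consume the FCN logits (a sketch; the 21-class setting and the
# placeholder shape are assumptions):
import tensorflow as tf

input_image = tf.placeholder(tf.float32, [None, 512, 512, 3])
logits_4d = resnet_v1_101_fcn(input_image, num_classes=21, is_training=False)
# Per-pixel class prediction at (roughly) the input resolution.
predictions = tf.argmax(logits_4d, axis=3)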
import os
import cv2
import numpy as np
import tensorflow as tf
from models import vgg_train
from tensorflow.contrib.slim.nets import vgg as vgg
from tensorflow.contrib.slim.nets import resnet_v2 as resnet_v2
from tensorflow.contrib.slim.nets import resnet_v1 as resnet_v1
from tensorflow.python.client import device_lib

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
loss_unbalance_w = 1.2
print([x.name for x in device_lib.list_local_devices() if x.device_type == 'GPU'])
tfx = tf.placeholder(tf.float32, [None, 224, 224, 1])
tfy = tf.placeholder(tf.float32, [None, 2])
# out, end_points = vgg.vgg_16(tfx, num_classes=2)  # try upgrading VGG16 to VGG19?
out, end_points = resnet_v1.resnet_v1_101(tfx, num_classes=2)  # try upgrading VGG16 to VGG19?
out = tf.reshape(out, (-1, 2))
# fc8, end_points = vgg.vgg_19(tfx, num_classes=2, spatial_squeeze=False)
# net_flatten = tf.reshape(fc8, [-1, 1 * 6 * 2])
# out = tf.layers.dense(net_flatten, 2, name='vgg_out')
loss = tf.losses.softmax_cross_entropy(tfy, out)
# bb = tf.nn.softmax(out)
# loss = -tf.reduce_mean(tfy[0][0] * tf.log(tf.clip_by_value(bb[0][0], 1e-15, 1.0))
#                        + tfy[0][1] * tf.log(tf.clip_by_value(bb[0][0], 1e-15, 1.0)) * loss_unbalance_w)
train_op = tf.train.MomentumOptimizer(0.0005, 0.9).minimize(loss)
correct_prediction = tf.equal(tf.argmax(out, 1), tf.argmax(tfy, 1))
# placeholders for input and output
img = tf.placeholder(tf.float32, shape=[batch_size, 224, 224, 3])  # image
tag = tf.placeholder(tf.float32, shape=[batch_size, num_noisy_tags])  # noisy tags, e.g. [1 0 0 1 0]
y = tf.placeholder(tf.float32, shape=[batch_size, num_classes])
q = tf.reduce_sum(y, 1)  # label quantity
keep_prob = tf.placeholder(tf.float32)
is_training = tf.placeholder(tf.bool)

# model: resnet_v1 101
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    net, end_points = resnet_v1.resnet_v1_101(img, num_classes, is_training=False)
net_logit = tf.squeeze(net)

# tensorflow op for loading the pretrained weights
variables_to_restore = get_variables_to_restore(
    exclude=['resnet_v1_101/logits', 'resnet_v1_101/AuxLogits'])
init_fn = assign_from_checkpoint_fn('resnet_v1_101.ckpt', variables_to_restore)

# multiscale resnet_v1 101
visual_features, fusion_logit = multiscale_resnet101(end_points, num_classes, is_training)
textual_features, textual_logit = mlp(tag, num_classes, is_training)
refined_features = tf.concat([visual_features, textual_features], 1)
# score is the prediction score, and k is the label quantity
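# The init_fn returned by assign_from_checkpoint_fn must be called with a
# live session to actually load the pretrained backbone; a minimal sketch:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # restores resnet_v1_101 weights from 'resnet_v1_101.ckpt'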
def resnet_v1_101(inputs, is_training, num_classes):
    logits, _ = resnet_v1.resnet_v1_101(inputs, num_classes,
                                        is_training=is_training)
    # resnet output is (N, 1, 1, C); remove the two singleton spatial dims
    logits = tf.squeeze(logits, [1, 2])
    return tf.identity(logits, name='logits')