def testDivisibleBy(self):
    """Build MobileNetV2 with divisible_by=16 / min_depth=32 and check depths.

    The channel (last) dimension of every Conv2D op output in the default
    graph is collected and the distinct values are compared with the
    expected set.
    """
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, (10, 224, 224, 16))
    mobilenet_v2.mobilenet(
        images,
        conv_defs=mobilenet_v2.V2_DEF,
        divisible_by=16,
        min_depth=32)
    depths = {
        conv_op.outputs[0].get_shape().as_list()[-1]
        for conv_op in find_ops('Conv2D')
    }
    expected = [32, 64, 96, 160, 192, 320, 384, 576, 960, 1280, 1001]
    self.assertSameElements(expected, depths)
def testDivisibleBy(self):
    """Build MobileNetV2 with divisible_by=16 / min_depth=32 and check depths.

    The channel (last) dimension of every Conv2D op output in the default
    graph is collected and the distinct values are compared with the
    expected set.
    """
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, (10, 224, 224, 16))
    mobilenet_v2.mobilenet(
        images,
        conv_defs=mobilenet_v2.V2_DEF,
        divisible_by=16,
        min_depth=32)
    depths = {
        conv_op.outputs[0].get_shape().as_list()[-1]
        for conv_op in find_ops('Conv2D')
    }
    expected = [32, 64, 96, 160, 192, 320, 384, 576, 960, 1280, 1001]
    self.assertSameElements(expected, depths)
def testDivisibleByWithArgScope(self):
    """Check that min_depth supplied through an arg scope takes effect.

    min_depth is not passed to mobilenet() directly; it is set only via
    slim.arg_scope on mobilenet.depth_multiplier, and the network is built
    with depth_multiplier=0.1.
    """
    tf.reset_default_graph()
    with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
        images = tf.placeholder(tf.float32, (10, 224, 224, 2))
        mobilenet_v2.mobilenet(
            images,
            conv_defs=mobilenet_v2.V2_DEF,
            depth_multiplier=0.1)
    # Distinct channel counts of all Conv2D outputs in the graph.
    depths = {
        conv_op.outputs[0].get_shape().as_list()[-1]
        for conv_op in find_ops('Conv2D')
    }
    self.assertSameElements(depths, [32, 192, 128, 1001])
def testDivisibleByWithArgScope(self):
    """Check that min_depth supplied through an arg scope takes effect.

    min_depth is not passed to mobilenet() directly; it is set only via
    slim.arg_scope on mobilenet.depth_multiplier, and the network is built
    with depth_multiplier=0.1.
    """
    tf.reset_default_graph()
    with slim.arg_scope((mobilenet.depth_multiplier,), min_depth=32):
        images = tf.placeholder(tf.float32, (10, 224, 224, 2))
        mobilenet_v2.mobilenet(
            images,
            conv_defs=mobilenet_v2.V2_DEF,
            depth_multiplier=0.1)
    # Distinct channel counts of all Conv2D outputs in the graph.
    depths = {
        conv_op.outputs[0].get_shape().as_list()[-1]
        for conv_op in find_ops('Conv2D')
    }
    self.assertSameElements(depths, [32, 192, 128, 1001])
def testFineGrained(self):
    """Check Conv2D depths with finegrain_classification_mode=True.

    The network is built with depth_multiplier=0.01 and the distinct output
    depths of all Conv2D ops are compared with the expected values.
    """
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, (10, 224, 224, 2))
    mobilenet_v2.mobilenet(
        images,
        conv_defs=mobilenet_v2.V2_DEF,
        depth_multiplier=0.01,
        finegrain_classification_mode=True)
    depths = {
        conv_op.outputs[0].get_shape().as_list()[-1]
        for conv_op in find_ops('Conv2D')
    }
    # All convolutions will be 8->48, except for the last one.
    self.assertSameElements(depths, [8, 48, 1001, 1280])
def testFineGrained(self):
    """Check Conv2D depths with finegrain_classification_mode=True.

    The network is built with depth_multiplier=0.01 and the distinct output
    depths of all Conv2D ops are compared with the expected values.
    """
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, (10, 224, 224, 2))
    mobilenet_v2.mobilenet(
        images,
        conv_defs=mobilenet_v2.V2_DEF,
        depth_multiplier=0.01,
        finegrain_classification_mode=True)
    depths = {
        conv_op.outputs[0].get_shape().as_list()[-1]
        for conv_op in find_ops('Conv2D')
    }
    # All convolutions will be 8->48, except for the last one.
    self.assertSameElements(depths, [8, 48, 1001, 1280])
def __init__(self, checkpoint='../mobilenet_v2_1.0_224.ckpt'):
    """Build a MobileNetV2 inference graph and restore `checkpoint` into it.

    Sets `self.checkpoint`, `self.file_in` (scalar string placeholder for a
    JPEG file path), `self.endpoints`, `self.label_map` and `self.sess`.

    Args:
        checkpoint: path of the checkpoint passed to `Saver.restore`.
    """
    self.checkpoint = checkpoint
    tf.reset_default_graph()

    # The image is read and decoded inside the graph from a file path.
    self.file_in = tf.placeholder(tf.string, ())
    jpeg_bytes = tf.read_file(self.file_in)
    decoded = tf.image.decode_jpeg(jpeg_bytes)

    # Add a batch axis, then rescale pixel values as value / 128 - 1.
    batch = tf.expand_dims(decoded, 0)
    batch = (tf.cast(batch, tf.float32) / 128) - 1

    # Pin the channel count to 3 and resize to 224x224.
    batch.set_shape((None, None, None, 3))
    batch = tf.image.resize_images(batch, (224, 224))

    inference_scope = mobilenet_v2.training_scope(is_training=False)
    with tf.contrib.slim.arg_scope(inference_scope):
        _, self.endpoints = mobilenet_v2.mobilenet(batch)

    # Restore through the exponential-moving-average variable mapping (the
    # original author notes this gives 1.5-2% higher accuracy).
    ema = tf.train.ExponentialMovingAverage(0.999)
    saver = tf.train.Saver(ema.variables_to_restore())

    self.label_map = imagenet.create_readable_names_for_imagenet_labels()

    self.sess = tf.Session()
    saver.restore(self.sess, self.checkpoint)
def load_mobilenet_v2(model_dir, sess):
    """Download (best effort) and load MobileNetV2 1.4/224 as a feature extractor.

    Args:
        model_dir: directory under which the checkpoint archive is unpacked.
        sess: session into which the checkpoint values are assigned.

    Returns:
        Tuple `(bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size)`
        where the size is the constant 1792.
    """
    model_url = "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz"
    filename = model_url.split("/")[-1]
    filepath = os.path.join(model_dir, filename.split(".tgz")[0])
    try:
        utils.download_pretrained_model_weights(model_url, filepath, unzip=True)
    except Exception:
        # Deliberately best effort: a failed download is reported and loading
        # is still attempted. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("Pre-training weights download failed!")

    model_file_name = "mobilenet_v2_1.4_224.ckpt"
    model_path = os.path.join(filepath, model_file_name)

    resized_input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()):
        # num_classes=None: the first return value is kept as the bottleneck.
        bottleneck_tensor, _ = mobilenet_v2.mobilenet(
            resized_input_tensor, num_classes=None, depth_multiplier=1.4)

    variable_restore_op = tf.contrib.slim.assign_from_checkpoint_fn(
        model_path,
        tf.contrib.slim.get_trainable_variables(),
        ignore_missing_vars=True)
    variable_restore_op(sess)

    # bottleneck_tensor = tf.squeeze(bottleneck_tensor, axis=[1, 2])
    bottleneck_tensor_size = 1792
    return bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size
def net(image, classes):
    """Segmentation head on top of the MobileNetV2 "layer_10/output" endpoint.

    Args:
        image: input tensor passed to `mobilenet_v2.mobilenet`.
        classes: number of output channels of the final 1x1 convolution.

    Returns:
        Tuple `(class_scores, argmax over axis 3, softmax probabilities)`.
    """
    # Encoder: MobileNetV2 built under the training arg scope.
    scope = mobilenet_v2.training_scope(is_training=True)
    with tf.contrib.slim.arg_scope(scope):
        _, endpoints = mobilenet_v2.mobilenet(image, num_classes=None)
    features = endpoints["layer_10/output"]
    print(features.get_shape())

    # Earlier upsampling stages, kept disabled as in the original:
    # new_size = (16,32)
    # resize = tf.image.resize(logits, new_size, align_corners=True)
    # conv = util.conv(resize, [3,3,512,320], "up_1", pad="SAME")
    # new_size = (64,128)
    # resize = tf.image.resize(logits, new_size, align_corners=True)
    # conv = util.conv(resize, [3,3,256,512], "up_2", pad="SAME")

    upsampled = tf.image.resize(features, (192, 256), align_corners=True)
    up_3 = util.conv(upsampled, [3, 3, 128, 256], "up_3", pad="SAME")
    class_scores = util.conv(up_3, [1, 1, 128, classes], "c6", pad="SAME")

    probabilities = tf.nn.softmax(class_scores)
    return class_scores, tf.argmax(probabilities, axis=3), probabilities
def create_inference_graph(self, input_image, base_graph):
    """Build MobileNetV2 on `base_graph` and return the frozen graph.

    Downloads the checkpoint tarball, builds the network on `input_image`
    (rescaled as value / 128 - 1), stores `self.logits` / `self.endpoints`,
    and replaces `self.graph` with the result of
    `util.give_me_frozen_graph`.

    Args:
        input_image: tensor living in `base_graph`.
        base_graph: graph in which the model is constructed.

    Returns:
        The value stored in `self.graph`.
    """
    util.download(self.params.CHECKPOINT_TARBALL_URI, self.params.MODEL_BASEDIR)

    self.graph = base_graph
    with self.graph.as_default():
        input_image = tf.cast(input_image, tf.float32) / 128. - 1
        input_image.set_shape(self.params.INPUT_TENSOR_SHAPE)

        from nets.mobilenet import mobilenet_v2
        with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
            # See also e.g. mobilenet_v2_035
            self.logits, self.endpoints = mobilenet_v2.mobilenet(
                input_image,
                is_training=False,
                depth_multiplier=self.params.DEPTH_MULTIPLIER,
                finegrain_classification_mode=self.params.FINE)

        # Per authors: Restore using exponential moving average since it
        # produces (1.5-2%) higher accuracy
        ema = tf.train.ExponentialMovingAverage(0.999)
        vs = ema.variables_to_restore()
        saver = tf.train.Saver(vs)

        checkpoint = os.path.join(
            self.params.MODEL_BASEDIR,
            self.params.CHECKPOINT + '.ckpt')
        # NOTE(review): the original also built an unused list
        # `list(self.output_names) + [input_image]`; only
        # `self.output_names` was ever passed on, so the dead local is gone.
        # NOTE(review): original indentation was lost in this excerpt; the
        # freeze call is assumed to run inside the graph context.
        self.graph = util.give_me_frozen_graph(
            checkpoint,
            nodes=self.output_names,
            base_graph=self.graph,
            saver=saver)
    return self.graph
def _build_model(self, inputs, is_training=True):
    """Build MobileNetV2 with `self.config.num_outputs` classes.

    Also records in `self.mobile_net_vars` the trainable variables whose
    name starts with "Mobilenet" and does not contain "Logits".

    Args:
        inputs: input tensor passed to `mobilenet_v2.mobilenet`.
        is_training: forwarded to `mobilenet_v2.training_scope`.

    Returns:
        Tuple `(logits, endpoints)` as returned by `mobilenet_v2.mobilenet`.
    """
    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
        logits, endpoints = mobilenet_v2.mobilenet(
            inputs, num_classes=self.config.num_outputs)

    # The original created an ExponentialMovingAverage here but never used
    # it; the dead object has been removed.
    self.mobile_net_vars = [
        var for var in tf.trainable_variables()
        if var.name.startswith("Mobilenet") and "Logits" not in var.name
    ]
    return logits, endpoints
def __call__(self, inputs, castFromUint8=True):
    """Build MobileNetV2 plus two extra fully connected layers.

    Side effects: sets `self.variables_to_restore` (variables existing after
    the backbone is built), `self.variables_to_save` (all variables after the
    head is built) and appends the head-only variables to
    `self.variables_to_train`, which must already exist.

    Args:
        inputs: input tensor.
        castFromUint8: when true, `inputs` is cast to `self.dtype` first.

    Returns:
        Logits tensor produced by the final fully connected layer
        (`self.n_classes` outputs).
    """
    if castFromUint8:
        inputs = tf.cast(inputs, self.dtype)

    with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(
            is_training=self.is_training)):
        global_pool, _ = mobilenet_v2.mobilenet(inputs, num_classes=None)
    self.variables_to_restore = slim.get_variables()  # len 260

    # Two fully connected layers appended after the backbone.
    dropout_keep_prob = 0.5
    weight_decay = 0.05
    with tf.variable_scope('additional', 'fc'):
        flatten = slim.flatten(global_pool)
        # NOTE(review): original indentation was lost in this excerpt; fc1,
        # dropout and fc2 are assumed to sit inside this arg_scope.
        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(weight_decay),
                # dtype must be passed by keyword: the first positional
                # parameter of xavier_initializer is `uniform`, not `dtype`.
                weights_initializer=tc.layers.xavier_initializer(dtype=tf.float32),
                # weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                activation_fn=None):
            net = slim.fully_connected(flatten, 128, activation_fn=None, scope='fc1')
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=self.is_training, scope='dropout')
            logits = slim.fully_connected(net, self.n_classes,
                                          activation_fn=None, scope='fc2')

    # The 4 additional parameters are saved too, 264 in total.
    self.variables_to_save = slim.get_variables()
    for var in self.variables_to_save:
        if var not in self.variables_to_restore:
            self.variables_to_train.append(var)
    return logits
def mobilenet_v2_100(inputs, is_training, opts):
    """Build MobileNetV2 with depth multiplier 1.0.

    When `is_training` is true the network is built inside
    `mobilenet_v2.training_scope` configured from `opts`; otherwise it is
    built without any extra arg scope.

    Args:
        inputs: input tensor.
        is_training: selects the training arg scope.
        opts: object providing `num_classes`, `weight_decay` and
            `batch_norm_decay`.

    Returns:
        Whatever `mobilenet_v2.mobilenet` returns.
    """
    def _build():
        return mobilenet_v2.mobilenet(
            inputs,
            num_classes=opts.num_classes,
            depth_multiplier=1.0,
            reuse=None)

    if not is_training:
        return _build()

    train_scope = mobilenet_v2.training_scope(
        weight_decay=opts.weight_decay,
        stddev=0.09,
        bn_decay=opts.batch_norm_decay)
    with slim.arg_scope(train_scope):
        return _build()
def Encoder_mobilenet(x, is_training=True, weight_decay=0.001, reuse=False):
    """Run MobileNetV2 on `x` and return its output and a variable list.

    Args:
        x: input tensor.
        is_training: forwarded to `mobilenet_v2.training_scope` (it was
            previously accepted but silently ignored; the default keeps the
            old behaviour).
        weight_decay: currently unused by this function.
        reuse: currently unused by this function.

    Returns:
        Tuple `(net, variables)` where `net` is the first value returned by
        `mobilenet_v2.mobilenet` and `variables` is the result of
        `get_variables('mobilenet_v2')`.
    """
    from nets.mobilenet import mobilenet_v2
    with slim.arg_scope(mobilenet_v2.training_scope(is_training=is_training)):
        net, endpoints = mobilenet_v2.mobilenet(x)
    # NOTE(review): the lookup scope is 'mobilenet_v2'; confirm this matches
    # the variable scope the network is actually created under.
    variables = tf.contrib.framework.get_variables('mobilenet_v2')
    return net, variables
def testImageSizes(self):
    """Check the spatial size of 'layer_18/output' for several input sizes."""
    size_pairs = [(224, 7), (192, 6), (160, 5), (128, 4), (96, 3)]
    for input_size, output_size in size_pairs:
        tf.reset_default_graph()
        images = tf.placeholder(tf.float32, (10, input_size, input_size, 3))
        _, end_points = mobilenet_v2.mobilenet(images)
        # Dimensions 1 and 2 are compared against output_size x output_size.
        spatial = end_points['layer_18/output'].get_shape().as_list()[1:3]
        self.assertEqual(spatial, [output_size, output_size])
def testImageSizes(self):
    """Check the spatial size of 'layer_18/output' for several input sizes."""
    size_pairs = [(224, 7), (192, 6), (160, 5), (128, 4), (96, 3)]
    for input_size, output_size in size_pairs:
        tf.reset_default_graph()
        images = tf.placeholder(tf.float32, (10, input_size, input_size, 3))
        _, end_points = mobilenet_v2.mobilenet(images)
        # Dimensions 1 and 2 are compared against output_size x output_size.
        spatial = end_points['layer_18/output'].get_shape().as_list()[1:3]
        self.assertEqual(spatial, [output_size, output_size])
def test_model_from_img(img_path):
    """Classify every image found via `read_img(img_path)` and print accuracy.

    The expected label of an image is taken from the name of its parent
    directory (looked up in `R.labelsDict`). Results are printed; nothing is
    returned.

    Args:
        img_path: passed to `read_img` to obtain the list of image paths.
    """
    img_list = read_img(img_path)

    # Placeholder holding the input batch for classification.
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, R.resize_height, R.resize_width, R.depths],
        name='input')
    _, end_points = mobilenet_v2.mobilenet(
        input_tensor=input_images,
        num_classes=R.num_classes,
        depth_multiplier=R.depth_multiplier,
        is_training=False)
    saver = tf.train.Saver()

    # Build the decode/resize pipeline once and feed the raw bytes, instead
    # of adding new ops to the graph for every image.
    raw_jpeg = tf.placeholder(dtype=tf.string, shape=())
    img_data = tf.image.decode_jpeg(raw_jpeg)
    # img_data = tf.image.per_image_standardization(img_data)
    img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)  # elements are in [0,1)
    resized_img = tf.image.resize_images(
        img_data, size=[R.resize_height, R.resize_width], method=0)

    np.set_printoptions(precision=4, suppress=True)

    with tf.Session() as sess:
        # Restore variables that have been trained.
        saver.restore(sess, R.CKPT)

        print(''.join(s + ' ' for s in R.labelsStr))

        acc = 0
        for img_name in img_list:
            dirname = os.path.dirname(img_name).split(os.sep)[-1]

            # Context manager so the file handle is closed after reading.
            with tf.gfile.FastGFile(img_name, 'rb') as image_file:
                img_raw_data = image_file.read()

            img = sess.run(resized_img, feed_dict={raw_jpeg: img_raw_data})
            img.resize([1, R.resize_height, R.resize_width, R.depths])

            predictions = sess.run(end_points['Predictions'],
                                   feed_dict={input_images: img})
            predictions.resize([R.num_classes])
            index = np.argmax(predictions)
            print('Predict[{: ^7s}] is {}.'.format(dirname, str(predictions)))
            if R.labelsDict[dirname] == index:
                acc += 1
            else:
                print(" --- Wrong: Mistake " + dirname + " for " + R.labelsStr[index])
                # wrong[dirname] = value

        # Guard against an empty image list (previously ZeroDivisionError).
        total = len(img_list)
        ratio = acc / total if total else 0.0
        print("The accuracy of this test is {:.3f} - {}/{}".format(ratio, acc, total))
        # Disabled viewer for misclassified images, kept from the original:
        # for key, val in wrong.items():
        #     img = cv2.imread(key, 0)
        #     cv2.imshow(val, img)
        #     k = cv2.waitKey(0)
        #     if k == 27:  # wait for ESC key to exit
        #         cv2.destroyAllWindows()
        #         break
        #     cv2.destroyAllWindows()
        print('Bye.')
def train_kfold(record_file, train_log_step, train_param, val_log_step, num_classes, data_shape, snapshot, snapshot_prefix): [base_lr, max_steps] = train_param [batch_size, resize_height, resize_width, depths] = data_shape # ============================================================================================================ # Define the model: [core] with slim.arg_scope( mobilenet_v2.training_scope(dropout_keep_prob=R.dropout)): out, end_points = mobilenet_v2.mobilenet( input_tensor=input_images, num_classes=num_classes, depth_multiplier=R.depth_multiplier, is_training=is_training) # Specify the loss function: tf.losses定义的loss函数都会自动添加到loss函数, 无需 # slim.losses.add_loss(my_loss) tf.losses.softmax_cross_entropy(onehot_labels=input_labels, logits=out) # 添加交叉熵损失loss=1.6 loss = tf.losses.get_total_loss( add_regularization_losses=True) # 添加正则化损失loss=2.2 accuracy = tf.reduce_mean( tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)), tf.float32)) # Specify the optimization scheme: optimizer = tf.train.GradientDescentOptimizer(learning_rate=base_lr) ''' global_step = tf.Variable(0, trainable=False) learning_rate = tf.train.exponential_decay(0.05, global_step, 150, 0.9) optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9) train_tensor = optimizer.minimize(loss, global_step) train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step) ''' # 在定义训练的时�? 注意到我们使用了`batch_norm`层时,需要更新每一层的`average`和`variance`参数, # 更新的过程不包含在正常的训练过程�? 需要我们去手动像下面这样更�? # 通过`tf.get_collection`获得所有需要更新的`op` # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # 使用`tensorflow`的控制流, 先执行更新算�? 再执行训�? # with tf.control_dependencies(update_ops): # create_train_op that ensures that when we evaluate it to get the loss, # the update_ops are done and the gradient updates are computed. 
train_op = slim.learning.create_train_op(total_loss=loss, optimizer=optimizer) # ================================================================================================================ # 从record中读取图片和labels数据 all_nums = get_example_nums(record_file) all_images, all_labels = read_records(record_file, resize_height, resize_width, type='normalization', is_train=None) all_images_batch, all_labels_batch = get_batch_images( all_images, all_labels, batch_size=batch_size, labels_nums=num_classes, one_hot=True, shuffle=True)
def inspect_module():
    """Build MobileNetV2 on a zero batch and print every endpoint."""
    features = tf.zeros([8, 224, 224, 3], name='input')
    outer_scope = tf.variable_scope(
        'TestSSD', default_name=None, values=[features], reuse=tf.AUTO_REUSE)
    with outer_scope:
        inference_scope = mobilenet_v2.training_scope(is_training=False)
        with tf.contrib.slim.arg_scope(inference_scope):
            _, endpoints = mobilenet_v2.mobilenet(features)
    # One line per endpoint: its key followed by the tensor.
    for key, tensor in endpoints.items():
        print(key, tensor)
def encode(self, input_tensor, name):
    """Encode the input tensor with the MobileNet backbone.

    :param input_tensor: tensor fed to `mobilenet_v2.mobilenet`
    :param name: variable scope under which the network is built
    :return: OrderedDict mapping 'layer_7', 'layer_14' and 'layer_19' to a
        dict with the endpoint tensor ('data') and its static shape as a
        list ('shape')
    """
    ret = OrderedDict()
    with tf.variable_scope(name):
        train_scope = mobilenet_v2.training_scope(is_training=True)
        with tf.contrib.slim.arg_scope(train_scope):
            _, end_points = mobilenet_v2.mobilenet(input_tensor, base_only=True)

    # "Version A" of the exported layers; a disabled "Version B" in the
    # original exported layer_5, layer_8 and layer_18 instead.
    for layer_name in ('layer_7', 'layer_14', 'layer_19'):
        feature = end_points[layer_name]
        ret[layer_name] = {
            'data': feature,
            'shape': feature.get_shape().as_list(),
        }
    # ret['end_points'] = end_points
    return ret
def endpoints(image, is_training):
    """Build MobileNetV2 and expose a pooled 'layer_14' feature as the output.

    Args:
        image: rank-4 tensor; it is divided by 255.0 before entering the
            network.
        is_training: forwarded to `mobilenet`.

    Returns:
        Tuple `(end_points, 'MobilenetV2')`. The dict additionally maps both
        'model_output' and 'global_pool' to the mean of 'layer_14' over
        axes 1 and 2.

    Raises:
        ValueError: if `image` is not rank 4.
    """
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = tf.divide(image, 255.0)

    with tf.contrib.slim.arg_scope(
            training_scope(bn_decay=0.9, weight_decay=0.0)):
        _, end_points = mobilenet(image, num_classes=1001, is_training=is_training)

    # The deprecated `keep_dims=False` argument was dropped: not keeping the
    # reduced dimensions is already the default of tf.reduce_mean.
    end_points['model_output'] = end_points['global_pool'] = tf.reduce_mean(
        end_points['layer_14'], [1, 2], name='global_pool')

    return end_points, 'MobilenetV2'
def compare_layer_output(net, layer_name, checkpoint, tensor_name, image_file):
    """Print the squared error between a Caffe blob and a TensorFlow tensor.

    Both networks are run on the same preprocessed image and the per-channel
    sum of squared differences is printed for up to the first 32 channels.

    Matching name examples:
        tf:    MobilenetV2/Conv/Conv2D:0, MobilenetV2/Conv/Relu6:0,
               MobilenetV2/Conv/BatchNorm/FusedBatchNorm:0
        caffe: conv1, conv1/relu, conv1/scale

    Args:
        net: Caffe net exposing `blobs` and `forward()`.
        layer_name: name of the Caffe blob to read.
        checkpoint: TensorFlow checkpoint path to restore.
        tensor_name: name of the TensorFlow tensor to evaluate.
        image_file: image passed to `tf_preprocess`.
    """
    def square_error(x, x_):
        return np.sum(np.square(x - x_))

    image = tf_preprocess(image_file)

    # Caffe inference.
    net.blobs['data'].data[...] = image[...]
    net.forward()
    caffe_output = net.blobs[layer_name].data
    caffe_output = caffe_output.transpose(0, 2, 3, 1)  # channel first to last

    # TensorFlow inference.
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, shape=(None, image_scale, image_scale, 3))
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=False)):
        mobilenet_v2.mobilenet(images, num_classes=1001)
    ema = tf.train.ExponentialMovingAverage(0.999)
    saver = tf.train.Saver(ema.variables_to_restore())
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        tensor = sess.graph.get_tensor_by_name(tensor_name)
        tf_output = sess.run(tensor, feed_dict={images: image})

    # Compare at most the first 32 channels, but never index past the
    # channels that actually exist (previously an IndexError for layers
    # with fewer than 32 channels).
    num_channels = min(32, tf_output.shape[-1], caffe_output.shape[-1])
    print('...................................')
    error = 0
    for i in range(num_channels):
        err = square_error(tf_output[0, :, :, i], caffe_output[0, :, :, i])
        print('channel', i, err)
        error += err
    print('total error:', error)
    print('...................................')
    return
def mobilenet(images, depth_multiplier=1.0, is_training=True, verbose=False, **kwargs):
    """Base MobileNet architecture.

    Based on
    https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet

    Also registers, once per (scope, depth multiplier), a `tf.train.Saver`
    in the collection '<scope>_mobilenet_<depth_multiplier>_saver' so that
    pretrained weights can be restored.

    Args:
        images: input images in [0., 1.] (enforced with graph assertions).
        depth_multiplier: MobileNet depth multiplier; one of 1.0, 0.5, 0.35.
        is_training: forwarded to `mobilenet_v2.training_scope`.
        verbose: accepted for interface compatibility; unused here.
        **kwargs: accepted for interface compatibility and discarded.

    Returns:
        The base network output (`base_only=True`).

    Raises:
        ValueError: if `depth_multiplier` is not a supported value.
    """
    del kwargs

    # Fail early: previously an unsupported multiplier silently returned the
    # rescaled input without applying any network.
    if depth_multiplier not in (1.0, 0.5, 0.35):
        raise ValueError(
            'Unsupported depth_multiplier %r; expected 1.0, 0.5 or 0.35'
            % (depth_multiplier,))

    base_scope = tf.get_variable_scope().name

    # Input in [0., 1.] -> [-1, 1]
    with tf.control_dependencies([tf.assert_greater_equal(images, 0.)]):
        with tf.control_dependencies([tf.assert_less_equal(images, 1.)]):
            net = (images - 0.5) * 2.

    # Mobilenet
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training)):
        if depth_multiplier == 1.0:
            net, _ = mobilenet_v2.mobilenet(net, base_only=True)
        elif depth_multiplier == 0.5:
            net, _ = mobilenet_v2.mobilenet_v2_050(net, base_only=True)
        else:
            net, _ = mobilenet_v2.mobilenet_v2_035(net, base_only=True)

    # Add a saver to restore Imagenet-pretrained weights
    saver_collection = '%s_mobilenet_%s_saver' % (base_scope, depth_multiplier)
    if not tf.get_collection(saver_collection):
        # Strip only the leading '<scope>/' prefix. With an empty scope the
        # old `replace('/', '')` removed every slash from the variable names.
        prefix = '%s/' % base_scope if base_scope else ''

        def _checkpoint_name(variable):
            name = variable.op.name
            if prefix and name.startswith(prefix):
                return name[len(prefix):]
            return name

        var_list = {
            _checkpoint_name(x): x
            for x in tf.global_variables(scope=base_scope)
        }
        saver = tf.train.Saver(var_list=var_list)
        tf.add_to_collection(saver_collection, saver)
    return net
def getMobileNet(checkpoint):
    """Create a session with MobileNetV2 restored from `checkpoint`.

    Args:
        checkpoint: checkpoint path passed to `Saver.restore`.

    Returns:
        Tuple `(sess, graph, endpoints, file_input)` where `file_input` is
        the scalar string placeholder for the image file path.
    """
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with graph.as_default():
        file_input = tf.placeholder(tf.string, ())
        decoded = tf.image.decode_image(tf.read_file(file_input))

        # Batch axis, rescale as value / 128 - 1, then resize to 224x224.
        batch = tf.expand_dims(decoded, 0)
        batch = tf.cast(batch, tf.float32) / 128. - 1
        batch.set_shape((None, None, None, 3))
        batch = tf.image.resize_images(batch, (224, 224))

        inference_scope = mobilenet_v2.training_scope(is_training=False)
        with tf.contrib.slim.arg_scope(inference_scope):
            _, endpoints = mobilenet_v2.mobilenet(batch)

        # Restore through the moving-average variable mapping.
        ema = tf.train.ExponentialMovingAverage(0.999)
        saver = tf.train.Saver(ema.variables_to_restore())
        saver.restore(sess, checkpoint)
    return sess, graph, endpoints, file_input
def mobilenet_v2(inputs, num_classes, depth_multiplier=1.0, finegrain_classification_mode=True, padding='SAME', flag_global_pool=True):
    """Thin wrapper around `mobilenet_v2_builder.mobilenet`.

    All arguments are forwarded unchanged. The logits are additionally
    stored in the endpoints dict under 'output' and their shape is printed.

    Returns:
        Tuple `(logits, endpoints)`.
    """
    builder_kwargs = dict(
        num_classes=num_classes,
        depth_multiplier=depth_multiplier,
        finegrain_classification_mode=finegrain_classification_mode,
        padding=padding,
        flag_global_pool=flag_global_pool,
    )
    logits, endpoints = mobilenet_v2_builder.mobilenet(inputs, **builder_kwargs)
    endpoints['output'] = logits
    print(logits.shape)
    return logits, endpoints
def load_mobilenet_v2(model_dir, sess):
    """Load MobileNetV2 1.4/224 from `model_dir` as a feature extractor.

    Args:
        model_dir: directory containing
            'mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt'.
        sess: session into which the checkpoint values are assigned.

    Returns:
        Tuple `(bottleneck_tensor, resized_input_tensor, 1792)`.
    """
    checkpoint_path = os.path.join(
        model_dir, "mobilenet_v2_1.4_224/mobilenet_v2_1.4_224.ckpt")

    resized_input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3])
    scope = mobilenet_v2.training_scope()
    with tf.contrib.slim.arg_scope(scope):
        # num_classes=None: the first return value is kept as the bottleneck.
        bottleneck_tensor, _ = mobilenet_v2.mobilenet(
            resized_input_tensor, num_classes=None, depth_multiplier=1.4)

    restore_fn = tf.contrib.slim.assign_from_checkpoint_fn(
        checkpoint_path,
        tf.contrib.slim.get_trainable_variables(),
        ignore_missing_vars=True)
    restore_fn(sess)

    #bottleneck_tensor = tf.squeeze(bottleneck_tensor, axis=[1, 2])
    bottleneck_tensor_size = 1792
    return bottleneck_tensor, resized_input_tensor, bottleneck_tensor_size
def fcn_mobv2(images, num_classes, is_training=True):
    """FCN-style head: one transposed convolution on MobileNetV2 'layer_18'.

    Args:
        images: input tensor.
        num_classes: passed to `mobilenet_v2.mobilenet`.
        is_training: forwarded to `mobilenet_v2.training_scope` (it was
            previously accepted but ignored; the default keeps the old
            behaviour).

    Returns:
        Tuple `(logit, prediction)` where `prediction` is the argmax of
        `logit` over axis 3 with that axis re-expanded.
    """
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=is_training)):
        _, end_points = mobilenet_v2.mobilenet(images, num_classes)

    # Debug listing of all endpoints.
    for name, tensor in end_points.items():
        print('{v}:{k}'.format(v=name, k=tensor))

    net = end_points['layer_18']

    # NOTE(review): the head always emits 2 channels regardless of
    # `num_classes` -- confirm this is intended.
    logit = slim.conv2d_transpose(net, 2, [64, 64], stride=32, scope='deconv8')
    # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
    prediction = tf.argmax(logit, axis=3)  # , name="prediction")
    print('logit', logit)
    return logit, tf.expand_dims(prediction, axis=3)
def _get_endpoints(model_name, img_tensor):
    """Build the requested classifier and return its prediction tensor.

    Args:
        model_name: "res50", "res152", or any name starting with "mobilenet".
        img_tensor: input image tensor.

    Returns:
        The predictions endpoint of the chosen model, or None when
        `model_name` matches none of the supported names.
    """
    if model_name in ("res50", "res152"):
        if model_name == "res50":
            build_resnet = resnet_v1.resnet_v1_50
        else:
            build_resnet = resnet_v1.resnet_v1_152
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = build_resnet(img_tensor, 1000, is_training=False)
        return end_points["predictions"]

    if model_name.startswith("mobilenet"):
        inference_scope = mobilenet_v2.training_scope(is_training=False)
        with tf.contrib.slim.arg_scope(inference_scope):
            _, end_points = mobilenet_v2.mobilenet(img_tensor)
        return end_points["Predictions"]

    return None
def main(argv=None):
    """Export a restored MobileNetV2 checkpoint as a SavedModel.

    Reads labels from `FLAGS.labels`, builds the network for 96x96 RGB input
    with one class per label, restores `FLAGS.checkpoint_path` and writes
    the model to `FLAGS.export_dir`. The comma-joined labels are exported as
    an int32 constant holding their encoded bytes.

    Args:
        argv: unused.
    """
    with tf.gfile.Open(FLAGS.labels) as label_file:
        labels = [raw_line.strip() for raw_line in label_file.readlines()]

    joined_labels = ','.join(labels)
    labels_str = tf.constant(
        list(joined_labels.encode()), dtype=tf.int32, name='labels')

    placeholder = tf.placeholder(tf.float32, shape=(None, 96, 96, 3))
    logits, _ = mobilenet_v2.mobilenet(placeholder, len(labels))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, FLAGS.checkpoint_path)
        export_inputs = {'placeholder': placeholder}
        export_outputs = {'labels': labels_str, 'output': logits}
        tf.saved_model.simple_save(
            sess,
            FLAGS.export_dir,
            inputs=export_inputs,
            outputs=export_outputs)
# 1.2 Build the graph structure first, then load the weights.
# Temporarily add slim to the Python search path.
import sys
sys.path.append('./models/research/slim')
# Import mobilenet_v2.
from nets.mobilenet import mobilenet_v2

# Reset the graph.
tf.reset_default_graph()

# Build the mobilenet graph structure first. Afterwards
# tf.get_default_graph() contains the mobilenet graph;
# tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) can be used to compare
# the state before and after the reset.
images = tf.placeholder(tf.float32, (None, 224, 224, 3))
with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
    logits, endpoints = mobilenet_v2.mobilenet(images, depth_multiplier=1.4)

# Saver used to restore the graph weights.
saver = tf.train.Saver()
with tf.Session() as sess:
    # latest_checkpoint inspects the checkpoint file to find the newest model;
    # restore loads the weights into the graph.
    # NOTE(review): the directory name 'moilenet_v2' looks misspelled; it is
    # left unchanged because it must match the directory on disk -- confirm.
    saver.restore(sess, tf.train.latest_checkpoint('./model_ckpt/moilenet_v2'))
    # get_tensor_by_name fetches a tensor by its name. The network is built
    # under the default 'MobilenetV2' scope, so the misspelled
    # 'MoilenetV2/...' name used before could not be found.
    print(sess.run(tf.get_default_graph().get_tensor_by_name('MobilenetV2/Conv/weights:0')).shape)

# 1.3 Frozen inference
# NOTE(review): the note below is cut off in this excerpt; the string is
# closed here so that this section parses on its own. It says that a pb file
# stores variable values and the whole graph structure in one file, with
# variables converted to constants, so testing only needs a forward pass.
"""
pb文件将变量取值和计算图整个结构统一放在一个文件中,通过convert_variable_to_constants 将变量及取值转化为常量保存,在模型测试的时候,输入只需要经过前向传播至输出层就可以。
"""
# For simplicity we just decode jpeg inside tensorflow. # But one can provide any input obviously. file_input = tf.placeholder(tf.string, ()) image = tf.image.decode_jpeg(tf.read_file(file_input)) images = tf.expand_dims(image, 0) images = tf.cast(images, tf.float32) / 128. - 1 images.set_shape((None, None, None, 3)) images = tf.image.resize_images(images, (224, 224)) # images = tf.placeholder(tf.float32, (None, 224, 224, 3)) # Note: arg_scope is optional for inference. with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)): logits, endpoints = mobilenet_v2.mobilenet(images) # Restore using exponential moving average since it produces (1.5-2%) higher # accuracy ema = tf.train.ExponentialMovingAverage(0.999) vars = ema.variables_to_restore() saver = tf.train.Saver(vars) from datasets import imagenet with tf.Session() as sess: saver.restore(sess, checkpoint) x = endpoints['Predictions'].eval(feed_dict={file_input: 'imgs/dog.jpg'}) # writer = tf.summary.FileWriter("TensorBoard/", graph=sess.graph) # writer.close()
def caffe_load_from_ckpt(prototxt, checkpoint, to_caffemodel):
    """Copy MobileNetV2 weights from a TensorFlow checkpoint into a Caffe net.

    The Caffe net is created from `prototxt`, the TensorFlow graph is built
    and restored from `checkpoint`, and parameters are copied blob by blob
    in iteration order of `net.params`. Blobs of shape [1] are not matched
    with a TensorFlow variable; they are set to 1.

    NOTE(review): the copy relies on the Caffe parameter order lining up
    with the order of the TensorFlow GLOBAL_VARIABLES collection; nothing
    here verifies names or shapes. It also reads the module-level names
    `image_scale` and `factor`.

    Args:
        prototxt: Caffe network definition file.
        checkpoint: TensorFlow checkpoint path to restore.
        to_caffemodel: output path passed to `net.save`.

    Returns:
        The Caffe net with the converted weights.
    """
    ### load caffe model and weights
    caffe.set_mode_gpu()
    net = caffe.Net(prototxt, caffe.TEST)
    ### load tf model
    tf.reset_default_graph()
    images = tf.placeholder(tf.float32, shape=(None, image_scale, image_scale, 3))
    with tf.contrib.slim.arg_scope(
            mobilenet_v2.training_scope(is_training=False)):
        logits, endpoints = mobilenet_v2.mobilenet(
            images, num_classes=1001, depth_multiplier=factor, finegrain_classification_mode=True)
    ema = tf.train.ExponentialMovingAverage(0.999)
    vars = ema.variables_to_restore()  # NOTE: shadows the builtin `vars`
    saver = tf.train.Saver(vars)
    ### convert variables from tf checkpoints to caffemodel
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        tf_all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        # for i, var in enumerate(tf_all_vars):
        #     print(i, var.name, var.shape.as_list())
        print(
            '------------------------------------------------------------------'
        )
        # First pass: only print which Caffe blob is paired with which
        # TensorFlow variable (blobs of shape [1] are skipped).
        i = 0  # index
        for caffe_var_name in net.params.keys():
            for n in range(len(net.params[caffe_var_name])):
                if list(net.params[caffe_var_name][n].data.shape) != [1]:
                    var = tf_all_vars[i]
                    print(i, caffe_var_name, net.params[caffe_var_name][n].data.shape, var.name, var.shape.as_list())
                    i += 1
        # exit()
        """ tf name scope: convolutional layer: "MobilenetV2/....../...weights:0" "MobilenetV2/....../BatchNorm/gamma:0" "MobilenetV2/....../BatchNorm/beta:0" "MobilenetV2/....../BatchNorm/moving_mean:0" "MobilenetV2/....../BatchNorm/moving_variance:0" fully connected layer: "MobilenetV2/....../...weights:0" "MobilenetV2/....../biases:0" """
        # name, shape list
        # caffe_var: caffe_var_name, list(net.params[caffe_var_name][n].data.shape)
        # tf_var : tf_var.name, tf_var.shape.as_list()
        ### 262 variables to convert from tf.ckpt to caffemodel
        # Second pass: copy the values, walking both lists in lock step.
        i = 0  # index
        for caffe_var_name in net.params.keys():
            for n in range(len(net.params[caffe_var_name])):
                if list(net.params[caffe_var_name][n].data.shape) != [1]:
                    ### Compare caffe_var and tf_var here
                    # caffe_var_name = caffe_var_name
                    caffe_var_data = net.params[caffe_var_name][n].data
                    caffe_var_shape = list(caffe_var_data.shape)
                    tf_var_name = tf_all_vars[i].name
                    tf_var_shape = tf_all_vars[i].shape.as_list()
                    if 'weights:0' in tf_var_name:
                        ### weight layer
                        # print(caffe_var_name, caffe_var_shape, '|||||||||||', tf_var_name, tf_var_shape)
                        tf_var_data = sess.run(tf_all_vars[i])
                        ### swap tf_var axis for caffe_var:
                        ### tf_var shape: (height, width, channel_out, channel_in) for depthwise_weights
                        ###               (height, width, channel_in, channel_out) for other weights
                        ### caffe_var shape: (channel_out, channel_in, height, width)
                        tf_var_data = np.transpose(tf_var_data, axes=(3, 2, 0, 1))
                        if '/depthwise_weights' in tf_var_name:
                            tf_var_data = np.swapaxes(tf_var_data, axis1=0, axis2=1)
                        if 'Logits/' in tf_var_name:
                            ### mismatched num_classes
                            ### tf class 0: 'background'
                            caffe_var_data[:, ...] = tf_var_data[1:, ...]
                        else:
                            caffe_var_data[...] = tf_var_data[...]
                    if 'biases:0' in tf_var_name:
                        ### bias layer
                        # print(caffe_var_name, caffe_var_shape, '|||||||||||', tf_var_name, tf_var_shape)
                        ### tf_var_shape: (1001,)
                        ### caffe_var_shape: (1000,)
                        tf_var_data = sess.run(tf_all_vars[i])
                        caffe_var_data[:] = tf_var_data[1:]
                    # The four BatchNorm branches below deliberately read the
                    # variable two positions away (i + 2 or i - 2).
                    # NOTE(review): presumably this swaps the (gamma, beta)
                    # pair with the (moving_mean, moving_variance) pair to
                    # match the Caffe blob order -- confirm against the
                    # prototxt.
                    if 'BatchNorm/gamma:0' in tf_var_name:
                        ### batchnorm scaling layer, but convert mean
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i+2].name, tf_all_vars[i+2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i + 2])
                        caffe_var_data[...] = tf_var_data[...]
                    if 'BatchNorm/beta:0' in tf_var_name:
                        ### batchnorm scaling layer, but convert variance
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i+2].name, tf_all_vars[i+2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i + 2])
                        caffe_var_data[...] = tf_var_data[...]  # + 1e-3 -1e-5
                    if 'BatchNorm/moving_mean:0' in tf_var_name:
                        ### batchnorm moving average layer, but convert gamma
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i-2].name, tf_all_vars[i-2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i - 2])
                        caffe_var_data[...] = tf_var_data[...]
                    if 'BatchNorm/moving_variance:0' in tf_var_name:
                        ### batchnorm moving average layer, but convert beta
                        # print(caffe_var_name, n, caffe_var_shape, '|||||||||||', tf_all_vars[i-2].name, tf_all_vars[i-2].shape.as_list())
                        ### tf_var_shape: (channel,)
                        ### caffe_var_shape: (channel,)
                        tf_var_data = sess.run(tf_all_vars[i - 2])
                        caffe_var_data[...] = tf_var_data[...]
                    i += 1
                else:
                    ### moving average factor, must set to 1
                    net.params[caffe_var_name][n].data[...] = 1.
                    # print(caffe_var_name, n, list(net.params[caffe_var_name][n].data.shape), '|||||||||||', net.params[caffe_var_name][n].data)
        net.save(to_caffemodel)
        print('Save converted caffemodel to', to_caffemodel)
    return net
from nets.mobilenet import mobilenet_v2
from tensorflow.python.framework import graph_util

# Script: rebuild MobileNetV2 (10575 classes), restore a training checkpoint
# and expose the pooled features as an 'embeddings' tensor.
# NOTE(review): this excerpt ends right after the SavedModelBuilder is
# created; `output_file`, `output_node_names` and `graph_util` are not used
# in the visible part.
ckpt_file = "./model/20180402-114759/model-20180402-114759.ckpt-275"
output_file = './facenet_mobilenet_lf.pb'
with tf.Graph().as_default():
    with tf.Session() as sess:
        input_data = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3], name='input')
        with tf.contrib.slim.arg_scope(
                mobilenet_v2.training_scope(is_training=False)):
            logits, end_points = mobilenet_v2.mobilenet(input_data, num_classes=10575)
        # Drop axes 1 and 2 of the 'global_pool' endpoint and give the result
        # a stable name.
        prelogits = tf.squeeze(end_points['global_pool'], [1, 2])
        embeddings = tf.identity(prelogits, 'embeddings')
        output_node_names = ['input', 'embeddings']
        loader = tf.train.Saver()
        # sess.run(tf.global_variables_initializer())
        loader.restore(sess, ckpt_file)
        builder = tf.saved_model.builder.SavedModelBuilder(
            './model/saved_model/')
def _safe_div(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def run_test(read_csv, logger):
    """Run hand detection + gesture classification over a labelled frame set.

    For every labelled frame the SSD hand detector is run, each detected hand
    is cropped, resized to 224x224 and classified, and per-class counters, a
    confusion matrix and timing totals are accumulated.  A textual summary and
    a classification report are written to ``logger`` and a micro-averaged
    ROC curve is plotted.

    Keyboard control (polled through ``cv2.waitKey``): Esc stops the loop,
    'p' pauses until the next key press, 's' saves the current hand crops.

    Args:
        read_csv: Passed unchanged to ``read_test_csv_from``; the result must
            map an image file name to a 5-item label whose last item is the
            class name (a key of ``labelsDict``).
        logger: Object with an ``info(str)`` method that receives the results.

    Returns:
        None.
    """
    img_folder = '/home/zgwu/HandImages/long_video/test_frames/'
    save_folder = '/home/zgwu/HandImages/long_video/double_frames/'
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    label_dict = read_test_csv_from(read_csv)

    # Placeholder that holds the input batch for the classifier.
    input_images = tf.placeholder(dtype=tf.float32,
                                  shape=[None, 224, 224, 3],
                                  name='input')
    if ModelType == 'mobilenet':
        _logits, end_points = mobilenet_v2.mobilenet(
            input_tensor=input_images,
            num_classes=num_classes,
            depth_multiplier=1.0,
            is_training=False)
    else:
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            _logits, end_points = inception_v3.inception_v3(
                inputs=input_images,
                num_classes=num_classes,
                is_training=False)
    predictions_op = end_points['Predictions']

    # Crop preprocessing is built ONCE here.  Creating these ops inside the
    # per-hand loop would add new nodes to the graph for every crop, so memory
    # use and run time would grow with the number of processed hands.
    region_input = tf.placeholder(dtype=tf.uint8, shape=[224, 224, 3])
    region_float = tf.image.convert_image_dtype(region_input,
                                                dtype=tf.float32)
    region_resized = tf.image.resize_images(region_float,
                                            size=[224, 224],
                                            method=0)

    # SSD model used to detect hands.
    detection_graph, sessd = detector_utils.load_inference_graph()
    saver = tf.train.Saver()

    # The counters and the report layout below are fixed to seven classes.
    slots = 7
    CM = [[0] * slots for _ in range(slots)]  # confusion matrix [true][pred]
    D = {
        'num_pictures': 0,   # frames with at least one detected hand
        'acc': 0,            # correctly classified hands
        'num_hands': 0,      # classified hands
        'detect_t': 0.0,     # accumulated detection time (s)
        'classify_t': 0.0,   # accumulated classification time (s)
        'o': [0] * slots,    # ground-truth frames per class
        'd': [0] * slots,    # frames with a detection, per true class
        'r': [0] * slots,    # predictions per predicted class
        'tp': [0] * slots,   # true positives per class
    }
    tot_count = 0
    y_true, y_pred = [], []
    label_matrix = np.empty((0, num_classes), dtype=int)
    score_matrix = np.empty((0, num_classes), dtype=int)
    np.set_printoptions(precision=4, suppress=True)

    # The session is closed when the loop finishes or raises.
    with tf.Session() as sess:
        saver.restore(sess, CKPT)
        for img_name, frame_label in label_dict.items():
            tot_count += 1
            if tot_count % 100 == 1:
                print('Process {} --- {} / {}'.format(img_name, tot_count,
                                                      len(label_dict)))
            _, _, _, _, clazz = frame_label
            acc_index = labelsDict[clazz]
            D['o'][acc_index] += 1
            name = os.path.splitext(img_name)[0]

            key = cv2.waitKey(5) & 0xff
            if key == 27:  # Esc closes the program
                break
            if key == ord('p'):  # pause until any key is pressed
                cv2.waitKey(0)

            image_raw = cv2.imread(os.path.join(img_folder, img_name))
            image_np = cv2.resize(image_raw, (im_width, im_height))
            try:
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
            except cv2.error:
                print("Error converting to RGB")

            start = time.time()
            boxes, scores = detector_utils.detect_objects(
                image_np, detection_graph, sessd)
            hands = from_image_crop_boxes(num_hands_detect, score_thresh,
                                          scores, boxes, im_width, im_height,
                                          0.5)
            endh = time.time()  # hand detection finished
            D['detect_t'] += endh - start
            if not hands:
                continue
            D['num_pictures'] += 1
            D['d'][acc_index] += 1

            for rank, rect in enumerate(hands):
                D['num_hands'] += 1
                left, top, right, bottom = rect
                region = image_np[top:bottom, left:right]
                # The interpolation flag must be passed by keyword: the third
                # positional parameter of cv2.resize is `dst`.
                region = cv2.resize(region, (224, 224),
                                    interpolation=cv2.INTER_AREA)
                # uint8 RGB -> float32, then add the batch axis.
                img = sess.run(
                    region_resized,
                    feed_dict={region_input: np.array(region)[:, :, 0:3]})
                img = img.reshape([1, 224, 224, 3])
                predictions = sess.run(predictions_op,
                                       feed_dict={input_images: img})

                label = np.zeros((1, num_classes), dtype=int)
                label[0, acc_index] = 1
                label_matrix = np.append(label_matrix, label, axis=0)
                score_matrix = np.append(
                    score_matrix,
                    predictions.reshape([1, num_classes]),
                    axis=0)

                index = int(np.argmax(predictions.reshape([num_classes])))
                y_true.append(acc_index)
                y_pred.append(index)
                D['r'][index] += 1
                CM[acc_index][index] += 1
                if index == acc_index:
                    D['acc'] += 1
                    D['tp'][index] += 1
                logger.info(img_name + ' ' + clazz + ' ' + labelsStr[index])

                if key == ord('s'):
                    region = cv2.cvtColor(region, cv2.COLOR_RGB2BGR)
                    # D has no 'num_frames' entry (reading it raised
                    # KeyError); the detected-frame counter is used instead.
                    # NOTE(review): frame_path is not defined in this
                    # function — presumably a module-level setting; confirm
                    # (save_folder may have been intended).
                    cv2.imwrite(
                        frame_path + name + '_' + str(D['num_pictures']) +
                        '_' + str(rank) + '.jpg', region)
                    cv2.waitKey(0)

            D['classify_t'] += time.time() - endh

    num_hands = D['num_hands']
    # _safe_div keeps the summary usable when nothing was processed/detected.
    print(
        "From {} pictures, we detect {} hands with {} accurate prediction ({:.2f})"
        .format(tot_count, num_hands, D['acc'],
                _safe_div(D['acc'], num_hands)))

    # NOTE(review): runs of spaces in the header/row templates may have been
    # intended for column alignment — confirm against the expected log layout.
    row_fmt = '\n {: <6d}{: <8d}{: <9d}{: <6d}{: <6d}{: <6d}{} -- '
    result_log = ''.join([
        '\n@@images_count: {} and detect_count: {}'.format(
            tot_count, D['num_pictures']),
        '\n@@image_size: (width : {}, height: {})'.format(
            im_width, im_height),
        '\n@@num_hand_detect: {} - {}%'.format(
            num_hands, int(_safe_div(100 * num_hands, tot_count))),
        '\n@@each_elapsed_time: (detect_hands: {:.4f}s, classify_hand: {:.4f}s)'
        .format(_safe_div(D['detect_t'], tot_count),
                _safe_div(D['classify_t'], num_hands)),
        '\n@@classify_result: Fist Admire Victory Okay None Palm Six',
        (row_fmt + 'origin classes').format(*D['o']),
        (row_fmt + 'detect classes').format(*D['d']),
        (row_fmt + 'recognize count').format(*D['r']),
        (row_fmt + 'true positive').format(*D['tp']),
        '\n@@accuracy: {}/{} - {}%'.format(
            D['acc'], num_hands, int(_safe_div(100 * D['acc'], num_hands))),
        '\n' + '-' * 100,
        '\n' + str(CM),
    ])
    logger.info(result_log)

    if not y_true:
        # Nothing was classified, so there is no data for a report or a curve.
        logger.info('No hands were classified; '
                    'skipping classification report and ROC curve.')
        return

    logger.info(
        str(
            classification_report(y_true,
                                  y_pred,
                                  target_names=labelsStr,
                                  digits=3)))
    print(label_matrix.shape, score_matrix.shape)

    # ROC bookkeeping, keyed by averaging mode.
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    # Compute the micro-average ROC curve and ROC area (method two).
    fpr["micro"], tpr["micro"], _ = roc_curve(label_matrix.ravel(),
                                              score_matrix.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    # FPR is the x-axis, TPR is the y-axis.
    plt.plot(fpr["micro"],
             tpr["micro"],
             c='r',
             lw=2,
             alpha=0.7,
             label=u'AUC=%.3f' % roc_auc["micro"])
    plt.plot((0, 1), (0, 1), c='#808080', lw=1, ls='--', alpha=0.7)
    plt.xlim((-0.01, 1.02))
    plt.ylim((-0.01, 1.02))
    plt.xticks(np.arange(0, 1.1, 0.1))
    plt.yticks(np.arange(0, 1.1, 0.1))
    plt.xlabel('False Positive Rate', fontsize=13)
    plt.ylabel('True Positive Rate', fontsize=13)
    # Positional flag: the `b=` keyword was removed from newer Matplotlib.
    plt.grid(True, ls=':')
    plt.legend(loc='lower right', fancybox=True, framealpha=0.8, fontsize=12)
    plt.title(u'The ROC and AUC of MobileNet Classifier.', fontsize=17)
    plt.show()
#1.2、先构建图结构,再加载权重 #临时添加slim到python搜索路径 import sys sys.path.append('./models/research/slim') #导入mobilenet_v2 from nets.mobilenet import mobilenet_v2 #重置图 tf.reset_default_graph() #导入mobilenet,先构建图结构 #加载完毕后,tf.get_default_graph()中包含了mobilenet计算图结构,可以使用tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)对比reset_graph前后的差异 images = tf.placeholder(tf.float32, (None, 224, 224, 3)) with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)): logits, endpoints = mobilenet_v2.mobilenet(images, depth_multiplier=1.4) #定义saver类,用于恢复图权重 saver = tf.train.Saver() with tf.Session() as sess: #latest_checkpoint检查checkpoint检查点文件,查找最新的模型 #restore恢复图权重 saver.restore(sess, tf.train.latest_checkpoint('./model_ckpt/moilenet_v2')) #get_tensor_by_name通过张量名称获取张量 print( sess.run(tf.get_default_graph().get_tensor_by_name( 'MoilenetV2/Conv/weights:0')).shape) #1.3、frozen inference """ pb文件将变量取值和计算图整个结构统一放在一个文件中,通过convert_variable_to_constants