def main(args):
    # Imports the module named by args.model_def (here: models.inception_resnet_v1);
    # look into how to swap in other models later.
    network = importlib.import_module(args.model_def)
    # Name the model with the current timestamp.
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    # Logs are saved under e.g. C:\Users\Administrator\logs\facenet\.
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write the run arguments to a text file in the log directory.
    facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory.
    # e.g. src_path: 'E:/facenet/train_tripletloss.py',
    #      output_dir: 'C:\Users\Administrator\logs\facenet\20180314-181556'
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)

    # Load the dataset; train_set couples image paths with labels.
    # Given a parent path such as 'E:/facenet/data/lfw_160', every subdirectory is scanned.
    # Returns a list of ImageClass objects, each holding a list of image paths
    # (image_paths) and the corresponding person's name (name); these attributes
    # may be accessed directly later.
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        # If a pretrained model is given, it is restored below via saver.restore().
        print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing.
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images.
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    # Build the graph. The with statement suits resource access: the necessary
    # "cleanup" runs whether or not an exception occurs inside the block.
    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate.
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        # Batch size.
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        # Flags whether the graph runs in training or evaluation mode.
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        # Image paths.
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths')
        # Image labels.
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels')

        # Create a FIFO (first-in, first-out) queue for the input pipeline.
        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3,), (3,)],
                                              shared_name=None, name=None)
        # enqueue_many returns an op.
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)
                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the inference graph; prelogits is the output of the last layer.
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         bottleneck_layer_size=args.embedding_size,
                                         weight_decay=args.weight_decay)

        # L2-normalize the embeddings:
        # tf.nn.l2_normalize(input, axis (0 = per column, 1 = per row),
        #                    epsilon lower bound for the norm, name='embeddings')
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Split embeddings into anchor, positive and negative and calculate triplet loss.
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

        # Apply exponential decay to the learning rate.
        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total loss: triplet loss plus the L2 regularization terms.
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters. Chooses the optimizer and computes the
        # gradients of the loss; global_step is incremented by 1 per update.
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables())

        # Create a saver to save or restore model parameters.
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph. Cap the fraction of GPU memory
        # the process may allocate.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables.
        sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True})

        # Write the logs for TensorBoard.
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        # Create a coordinator for the queue threads.
        coord = tf.train.Coordinator()
        # Start the queue runners.
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop.
            epoch = 0
            # max_nrof_epochs: number of full passes over the data (default 500).
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                # epoch_size is the number of batches per epoch, so the current
                # epoch is the global batch count divided by the batches per
                # epoch; it is used to look up the scheduled learning rate.
                epoch = step // args.epoch_size
                # Train for one epoch.
                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size,
                      anchor, positive, negative, triplet_loss)
                # Save variables and the metagraph if it doesn't exist already.
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)
                # Evaluate on LFW.
                if args.lfw_dir:
                    evaluate(sess, lfw_paths, embeddings, labels_batch,
                             image_paths_placeholder, labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder,
                             phase_train_placeholder, enqueue_op, actual_issame,
                             args.batch_size, args.lfw_nrof_folds, log_dir,
                             step, summary_writer, args.embedding_size)

    return model_dir
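# The triplet loss above is computed by facenet.triplet_loss. As a reference
# point, a minimal NumPy sketch of the FaceNet-paper formulation it is assumed
# to follow: loss = mean(max(||a - p||^2 - ||a - n||^2 + alpha, 0)).
# (triplet_loss_np is a hypothetical helper for illustration only.)
import numpy as np

def triplet_loss_np(anchor, positive, negative, alpha):
    """anchor/positive/negative: (batch, embedding_size) arrays of embeddings."""
    pos_dist = np.sum(np.square(anchor - positive), axis=1)  # squared a-p distance
    neg_dist = np.sum(np.square(anchor - negative), axis=1)  # squared a-n distance
    # Hinge: only triplets that violate the margin alpha contribute.
    return np.mean(np.maximum(pos_dist - neg_dist + alpha, 0.0))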
def main(args):
    network = importlib.import_module(args.model_def, 'inference')
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Extra directories for the best-so-far checkpoints (fbtian_max).
    subdirmaxlin = subdir + '_lin_max'
    maxlin_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdirmaxlin)
    if not os.path.exists(maxlin_dir):
        os.makedirs(maxlin_dir)
    subdirmax = subdir + '_max'
    modelmax_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdirmax)
    if not os.path.exists(modelmax_dir):
        os.makedirs(modelmax_dir)

    # Store some git revision info in a text file in the log directory
    if not args.no_store_revision_info:
        src_path, _ = os.path.split(os.path.realpath(__file__))
        facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    # for i in range(args.send2):
    #     np.random.shuffle(np.arange(10))

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3,), (3,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            fb_count = 0
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                # Note: this prints the Tensor, not the path (it runs at graph-build time).
                print('filename:%s' % filename)
                image = tf.image.decode_png(file_contents)
                if args.random_crop:
                    print('args.random_crop')  # fbtian_add
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    # print('else not args.random_crop')  # comes in here, fbtian_add
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    print('args.random_flip')
                    image = tf.image.random_flip_left_right(image)
                if 1:  # always-on photometric augmentation
                    image = tf.image.random_brightness(image, max_delta=0.2)   # random brightness transformation
                    image = tf.image.random_contrast(image, lower=0.2, upper=1.0)  # random contrast transformation
                    fb_count += 1
                # pylint: disable=no-member  # fbtian_add
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])
            print('fb_count:%d' % fb_count)

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'input')  # fbtian

        batch_norm_params = {
            # Decay for the moving averages
            'decay': 0.995,
            # epsilon to prevent 0s in variance
            'epsilon': 0.001,
            # force in-place updates of mean and variance estimates
            'updates_collections': None,
            # Moving averages end up in the trainable variables collection
            'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
            # Only update statistics during training mode
            'is_training': phase_train_placeholder
        }

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         weight_decay=args.weight_decay)
        pre_embeddings = slim.fully_connected(
            prelogits, args.embedding_size, activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            scope='Bottleneck', reuse=False)
        embeddings = tf.nn.l2_normalize(pre_embeddings, 1, 1e-10, name='embeddings')

        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables())

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        config = tf.ConfigProto(allow_soft_placement=True)
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)
        config.gpu_options.allow_growth = True
        # sess = tf.Session(config=config)
        # Note: `config` above is built but unused; the session below is created
        # from a fresh ConfigProto.
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                intra_op_parallelism_threads=8))

        # Initialize variables
        sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            acc_tmp = 0
            val_tmp = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                print(global_step)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size,
                      anchor, positive, negative, triplet_loss)
                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)
                # Evaluate on LFW
                if args.lfw_dir:
                    acc, val = evaluate(sess, lfw_paths, embeddings, labels_batch,
                                        image_paths_placeholder, labels_placeholder,
                                        batch_size_placeholder, learning_rate_placeholder,
                                        phase_train_placeholder, enqueue_op, actual_issame,
                                        args.batch_size, args.lfw_nrof_folds, log_dir,
                                        step, summary_writer, args.embedding_size)
                    # Keep the checkpoints with the best accuracy and validation
                    # rate seen so far (fbtian_max).
                    print('starting to save the maxacc and maxval')
                    if acc > acc_tmp:
                        maxmodel_path = os.path.join(maxlin_dir, 'model-%s.ckpt_accmax' % subdir)
                        saver.save(sess, maxmodel_path, write_meta_graph=False)
                        shutil.copy(maxmodel_path + '.data-00000-of-00001', modelmax_dir)
                        shutil.copy(maxmodel_path + '.index', modelmax_dir)
                        acc_tmp = acc
                    if val > val_tmp:
                        maxmodel_path = os.path.join(maxlin_dir, 'model-%s.ckpt_valmax' % subdir)
                        saver.save(sess, maxmodel_path, write_meta_graph=False)
                        shutil.copy(maxmodel_path + '.data-00000-of-00001', modelmax_dir)
                        shutil.copy(maxmodel_path + '.index', modelmax_dir)
                        val_tmp = val
                    print('end to save the maxacc and maxval')

        sess.close()
    return model_dir
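# All of these variants use the same staircase learning-rate schedule. With
# staircase=True, tf.train.exponential_decay computes
#   lr = base_lr * decay_factor ** (global_step // decay_steps),
# where decay_steps = learning_rate_decay_epochs * epoch_size. A small
# pure-Python check with made-up numbers (staircase_lr is illustrative only):

def staircase_lr(base_lr, global_step, decay_steps, decay_factor):
    return base_lr * decay_factor ** (global_step // decay_steps)

# e.g. base_lr=0.1, decaying by 0.9 every 1000 steps:
# steps 0-999 -> 0.1, steps 1000-1999 -> 0.09, step 2000 -> 0.081
for s in (0, 999, 1000, 2000):
    print(s, staircase_lr(0.1, s, 1000, 0.9))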
def transform(args, pretrained_ckpt, image_size, output_dir, bottleneck_size):
    with tf.Graph().as_default():
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3,), (3,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                processed_image = inception_preprocessing.preprocess_image(
                    image, args.image_size, args.image_size, is_training=False)
                # if args.random_crop:
                #     image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                # else:
                #     image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    processed_image = tf.image.random_flip_left_right(processed_image)
                images.append(processed_image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        with slim.arg_scope(inception_resnet_v2.inception_resnet_v2_arg_scope(
                weight_decay=args.weight_decay)):
            prelogits, _ = inception_resnet_v2.inception_resnet_v2(
                image_batch, num_classes=bottleneck_size,
                is_training=phase_train_placeholder)

        exclude = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']
        variables_to_restore = slim.get_variables_to_restore(exclude=exclude)
        loader = tf.train.Saver(variables_to_restore)

        global_step = tf.train.get_or_create_global_step()
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_layers = ['Logits', 'Conv2d_7b_1x1', 'Block8', 'Repeat_2', 'Mixed_7a']
        var_list = []
        for v in tf.global_variables():
            splits = v.name.split("/")
            if len(splits) > 2 and splits[1] in train_layers:
                var_list.append(v)
        facenet.train(total_loss, global_step, args.optimizer, learning_rate,
                      args.moving_average_decay, var_list)

        saver = tf.train.Saver(max_to_keep=3)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            loader.restore(sess, pretrained_ckpt)
            checkpoint_path = os.path.join(output_dir, 'model-%s.ckpt' % bottleneck_size)
            saver.save(sess, checkpoint_path, write_meta_graph=False)
            metagraph_filename = os.path.join(output_dir, 'model-%s.meta' % bottleneck_size)
            saver.export_meta_graph(metagraph_filename)
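# The var_list filter in transform() keeps a variable only when the second
# component of its scope path is in train_layers, i.e. only the top of the
# network is fine-tuned. A self-contained sketch of the same string logic,
# using hypothetical variable names rather than ones read from the real graph:
train_layers = ['Logits', 'Conv2d_7b_1x1', 'Block8', 'Repeat_2', 'Mixed_7a']
names = [
    'InceptionResnetV2/Logits/Logits/weights:0',               # kept: splits[1] == 'Logits'
    'InceptionResnetV2/Conv2d_1a_3x3/weights:0',               # skipped: early layer stays frozen
    'InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/weights:0',  # kept: splits[1] == 'Block8'
]
selected = [n for n in names
            if len(n.split('/')) > 2 and n.split('/')[1] in train_layers]
print(selected)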
def main(args):
    print(args.model_def)
    network = importlib.import_module(args.model_def, 'inference')
    print(1111)  # debug marker
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model))

    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3,), (3,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_png(file_contents)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(
                        image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)
                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         bottleneck_layer_size=args.embedding_size,
                                         weight_decay=args.weight_decay)
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables())

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                saver.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, args.embedding_size,
                      anchor, positive, negative, triplet_loss)
                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)
                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, lfw_paths, embeddings, labels_batch,
                             image_paths_placeholder, labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder,
                             phase_train_placeholder, enqueue_op, actual_issame,
                             args.batch_size, args.lfw_nrof_folds, log_dir,
                             step, summary_writer, args.embedding_size)

        sess.close()
    return model_dir
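# The anchor/positive/negative split relies on the batch being laid out as
# consecutive (anchor, positive, negative) rows. A NumPy equivalent of the
# tf.reshape + tf.unstack pair above, as a shape sanity check:
import numpy as np

emb = np.arange(12.0).reshape(6, 2)   # 6 embeddings of size 2 = 2 triplets
trip = emb.reshape(-1, 3, 2)          # (nrof_triplets, 3, embedding_size)
anchor, positive, negative = trip[:, 0], trip[:, 1], trip[:, 2]
print(anchor.shape)                   # (2, 2): one anchor row per triplet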
def train(server, cluster_spec, args, ctx):
    task_index = ctx.task_index
    num_workers = len(cluster_spec['worker'])
    is_chief = task_index == 0

    local_data_path = args.local_data_path
    data_dir = os.path.join(local_data_path, "train")
    val_dir = os.path.join(local_data_path, "val")
    val_pairs = os.path.join(local_data_path, "pairs.txt")

    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = args.workspace + "/logs/" + subdir
    checkpoint_dir = args.checkpoint_dir if args.checkpoint_dir else (
        args.workspace + "/models/" + subdir)
    if task_index == 0:
        if not tf.gfile.Exists(args.workspace):
            tf.gfile.MakeDirs(args.workspace)
        if not tf.gfile.Exists(checkpoint_dir):
            tf.gfile.MakeDirs(checkpoint_dir)
        if not tf.gfile.Exists(log_dir):
            tf.gfile.MakeDirs(log_dir)

    if is_chief and args.transfer_learning:
        print("Transforming the pretrained inception model...")
        transform_pretrained.transform(args, args.pretrained_ckpt, args.image_size,
                                       checkpoint_dir, args.embedding_size)
        print("Transform finished")
        tf.reset_default_graph()

    seed = random.SystemRandom().randint(0, 10240)
    print("Random seed: " + str(seed))
    np.random.seed(seed=seed)

    train_set = facenet.get_dataset(data_dir)
    print('Model directory: %s' % checkpoint_dir)
    print('Log directory: %s' % log_dir)

    # Read the file containing the pairs used for testing
    pairs = read_pairs(val_pairs)
    # Get the paths for the corresponding images
    val_image_paths, actual_issame = get_paths(val_dir, pairs)

    with tf.device(tf.train.replica_device_setter(
            worker_device="/job:worker/task:%d" % task_index,
            cluster=cluster_spec)):
        tf.set_random_seed(seed)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=10000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3,), (3,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                processed_image = inception_preprocessing.preprocess_image(
                    image, args.image_size, args.image_size, is_training=False)
                if args.random_flip:
                    processed_image = tf.image.random_flip_left_right(processed_image)
                images.append(processed_image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        with slim.arg_scope(inception_resnet_v2_arg_scope(weight_decay=args.weight_decay)):
            prelogits, _ = inception_resnet_v2(image_batch,
                                               num_classes=args.embedding_size,
                                               is_training=phase_train_placeholder)

        global_step = tf.train.get_or_create_global_step()
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss')

        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('triplet_loss', triplet_loss)
        tf.summary.scalar('total_losses', total_loss)

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_layers = ['Logits', 'Conv2d_7b_1x1', 'Block8', 'Repeat_2', 'Mixed_7a']
        var_list = []
        for v in tf.global_variables():
            splits = v.name.split("/")
            if len(splits) > 2 and splits[1] in train_layers:
                var_list.append(v)
        train_op, opt = facenet.train(total_loss, global_step, args.optimizer,
                                      learning_rate, args.moving_average_decay,
                                      var_list, sync_replicas=args.sync_replicas,
                                      replicas_to_aggregate=num_workers)

        summary_op = tf.summary.merge_all()

    hooks = [tf.train.StopAtStepHook(int(args.epochs) * int(args.epoch_size))]
    if args.sync_replicas:
        hooks += [opt.make_session_run_hook(is_chief)]

    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        device_filters=['/job:ps', '/job:worker/task:%d' % task_index])
    # sess_config.operation_timeout_in_ms = 80000

    with tf.train.MonitoredTrainingSession(master=server.target,
                                           is_chief=is_chief,
                                           config=sess_config,
                                           checkpoint_dir=checkpoint_dir,
                                           hooks=hooks,
                                           save_summaries_steps=None,
                                           save_summaries_secs=None,
                                           stop_grace_period_secs=30,
                                           save_checkpoint_secs=600) as sess:
        # Training and validation loop
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph) if is_chief else None
        while not sess.should_stop():
            # Train for one epoch
            step = _train(args, sess, train_set, image_paths_placeholder,
                          labels_placeholder, labels_batch, batch_size_placeholder,
                          learning_rate_placeholder, phase_train_placeholder,
                          enqueue_op, input_queue, global_step, embeddings,
                          total_loss, train_op, args.embedding_size, triplet_loss,
                          summary_op, summary_writer)
            if is_chief:
                # checkpoint_path = os.path.join(checkpoint_dir, 'model-%s.ckpt' % "test")
                # saver.save(sess._sess._sess._sess._sess, checkpoint_path, global_step=step, write_meta_graph=False)
                evaluate(sess, val_image_paths, embeddings, labels_batch,
                         image_paths_placeholder, labels_placeholder,
                         batch_size_placeholder, learning_rate_placeholder,
                         phase_train_placeholder, enqueue_op, actual_issame,
                         args.batch_size, args.lfw_nrof_folds, step,
                         summary_writer, args.embedding_size)

    return checkpoint_dir
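# In every variant the (None, 3) placeholders expect one row per triplet. A
# hedged sketch of how selected triplet paths might be enqueued; the file
# names are made up, and the sess.run call is shown commented out because the
# actual feeding happens inside the train/_train helpers, whose exact layout
# is an assumption here:
import numpy as np

triplet_paths = np.array([
    ['a1.png', 'p1.png', 'n1.png'],
    ['a2.png', 'p2.png', 'n2.png'],
])
triplet_labels = np.arange(triplet_paths.size).reshape(-1, 3)
# sess.run(enqueue_op, feed_dict={image_paths_placeholder: triplet_paths,
#                                 labels_placeholder: triplet_labels})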