# Evaluation graph setup. Assumes the usual imports (os, time, numpy as np,
# matplotlib.pyplot as plt, tensorflow as tf) and the project-local helpers
# load_bin, resnet50, arcface_loss, parse_function.
ver_list = []
ver_name_list = []
data_set = load_bin(opt.datasets, [112, 112])
ver_list.append(data_set)
ver_name_list.append(opt.dataset_name)

images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image_inputs')
labels = tf.placeholder(tf.int64, [None, ], name='labels_inputs')
w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
num_classes = opt.num_classes

emb = resnet50(images, is_training=True)
emb = tf.contrib.layers.flatten(emb)
logit = arcface_loss(embedding=emb, labels=labels, w_init=w_init_method, out_num=num_classes)
# inference_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels))

# Collect trainable variables plus the batch-norm moving statistics so the
# saver restores a complete checkpoint.
var_list = tf.trainable_variables()
g_list = tf.global_variables()
bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
var_list += bn_moving_vars

config = tf.ConfigProto()
config.allow_soft_placement = True
config.gpu_options.allow_growth = True
counter = 0
saver = tf.train.Saver(var_list=var_list)

with tf.Session(config=config) as sess:
    # The rest of this evaluation script is not included in this excerpt.
    pass
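# A hedged sketch (not from the original) of what the session body above
# typically does: restore a checkpoint into the saver's variable list and
# compute embeddings for the verification set in batches. The unpacking of
# data_set, the opt.ckpt_path option, and the eval batch size are assumptions.
ver_images, issame_list = data_set        # assumed layout of load_bin's output
with tf.Session(config=config) as sess:
    saver.restore(sess, opt.ckpt_path)    # assumed checkpoint-path option
    eval_batch = 32
    embeddings = []
    for start in range(0, len(ver_images), eval_batch):
        batch = ver_images[start:start + eval_batch]
        embeddings.append(sess.run(emb, feed_dict={images: batch}))
    embeddings = np.concatenate(embeddings, axis=0)   # (num_images, emb_dim)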
def train():
    # Learning-rate range test: train for 100 steps while sweeping the
    # learning rate, then plot loss against log(lr).
    num_classes = 85742  # 85164
    batch_size = 64
    # ckpt_save_dir = '/data/ChuyuanXiong/backup/face_real403_ckpt'
    w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
    tfr = '/data/ChuyuanXiong/up/face/tfrecords/tran.tfrecords'

    dataset = tf.data.TFRecordDataset(tfr)
    dataset = dataset.map(parse_function)
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image_inputs')
    labels = tf.placeholder(tf.int64, [None, ], name='labels_inputs')

    emb = resnet50(images, is_training=True)
    logit = arcface_loss(embedding=emb, labels=labels, w_init=w_init_method, out_num=num_classes)
    inference_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels))

    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    lr_steps, values = generate_lr_boundaries()
    lr = tf.train.piecewise_constant(global_step, lr_steps, values, name='lr_schedule')

    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
    grads = opt.compute_gradients(inference_loss)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(grads, global_step=global_step)

    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True

    loss_vals = []
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(iterator.initializer)

        for i in range(100):
            image_train, label_train = sess.run(next_element)
            _, loss_val, _ = sess.run([train_op, inference_loss, inc_op],
                                      feed_dict={images: image_train, labels: label_train})
            loss_vals.append(loss_val)

    plt.figure()
    plt.xticks(np.log([1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]),
               (1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1))
    plt.xlabel('learning rate')
    plt.ylabel('loss')
    plt.plot(np.log(values[:len(loss_vals)]), loss_vals)
    plt.savefig('output.jpg')  # save before show(), otherwise the figure may be empty
    plt.show()
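# generate_lr_boundaries() is referenced above but not defined in this excerpt.
# A minimal sketch consistent with the sweep-and-plot logic (one boundary per
# step, learning rate rising log-uniformly from 1e-5 to 1 over 100 steps).
# The helper name comes from the code above; this implementation, the step
# count, and the LR range are assumptions.
import numpy as np

def generate_lr_boundaries(num_steps=100, lr_min=1e-5, lr_max=1.0):
    # One LR value per step: len(values) == len(boundaries) + 1, as required
    # by tf.train.piecewise_constant.
    values = np.logspace(np.log10(lr_min), np.log10(lr_max), num_steps + 1).tolist()
    boundaries = list(range(1, num_steps + 1))
    return boundaries, values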
def train(tfrecords, batch_size, lr, ckpt_save_dir, epoch, num_classes):
    # ArcFace training loop driven by explicit arguments, with periodic
    # logging and checkpointing.
    w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
    tfr = tfrecords
    print('-------------Training Args-----------------')
    print('--tfrecords : ', tfr)
    print('--batch_size : ', batch_size)
    print('--num_classes : ', num_classes)
    print('--ckpt_save_dir : ', ckpt_save_dir)
    print('--lr : ', lr)
    print('-------------------------------------------')

    dataset = tf.data.TFRecordDataset(tfr)      # read the TFRecords data
    dataset = dataset.map(parse_function)
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()          # (?, 112, 112, 3)

    images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image_inputs')
    labels = tf.placeholder(tf.int64, [None, ], name='labels_inputs')

    emb = resnet50(images, is_training=True)
    logit = arcface_loss(embedding=emb, labels=labels, w_init=w_init_method, out_num=num_classes)  # (?, 85742)
    inference_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels))

    p = int(512.0 / batch_size)  # 512 / 64 = 8
    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    lr_steps = [p * val for val in [40000, 60000, 80000]]  # [320000, 480000, 640000]
    # piecewise_constant(x, boundaries, values): x is global_step (i.e. the iteration
    # count), boundaries lists the iteration thresholds, and values holds the learning
    # rate to use in each interval, e.g.
    # (x, [320000, 480000, 640000], [0.001, 0.0005, 0.0003, 0.0001]).
    lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=lr, name='lr_schedule')

    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)  # momentum optimizer
    grads = opt.compute_gradients(inference_loss)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # apply_gradients runs only after update_ops, so the batch-norm moving
    # mean/variance updates are applied before each training step. Note that
    # global_step is advanced both here and by inc_op, so it grows by 2 per step.
    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(grads, global_step=global_step)

    pred = tf.nn.softmax(logit)  # (?, 85742), softmax probabilities
    acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels), dtype=tf.float32))  # training accuracy

    saver = tf.train.Saver(max_to_keep=5)
    counter = 0
    # gpuConfig is assumed to be a module-level tf.ConfigProto (allow_soft_placement,
    # gpu_options.allow_growth), as in the other training variants.
    with tf.Session(config=gpuConfig) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # saver.restore(sess, '/data/ChuyuanXiong/backup/face_real330_ckpt/Face_vox_iter_271800.ckpt')

        for i in range(epoch):
            sess.run(iterator.initializer)
            while True:
                try:
                    # t1 = time.time()
                    image_train, label_train = sess.run(next_element)
                    # t2 = time.time()
                    # print(counter, t2 - t1, " next_element")
                    # print(image_train.shape, label_train.shape)
                    # print(label_train)
                    feed_dict = {images: image_train, labels: label_train}
                    # Optional Chrome-trace profiling (disabled):
                    # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    # run_metadata = tf.RunMetadata()
                    # _, loss_val, acc_val, _ = sess.run([train_op, inference_loss, acc, inc_op],
                    #                                    feed_dict=feed_dict,
                    #                                    options=options, run_metadata=run_metadata)
                    _, loss_val, acc_val, _ = sess.run([train_op, inference_loss, acc, inc_op],
                                                       feed_dict=feed_dict)
                    # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                    # chrome_trace = fetched_timeline.generate_chrome_trace_format()
                    # with open('timeline.json', 'w') as f:
                    #     f.write(chrome_trace)
                    # t3 = time.time()
                    # print(counter, t3 - t2, " train_op, inference_loss, acc, inc_op")
                    counter += 1

                    if counter % 100 == 0:
                        time_h = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                        log_c = ('time:' + time_h + ' counter:' + str(counter) +
                                 ' loss_val:' + str(loss_val) + ' acc:' + str(acc_val))
                        print(log_c)
                        fp = open("iter_log.log", "a")
                        fp.write(log_c + "\n")
                        fp.close()

                    if counter % 50000 == 0:
                        filename = 'Face_vox_iter_{:d}'.format(counter) + '.ckpt'
                        filename = os.path.join(ckpt_save_dir, filename)
                        saver.save(sess, filename)
                except tf.errors.OutOfRangeError:
                    print('End of epoch %d' % i)
                    break
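# parse_function is used by every training variant but is not shown in this
# excerpt. A minimal sketch, assuming each TFRecord example stores an encoded
# JPEG under 'image_raw' and an int64 'label'; the feature keys and the pixel
# normalization are assumptions.
def parse_function(example_proto):
    features = {'image_raw': tf.FixedLenFeature([], tf.string),
                'label': tf.FixedLenFeature([], tf.int64)}
    parsed = tf.parse_single_example(example_proto, features)
    img = tf.image.decode_jpeg(parsed['image_raw'], channels=3)
    img = tf.reshape(img, [112, 112, 3])
    img = tf.cast(img, tf.float32)
    img = (img - 127.5) * 0.0078125   # scale pixels to roughly [-1, 1]
    label = tf.cast(parsed['label'], tf.int64)
    return img, label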
def train(args):
    # Same training loop, driven by an argparse-style args object.
    num_classes = args.num_classes  # 85164
    batch_size = args.batch_size
    ckpt_save_dir = args.ckpt_save_dir
    w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
    tfr = args.tfrecords
    print('-------------Training Args-----------------')
    print('--tfrecords : ', tfr)
    print('--batch_size : ', batch_size)
    print('--num_classes : ', num_classes)
    print('--ckpt_save_dir : ', ckpt_save_dir)
    print('--lr : ', args.lr)
    print('-------------------------------------------')

    dataset = tf.data.TFRecordDataset(tfr)
    dataset = dataset.map(parse_function)
    dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image_inputs')
    labels = tf.placeholder(tf.int64, [None, ], name='labels_inputs')

    emb = resnet50(images, is_training=True)
    logit = arcface_loss(embedding=emb, labels=labels, w_init=w_init_method, out_num=num_classes)
    inference_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels))

    p = int(512.0 / batch_size)
    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    lr_steps = [p * val for val in [40000, 60000, 80000]]
    lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=args.lr, name='lr_schedule')

    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)
    grads = opt.compute_gradients(inference_loss)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(grads, global_step=global_step)

    pred = tf.nn.softmax(logit)
    acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels), dtype=tf.float32))

    saver = tf.train.Saver(max_to_keep=3)
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.allow_growth = True
    counter = 0

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # saver.restore(sess, '/data/ChuyuanXiong/backup/face_real330_ckpt/Face_vox_iter_271800.ckpt')

        for i in range(args.epoch):
            sess.run(iterator.initializer)
            while True:
                try:
                    image_train, label_train = sess.run(next_element)
                    # print(image_train.shape, label_train.shape)
                    # print(label_train)
                    feed_dict = {images: image_train, labels: label_train}
                    _, loss_val, acc_val, _ = sess.run(
                        [train_op, inference_loss, acc, inc_op], feed_dict=feed_dict)
                    counter += 1
                    # print('counter: ', counter, 'loss_val', loss_val, 'acc: ', acc_val)

                    if counter % 100 == 0:
                        print('counter: ', counter, 'loss_val', loss_val, 'acc: ', acc_val)
                        filename = 'Face_vox_iter_{:d}'.format(counter) + '.ckpt'
                        filename = os.path.join(ckpt_save_dir, filename)
                        saver.save(sess, filename)
                except tf.errors.OutOfRangeError:
                    print('End of epoch %d' % i)
                    break
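# A minimal, hypothetical entry point for the train(args) variant above. The
# flag names mirror the attributes read inside train(); the defaults are
# assumptions.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ArcFace ResNet50 training')
    parser.add_argument('--tfrecords', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_classes', type=int, default=85742)
    parser.add_argument('--ckpt_save_dir', type=str, default='./ckpt')
    parser.add_argument('--epoch', type=int, default=10)
    # Four LR values for the three piecewise_constant boundaries used in train().
    parser.add_argument('--lr', type=float, nargs='+',
                        default=[0.001, 0.0005, 0.0003, 0.0001])
    args = parser.parse_args()
    train(args)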