def load_model(model_file, dev):
    os.environ["CUDA_VISIBLE_DEVICES"] = dev
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)
    net = network_desp.Network()
    inputs = net.get_inputs()
    net.inference('TEST', inputs)
    test_collect_dict = net.get_test_collection()
    test_collect = [it for it in test_collect_dict.values()]
    saver = tf.train.Saver()
    saver.restore(sess, model_file)
    return partial(sess.run, test_collect), inputs
nr_records = len(records)
read_func = dataset_dict['read_func']
nr_devs = len(devs)
for epoch_num in range(args.start_epoch, args.end_epoch + 1):
    model_file = osp.join(config.output_dir, 'model_dump',
                          'epoch_{:d}'.format(epoch_num) + '.ckpt')
    pbar = tqdm(total=nr_records)
    all_results = []
    if nr_devs > 1:
        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            os.environ["CUDA_VISIBLE_DEVICES"] = devs[0]
            tfconfig = tf.ConfigProto(allow_soft_placement=True)
            tfconfig.gpu_options.allow_growth = True
            sess = tf.Session(config=tfconfig)
            net = network_desp.Network()
            inputs = net.get_inputs()
            net.inference('TEST', inputs)
            saver = tf.train.Saver()
            saver.restore(sess, model_file)
            vars = tf.trainable_variables()
            for v in vars:
                print('======================================================================')
                print(v.op.name)
                op = sess.graph.get_tensor_by_name(v.op.name + ':0')
                res = sess.run(op)
                print(res)
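load_model above returns a callable bound to sess.run over the test collection, together with the input placeholders. A minimal sketch of how a caller could drive it for one image follows; the checkpoint path, the (h, w, scale) layout of the info blob, and the placeholder ordering are illustrative assumptions, not taken from the source.

import numpy as np

# Hypothetical single-image inference with the pair returned by load_model.
run_fn, inputs = load_model('epoch_10.ckpt', '0')           # path and device are placeholders
image = np.zeros((1, 600, 800, 3), dtype=np.float32)        # dummy preprocessed image batch
im_info = np.array([[600., 800., 1.0]], dtype=np.float32)   # assumed (h, w, scale) layout
feed_dict = {inputs[0]: image, inputs[1]: im_info}          # placeholder order is an assumption
outputs = run_fn(feed_dict=feed_dict)                       # same as sess.run(test_collect, feed_dict=feed_dict)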
def train(args):
    logger = QuickLogger(log_dir=cfg.output_dir).get_logger()
    logger.info(cfg)
    np.random.seed(cfg.rng_seed)
    num_gpu = len(args.devices.split(','))
    net = network_desp.Network()
    data_iter = get_data_flow()
    prefetch_data_layer = PrefetchingIter(data_iter, num_gpu)
    with tf.Graph().as_default(), tf.device('/device:CPU:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0.),
                                      trainable=False)
        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        sess = tf.Session(config=tfconfig)
        tf.set_random_seed(cfg.rng_seed)
        lr = tf.Variable(cfg.get_lr(0), trainable=False)
        lr_placeholder = tf.placeholder(lr.dtype, shape=lr.get_shape())
        update_lr_op = lr.assign(lr_placeholder)
        opt = tf.train.MomentumOptimizer(lr, cfg.momentum)

        '''data processing'''
        inputs_list = []
        for i in range(num_gpu):
            inputs_list.append(net.get_inputs())
        put_op_list = []
        get_op_list = []
        for i in range(num_gpu):
            with tf.device("/GPU:%s" % i):
                area = tf.contrib.staging.StagingArea(
                    dtypes=[tf.float32 for _ in range(len(inputs_list[0]))])
                put_op_list.append(area.put(inputs_list[i]))
                get_op_list.append(area.get())
        coord = tf.train.Coordinator()
        init_all_var = tf.initialize_all_variables()
        sess.run(init_all_var)
        queue_runner = tf.train.start_queue_runners(coord=coord, sess=sess)
        '''end of data processing'''

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        biases_ini = tf.constant_initializer(0.0)
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.weight_decay)
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope([slim.model_variable, slim.variable],
                                            device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [slim.conv2d, slim.conv2d_in_plane,
                                     slim.conv2d_transpose,
                                     slim.separable_conv2d,
                                     slim.fully_connected],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=biases_ini):
                                loss = net.inference('TRAIN', get_op_list[i])
                                loss = loss / num_gpu
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    loss = loss + tf.add_n(regularization_losses)
                                tf.get_variable_scope().reuse_variables()
                                grads = opt.compute_gradients(loss)
                                tower_grads.append(grads)
        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]
        final_gvs = []
        with tf.variable_scope('Gradient_Mult'):
            for grad, var in grads:
                scale = 1.
                # if '/biases:' in var.name:
                #     scale *= 2.
                if 'conv_new' in var.name:
                    scale *= 3.
                if not np.allclose(scale, 1.0):
                    grad = tf.multiply(grad, scale)
                final_gvs.append((grad, var))
        apply_gradient_op = opt.apply_gradients(final_gvs, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # apply_gradient_op = opt.apply_gradients(grads)
        # saver = tf.train.Saver(tf.global_variables(), max_to_keep=100000)
        saver = tf.train.Saver(max_to_keep=100000)
        variables = tf.global_variables()
        var_keep_dic = get_variables_in_checkpoint_file(cfg.weight)
        var_keep_dic.pop('global_step')
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(update_lr_op, {lr_placeholder: cfg.get_lr(0) * num_gpu})
        sess.run(tf.variables_initializer(variables, name='init'))
        variables_to_restore = []
        for v in variables:
            if v.name.split(':')[0] in var_keep_dic:
                # print('Variables restored: %s' % v.name)
                variables_to_restore.append(v)
        restorer = tf.train.Saver(variables_to_restore)
        restorer.restore(sess, cfg.weight)

        train_collection = net.get_train_collection()
        sess2run = []
        sess2run.append(train_op)
        sess2run.append(put_op_list)
        for col in train_collection.values():
            sess2run.append(col)

        timer = Timer()
        # warm up staging area
        inputs_names = net.get_inputs(mode=1)
        for _ in range(4):
            blobs_list = prefetch_data_layer.forward()
            feed_dict = {}
            for i, inputs in enumerate(inputs_list):
                # blobs = next(data_iter)
                blobs = blobs_list[i]
                for it_idx, it_inputs_name in enumerate(inputs_names):
                    feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
            sess.run([put_op_list], feed_dict=feed_dict)

        for epoch in range(cfg.max_epoch):
            if epoch == 0 and cfg.warm_iter > 0:
                pbar = tqdm(range(cfg.warm_iter))
                up_lr = cfg.get_lr(0) * num_gpu
                bottom_lr = up_lr * cfg.warm_fractor
                iter_delta_lr = 1.0 * (up_lr - bottom_lr) / cfg.warm_iter
                cur_lr = bottom_lr
                for iter in pbar:
                    sess.run(update_lr_op, {lr_placeholder: cur_lr})
                    cur_lr += iter_delta_lr
                    feed_dict = {}
                    blobs_list = prefetch_data_layer.forward()
                    for i, inputs in enumerate(inputs_list):
                        # blobs = next(data_iter)
                        blobs = blobs_list[i]
                        for it_idx, it_inputs_name in enumerate(inputs_names):
                            feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
                    sess_ret = sess.run(sess2run, feed_dict=feed_dict)
                    print_str = 'iter %d, ' % (iter)
                    for idx_key, iter_key in enumerate(train_collection.keys()):
                        print_str += iter_key + ': %.4f, ' % sess_ret[idx_key + 2]
                    print_str += 'lr: %.4f, speed: %.3fs/iter' % \
                        (cur_lr, timer.average_time)
                    logger.info(print_str)
                    pbar.set_description(print_str)

            pbar = tqdm(range(1, cfg.nr_image_per_epoch //
                              (num_gpu * cfg.train_batch_per_gpu) + 1))
            cur_lr = cfg.get_lr(epoch) * num_gpu
            sess.run(update_lr_op, {lr_placeholder: cur_lr})
            logger.info("epoch: %d" % epoch)
            for iter in pbar:
                timer.tic()
                feed_dict = {}
                blobs_list = prefetch_data_layer.forward()
                for i, inputs in enumerate(inputs_list):
                    # blobs = next(data_iter)
                    blobs = blobs_list[i]
                    for it_idx, it_inputs_name in enumerate(inputs_names):
                        feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
                sess_ret = sess.run(sess2run, feed_dict=feed_dict)
                timer.toc()
                print_str = 'iter %d, ' % (iter)
                for idx_key, iter_key in enumerate(train_collection.keys()):
                    print_str += iter_key + ': %.4f, ' % sess_ret[idx_key + 2]
                print_str += 'lr: %.4f, speed: %.3fs/iter' % \
                    (cur_lr, timer.average_time)
                logger.info(print_str)
                pbar.set_description(print_str)
            snapshot(sess, saver, epoch)
        coord.request_stop()
        coord.join(queue_runner)
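sum_gradients is called above but its body is not part of this listing. A plausible reduction, offered as a sketch rather than the project's actual implementation, groups the (grad, var) pairs by variable and sums them across towers; because each tower's loss was already divided by num_gpu, the summed gradient amounts to the cross-GPU average.

import tensorflow as tf

def sum_gradients(tower_grads):
    """Assumed behaviour: combine per-tower (grad, var) lists into one list.

    tower_grads = [[(g0_v0, v0), (g0_v1, v1), ...],   # tower 0
                   [(g1_v0, v0), (g1_v1, v1), ...],   # tower 1
                   ...]
    """
    combined = []
    for grads_and_var in zip(*tower_grads):        # group entries by variable
        var = grads_and_var[0][1]
        grads = [g for g, _ in grads_and_var if g is not None]
        if not grads:
            combined.append((None, var))
            continue
        combined.append((tf.add_n(grads), var))    # sum over towers
    return combined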
def train(args):
    logger = QuickLogger(log_dir=cfg.output_dir).get_logger()
    logger.info(cfg)
    np.random.seed(cfg.rng_seed)
    num_gpu = len(args.devices.split(','))
    net = network_desp.Network()
    data_iter = get_data_flow()  # data iterator
    prefetch_data_layer = PrefetchingIter(data_iter, num_gpu)
    # tf.device() pins the enclosed ops to a specific GPU or CPU device
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0.), trainable=False)
        # log_device_placement=True would print the device each op runs on
        tfconfig = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)
        # start with a small GPU memory allocation and grow it on demand;
        # memory is never released back, so fragmentation is possible
        tfconfig.gpu_options.allow_growth = True
        sess = tf.Session(config=tfconfig)
        tf.set_random_seed(cfg.rng_seed)
        lr = tf.Variable(cfg.get_lr(0), trainable=False)
        lr_placeholder = tf.placeholder(lr.dtype, shape=lr.get_shape())
        update_lr_op = lr.assign(lr_placeholder)
        # define the optimizer
        opt = tf.train.MomentumOptimizer(lr, cfg.momentum)

        '''data processing'''
        inputs_list = []
        for i in range(num_gpu):
            inputs_list.append(net.get_inputs())
        put_op_list = []
        get_op_list = []
        for i in range(num_gpu):
            with tf.device("/GPU:%s" % i):
                area = tf.contrib.staging.StagingArea(
                    dtypes=[tf.float32 for _ in range(len(inputs_list[0]))])
                put_op_list.append(area.put(inputs_list[i]))
                get_op_list.append(area.get())
        '''
        tf.train.Coordinator() creates a thread coordinator. A TF session can run
        multiple threads in parallel; the Coordinator manages them, can stop all
        worker threads at once, and reports exceptions to whichever thread is
        waiting for the workers to finish.
        '''
        coord = tf.train.Coordinator()
        init_all_var = tf.initialize_all_variables()
        sess.run(init_all_var)
        # QueueRunner coordinates multiple worker threads that push tensors
        # into the same queue
        queue_runner = tf.train.start_queue_runners(coord=coord, sess=sess)
        '''end of data processing'''

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        biases_ini = tf.constant_initializer(0.0)
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.weight_decay)
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope([slim.model_variable, slim.variable],
                                            device='/gpu:0'):
                            with slim.arg_scope(
                                    [slim.conv2d, slim.conv2d_in_plane,
                                     slim.conv2d_transpose,
                                     slim.separable_conv2d,
                                     slim.fully_connected],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=biases_ini):
                                loss = net.inference('TRAIN', get_op_list[i])
                                loss = loss / num_gpu
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    loss = loss + tf.add_n(regularization_losses)
                                tf.get_variable_scope().reuse_variables()
                                grads = opt.compute_gradients(loss)
                                tower_grads.append(grads)
        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]
        final_gvs = []
        with tf.variable_scope('Gradient_Mult'):
            for grad, var in grads:
                scale = 1.
                # if '/biases:' in var.name:
                #     scale *= 2.
                if 'conv_new' in var.name:
                    scale *= 3.
                if not np.allclose(scale, 1.0):
                    grad = tf.multiply(grad, scale)
                final_gvs.append((grad, var))
        # gradient update op
        apply_gradient_op = opt.apply_gradients(final_gvs, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # apply_gradient_op = opt.apply_gradients(grads)
        # saver = tf.train.Saver(tf.global_variables(), max_to_keep=100000)
        '''max_to_keep sets how many checkpoints to retain (default 5, i.e. the
        five most recent). To keep a checkpoint for every epoch, set max_to_keep
        to None or 0.'''
        saver = tf.train.Saver(max_to_keep=100000)
        variables = tf.global_variables()
        var_keep_dic = get_variables_in_checkpoint_file(cfg.weight)
        var_keep_dic.pop('global_step')
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(update_lr_op, {lr_placeholder: cfg.get_lr(0) * num_gpu})
        sess.run(tf.variables_initializer(variables, name='init'))
        variables_to_restore = []
        for v in variables:
            if v.name.split(':')[0] in var_keep_dic:
                # print('Variables restored: %s' % v.name)
                variables_to_restore.append(v)
        restorer = tf.train.Saver(variables_to_restore)
        restorer.restore(sess, cfg.weight)

        train_collection = net.get_train_collection()
        sess2run = []
        sess2run.append(train_op)
        sess2run.append(put_op_list)
        for col in train_collection.values():
            sess2run.append(col)

        timer = Timer()
        # warm up staging area
        inputs_names = net.get_inputs(mode=1)
        logger.info("start warm up")
        for _ in range(4):
            blobs_list = prefetch_data_layer.forward()
            feed_dict = {}
            for i, inputs in enumerate(inputs_list):
                # blobs = next(data_iter)
                blobs = blobs_list[i]
                for it_idx, it_inputs_name in enumerate(inputs_names):
                    feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
            sess.run([put_op_list], feed_dict=feed_dict)

        logger.info("start train")
        for epoch in range(cfg.max_epoch):
            if epoch == 0 and cfg.warm_iter > 0:
                # pbar = tqdm(range(cfg.warm_iter))  # warm-up defaults to 500 iterations
                pbar = range(cfg.warm_iter)  # warm-up defaults to 500 iterations
                up_lr = cfg.get_lr(0) * num_gpu
                bottom_lr = up_lr * cfg.warm_fractor
                iter_delta_lr = 1.0 * (up_lr - bottom_lr) / cfg.warm_iter
                cur_lr = bottom_lr
                for iter in pbar:
                    sess.run(update_lr_op, {lr_placeholder: cur_lr})
                    cur_lr += iter_delta_lr
                    feed_dict = {}
                    blobs_list = prefetch_data_layer.forward()
                    for i, inputs in enumerate(inputs_list):
                        # blobs = next(data_iter)
                        blobs = blobs_list[i]
                        for it_idx, it_inputs_name in enumerate(inputs_names):
                            feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
                    sess_ret = sess.run(sess2run, feed_dict=feed_dict)
                    if iter % cfg.disp_interval == 0:
                        print_str = 'iter %d, ' % (iter)
                        for idx_key, iter_key in enumerate(train_collection.keys()):
                            print_str += iter_key + ': %.4f, ' % sess_ret[idx_key + 2]
                        print_str += 'lr: %.4f, speed: %.3fs/iter' % \
                            (cur_lr, timer.average_time)
                        logger.info(print_str)
                        # pbar.set_description(print_str)

            '''nr_image_per_epoch depends on the dataset (roughly 80k for COCO
            2014). Each GPU takes train_batch_per_gpu images and there are
            num_gpu GPUs, which gives the number of iterations per epoch; the +1
            compensates for range() excluding the end point.'''
            # pbar = tqdm(range(1, cfg.nr_image_per_epoch // (num_gpu * cfg.train_batch_per_gpu) + 1))
            pbar = range(1, cfg.nr_image_per_epoch //
                         (num_gpu * cfg.train_batch_per_gpu) + 1)
            cur_lr = cfg.get_lr(epoch) * num_gpu
            sess.run(update_lr_op, {lr_placeholder: cur_lr})
            logger.info("epoch: %d" % epoch)
            for iter in pbar:
                timer.tic()
                feed_dict = {}
                blobs_list = prefetch_data_layer.forward()
                for i, inputs in enumerate(inputs_list):
                    # blobs = next(data_iter)
                    blobs = blobs_list[i]
                    for it_idx, it_inputs_name in enumerate(inputs_names):
                        feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
                sess_ret = sess.run(sess2run, feed_dict=feed_dict)
                timer.toc()
                if iter % cfg.disp_interval == 0:
                    print_str = 'iter %d, ' % (iter)
                    for idx_key, iter_key in enumerate(train_collection.keys()):
                        print_str += iter_key + ': %.4f, ' % sess_ret[idx_key + 2]
                    print_str += 'lr: %.4f, speed: %.3fs/iter' % \
                        (cur_lr, timer.average_time)
                    logger.info(print_str)
                    # pbar.set_description(print_str)
                if iter % cfg.snapshot_interval == 0:
                    snapshot(sess, saver, epoch, global_step)
            snapshot(sess, saver, epoch, global_step)
        coord.request_stop()
        coord.join(queue_runner)
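get_variables_in_checkpoint_file is also used without being reproduced here. In TensorFlow 1.x the usual way to list the variables stored in a checkpoint is the pywrap_tensorflow checkpoint reader; the sketch below is a guess at the helper, not the project's code, but it returns a name-to-shape dict that is consistent with how var_keep_dic is used above (membership tests and pop('global_step')).

from tensorflow.python import pywrap_tensorflow

def get_variables_in_checkpoint_file(file_name):
    """Probable implementation: map checkpoint variable names to their shapes."""
    reader = pywrap_tensorflow.NewCheckpointReader(file_name)
    return reader.get_variable_to_shape_map()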
def train(args):
    # logger = QuickLogger(log_dir=cfg.output_dir).get_logger()
    # logger.info(cfg)
    np.random.seed(cfg.rng_seed)
    num_gpu = len(args.devices.split(','))
    net = network_desp.Network()  # the network definition lives here
    data_iter = get_data_flow()  # data iterator
    prefetch_data_layer = PrefetchingIter(data_iter, num_gpu)
    # tf.device() pins ops to a GPU or CPU device. Pinning the outermost scope
    # to the CPU is intentional: lightweight bookkeeping ops stay on the CPU
    # while the network training itself is placed on the GPUs below.
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # global training step counter
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0.),
                                      trainable=False)
        # log_device_placement=True would print the device each op runs on
        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        # start with a small GPU memory allocation and grow it on demand;
        # memory is never released back, so fragmentation is possible
        tfconfig.gpu_options.allow_growth = True
        sess = tf.Session(config=tfconfig)
        tf.set_random_seed(cfg.rng_seed)
        lr = tf.Variable(cfg.get_lr(0), trainable=False)
        lr_placeholder = tf.placeholder(lr.dtype, shape=lr.get_shape())
        # running this op reassigns lr; the new value is fed via lr_placeholder
        update_lr_op = lr.assign(lr_placeholder)
        opt = tf.train.MomentumOptimizer(lr, cfg.momentum)  # define the optimizer

        '''data processing'''
        inputs_list = []
        for i in range(num_gpu):
            inputs_list.append(net.get_inputs())
        put_op_list = []
        get_op_list = []
        for i in range(num_gpu):
            with tf.device("/gpu:%s" % i):
                area = tf.contrib.staging.StagingArea(
                    dtypes=[tf.float32 for _ in range(len(inputs_list[0]))])
                put_op_list.append(area.put(inputs_list[i]))
                get_op_list.append(area.get())
        '''tf.train.Coordinator() creates a thread coordinator. A TF session can
        run multiple threads in parallel; the Coordinator manages them, can stop
        all worker threads at once, and reports exceptions to whichever thread
        is waiting for the workers to finish.'''
        coord = tf.train.Coordinator()
        # builds the variable-initialization op; it only takes effect once run
        # inside the session
        init_all_var = tf.initialize_all_variables()
        # actually run the initializer
        sess.run(init_all_var)
        # QueueRunner coordinates multiple worker threads that push tensors
        # into the same queue
        queue_runner = tf.train.start_queue_runners(coord=coord, sess=sess)
        '''end of data processing'''

        tower_grads = []
        # a few initializers and regularizers
        biases_regularizer = tf.no_regularizer
        biases_ini = tf.constant_initializer(0.0)
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.weight_decay)
        '''slim.arg_scope sets default argument values for the listed functions,
        so repeated calls inside the scope need not spell out every argument.'''
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                # place each tower's forward/backward pass on its own GPU
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope([slim.model_variable, slim.variable],
                                            device='/gpu:0'):
                            with slim.arg_scope(
                                    [slim.conv2d, slim.conv2d_in_plane,
                                     slim.conv2d_transpose,
                                     slim.separable_conv2d,
                                     slim.fully_connected],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=biases_ini):
                                loss = net.inference('TRAIN', get_op_list[i])
                                loss = loss / num_gpu
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # empirical loss + structural loss (regularization)
                                    loss = loss + tf.add_n(regularization_losses)
                                tf.get_variable_scope().reuse_variables()
                                # compute gradients (still only this GPU's gradients)
                                grads = opt.compute_gradients(loss)
                                tower_grads.append(grads)
        if len(tower_grads) > 1:
            # with multi-GPU training each tower's gradients are collected in
            # tower_grads; in synchronous mode they are combined here (an
            # average across GPUs) before the parameters are updated
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]
        final_gvs = []
        with tf.variable_scope('Gradient_Mult'):
            for grad, var in grads:
                scale = 1.
                # if '/biases:' in var.name:
                #     scale *= 2.
                if 'conv_new' in var.name:
                    scale *= 3.
                if not np.allclose(scale, 1.0):
                    grad = tf.multiply(grad, scale)
                final_gvs.append((grad, var))
        # gradient update op
        apply_gradient_op = opt.apply_gradients(final_gvs, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        # group several ops into a single op
        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # apply_gradient_op = opt.apply_gradients(grads)
        # saver = tf.train.Saver(tf.global_variables(), max_to_keep=100000)
        '''max_to_keep sets how many checkpoints to retain (default 5, i.e. the
        five most recent). To keep a checkpoint for every epoch, set max_to_keep
        to None or 0.'''
        saver = tf.train.Saver(max_to_keep=100000)
        variables = tf.global_variables()  # list of all global variables
        # read the variable names stored in the pretrained checkpoint
        var_keep_dic = get_variables_in_checkpoint_file(cfg.weight)
        var_keep_dic.pop('global_step')
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(update_lr_op, {lr_placeholder: cfg.get_lr(0) * num_gpu})
        sess.run(tf.variables_initializer(variables, name='init'))
        variables_to_restore = []
        for v in variables:
            # v.name looks like 'v:0', 'v1:0', hence the split on ':'
            if v.name.split(':')[0] in var_keep_dic:
                # print('Variables restored: %s' % v.name)
                variables_to_restore.append(v)
        # restore parameters from the pretrained checkpoint; Saver's var_list
        # argument (a dict or list) limits which variables are saved/restored,
        # which is why this is not simply tf.train.Saver()
        restorer = tf.train.Saver(variables_to_restore)
        '''not settled yet, shelved for now:
        ckpt = tf.train.latest_checkpoint(cfg.ckpt_dir)
        if ckpt is not None:
            print("restore from checkpoint {}".format(ckpt))
            restorer.restore(sess, ckpt)
        '''
        restorer.restore(sess, cfg.weight)

        train_collection = net.get_train_collection()
        sess2run = []
        sess2run.append(train_op)
        sess2run.append(put_op_list)
        for col in train_collection.values():
            sess2run.append(col)

        timer = Timer()
        # warm up staging area
        inputs_names = net.get_inputs(mode=1)
        # logger.info("start warm up")
        for _ in range(4):
            blobs_list = prefetch_data_layer.forward()
            feed_dict = {}
            for i, inputs in enumerate(inputs_list):
                # blobs = next(data_iter)
                blobs = blobs_list[i]
                for it_idx, it_inputs_name in enumerate(inputs_names):
                    feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
            sess.run([put_op_list], feed_dict=feed_dict)

        print("##### start train #####")
        for epoch in range(cfg.max_epoch):
            if epoch == 0 and cfg.warm_iter > 0:
                # pbar = tqdm(range(cfg.warm_iter))  # warm-up defaults to 500 iterations
                pbar = range(cfg.warm_iter)  # warm-up defaults to 500 iterations
                up_lr = cfg.get_lr(0) * num_gpu
                bottom_lr = up_lr * cfg.warm_fractor
                iter_delta_lr = 1.0 * (up_lr - bottom_lr) / cfg.warm_iter
                cur_lr = bottom_lr
                for iter in pbar:
                    sess.run(update_lr_op, {lr_placeholder: cur_lr})
                    cur_lr += iter_delta_lr
                    feed_dict = {}
                    blobs_list = prefetch_data_layer.forward()
                    for i, inputs in enumerate(inputs_list):
                        # blobs = next(data_iter)
                        blobs = blobs_list[i]
                        for it_idx, it_inputs_name in enumerate(inputs_names):
                            feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
                    sess_ret = sess.run(sess2run, feed_dict=feed_dict)
                    if iter % cfg.disp_interval == 0:
                        print_str = 'iter %d, ' % (iter)
                        for idx_key, iter_key in enumerate(train_collection.keys()):
                            print_str += iter_key + ': %.4f, ' % sess_ret[idx_key + 2]
                        print_str += 'lr: %.4f, speed: %.3fs/iter' % \
                            (cur_lr, timer.average_time)
                        print(print_str)
                        # pbar.set_description(print_str)

            '''nr_image_per_epoch depends on the dataset (roughly 80k for COCO
            2014). Each GPU takes train_batch_per_gpu images and there are
            num_gpu GPUs, which gives the number of iterations per epoch; the +1
            compensates for range() excluding the end point.'''
            # pbar = tqdm(range(1, cfg.nr_image_per_epoch // (num_gpu * cfg.train_batch_per_gpu) + 1))
            pbar = range(1, cfg.nr_image_per_epoch //
                         (num_gpu * cfg.train_batch_per_gpu) + 1)
            # update the learning rate on the fly through lr_placeholder,
            # so it can be adjusted for each epoch
            cur_lr = cfg.get_lr(epoch) * num_gpu
            sess.run(update_lr_op, {lr_placeholder: cur_lr})
            print("epoch: %d" % epoch)
            for iter in pbar:
                timer.tic()
                feed_dict = {}
                # the training data arrives through prefetch_data_layer
                blobs_list = prefetch_data_layer.forward()
                for i, inputs in enumerate(inputs_list):
                    # blobs = next(data_iter)
                    blobs = blobs_list[i]
                    for it_idx, it_inputs_name in enumerate(inputs_names):
                        feed_dict[inputs[it_idx]] = blobs[it_inputs_name]
                sess_ret = sess.run(sess2run, feed_dict=feed_dict)
                timer.toc()
                if iter % cfg.disp_interval == 0:
                    print_str = 'iter %d, ' % (iter)
                    for idx_key, iter_key in enumerate(train_collection.keys()):
                        print_str += iter_key + ': %.4f, ' % sess_ret[idx_key + 2]
                    print_str += 'lr: %.4f, speed: %.3fs/iter' % \
                        (cur_lr, timer.average_time)
                    print(print_str)
                    # pbar.set_description(print_str)
                if iter % cfg.snapshot_interval == 0:
                    snapshot(sess, saver, epoch, global_step)
            snapshot(sess, saver, epoch, global_step)
        coord.request_stop()  # training is done, ask the threads to stop
        coord.join(queue_runner)  # wait for all threads to terminate
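snapshot is likewise project code that is not reproduced in this listing. A sketch of what a four-argument snapshot(sess, saver, epoch, global_step) plausibly does follows; the epoch_{:d}.ckpt naming mirrors what the test loop at the top of this section restores, but the directory layout is an assumption, and cfg refers to the same config object used throughout the listing.

import os

def snapshot(sess, saver, epoch, global_step=None):
    """Plausible checkpoint helper; the real path layout lives in the project."""
    ckpt_dir = os.path.join(cfg.output_dir, 'model_dump')   # assumed location
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    prefix = os.path.join(ckpt_dir, 'epoch_{:d}.ckpt'.format(epoch))
    step = int(sess.run(global_step)) if global_step is not None else None
    path = saver.save(sess, prefix, global_step=step)
    print('Wrote snapshot to: {}'.format(path))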
def train(logger):
    dataset = Dataset(isTraining=True)
    net = network_desp.Network()
    with tf.Graph().as_default(), tf.device('/device:CPU:0'):
        tf.set_random_seed(cfg.RNG_SEED)
        lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
        momentum = cfg.TRAIN.MOMENTUM
        opt = tf.train.MomentumOptimizer(lr, momentum)
        # opt = tf.train.AdamOptimizer(lr)
        inputs_list = []
        for i in range(cfg.num_gpu):
            inputs = []
            inputs.append(tf.placeholder(tf.float32,
                                         shape=[1, cfg.image_size, cfg.image_size, 3]))
            inputs.append(tf.placeholder(tf.float32, shape=[1, 3]))
            inputs.append(tf.placeholder(tf.float32, shape=[None, 5]))
            inputs.append(tf.placeholder(tf.uint8, shape=[None, None, None]))
            inputs.append(tf.placeholder(tf.float32, shape=[]))
            inputs.append(tf.placeholder(tf.float32, shape=[]))
            inputs_list.append(inputs)

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfg.num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i) as scope:
                        with slim.arg_scope([slim.model_variable, slim.variable],
                                            device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [slim.conv2d, slim.conv2d_in_plane,
                                     slim.conv2d_transpose,
                                     slim.separable_conv2d,
                                     slim.fully_connected],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(0.0)):
                                loss = net.inference('TRAIN', inputs_list[i])
                                loss = loss * (1.0 / cfg.num_gpu)
                                tf.get_variable_scope().reuse_variables()
                                grads = opt.compute_gradients(loss)
                                tower_grads.append(grads)
        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]
        # apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # grads = [(tf.clip_by_value(grad, -0.01, 0.01), var) for grad, var in grads]
        apply_gradient_op = opt.apply_gradients(grads)
        saver = tf.train.Saver(max_to_keep=100000)
        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        sess = tf.Session(config=tfconfig)
        variables = tf.global_variables()
        var_keep_dic = get_variables_in_checkpoint_file(cfg.weight)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE))
        sess.run(tf.variables_initializer(variables, name='init'))
        variables_to_restore = []
        for v in variables:
            if v.name.split(':')[0] in var_keep_dic:
                # print('Variables restored: %s' % v.name)
                variables_to_restore.append(v)
        restorer = tf.train.Saver(variables_to_restore)
        restorer.restore(sess, cfg.weight)

        timer = Timer()
        train_collection = net.get_train_collection()
        sess_ret = []
        sess_ret.append(apply_gradient_op)
        for col in train_collection.values():
            sess_ret.append(col)
        sess_ret.append(lr)
        summary_writer = tf.summary.FileWriter(model_dump_dir, sess.graph)
        sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE))
        for iter in range(1, cfg.TRAIN.MAX_ITER + 1):
            if iter == cfg.TRAIN.STEPSIZE:
                sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE * cfg.TRAIN.GAMMA))
            if iter == cfg.TRAIN.STEPSIZE_2:
                sess.run(tf.assign(lr, cfg.TRAIN.LEARNING_RATE *
                                   cfg.TRAIN.GAMMA * cfg.TRAIN.GAMMA))
            feed_dict = {}
            for inputs in inputs_list:
                image, gt_boxes, gt_masks = dataset.forward()
                image, info, gt_boxes, gt_masks = make_data(image, gt_boxes, gt_masks)
                image, gt_boxes, gt_masks = random_flip(image, gt_boxes, gt_masks)
                feed_dict[inputs[0]] = image
                feed_dict[inputs[1]] = info
                feed_dict[inputs[2]] = gt_boxes
                feed_dict[inputs[3]] = gt_masks
                # note: both branches currently feed the same value
                if iter < 5000:
                    feed_dict[inputs[4]] = 1
                else:
                    feed_dict[inputs[4]] = 1
                feed_dict[inputs[5]] = iter
            timer.tic()
            _, rpn_loss_box, rpn_loss_cat, rpn_cross_entropy_pos, rpn_cross_entropy_neg, \
                loss_seg, loss_wd, total_loss, summaries_str, cur_lr = \
                sess.run(sess_ret, feed_dict=feed_dict)
            timer.toc()
            if iter % (cfg.TRAIN.DISPLAY) == 0:
                logger.info(
                    'iter: %d/%d, loss: %.4f, box: %.4f,'
                    'cat: %.4f, pos: %.4f, neg: %.4f, seg:%.3f, wd: %.4f, lr: %.4f, speed: %.3fs/iter' %
                    (iter, cfg.TRAIN.MAX_ITER, total_loss, rpn_loss_box, rpn_loss_cat,
                     rpn_cross_entropy_pos, rpn_cross_entropy_neg, loss_seg, loss_wd,
                     cur_lr, timer.average_time))
            if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
                snapshot(sess, saver, iter)
            setproctitle.setproctitle(
                'train ' + os.path.split(os.path.realpath(__file__))[0] +
                ' iter:' + str(iter) + " of " + str(cfg.TRAIN.MAX_ITER))
            if iter % 100 == 0:
                summary_writer.add_summary(summaries_str, iter)
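This second train() uses a piecewise-constant schedule: the learning rate is multiplied by GAMMA once iter reaches STEPSIZE and by GAMMA again at STEPSIZE_2. Written out as a plain function, with the numeric values in the example purely illustrative rather than the project's config:

def stepped_lr(iteration, base_lr, gamma, step1, step2):
    """Effective learning rate implied by the two tf.assign calls above."""
    if iteration >= step2:
        return base_lr * gamma * gamma
    if iteration >= step1:
        return base_lr * gamma
    return base_lr

# illustrative values only
print(stepped_lr(30000, 0.001, 0.1, 60000, 80000))   # 0.001
print(stepped_lr(70000, 0.001, 0.1, 60000, 80000))   # ~0.0001
print(stepped_lr(90000, 0.001, 0.1, 60000, 80000))   # ~1e-05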
def test_net(args):
    log_name = 'test.logs'
    logger = QuickLogger(log_dir=cfg.TEST_LOG_ADDR, log_name=log_name).get_logger()
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    inputs = []
    inputs.append(tf.placeholder(tf.float32, shape=[1, None, None, 3]))
    inputs.append(tf.placeholder(tf.float32, shape=[1, 3]))
    inputs.append(tf.placeholder(tf.float32, shape=[None, 5]))
    inputs.append(tf.placeholder(tf.uint8, shape=[None, None, None]))
    inputs.append(tf.placeholder(tf.float32, shape=[]))
    inputs.append(tf.placeholder(tf.float32, shape=[]))
    sess = tf.Session(config=tfconfig)
    net = network_desp.Network()
    net.inference('TEST', inputs)
    test_collect_dict = net.get_test_collection()
    test_collect = [it for it in test_collect_dict.values()]
    weights_filename = osp.join(args.weights_addr)
    saver = tf.train.Saver()
    saver.restore(sess, weights_filename)

    np.random.seed(cfg.RNG_SEED)
    dataset = Dataset(isTraining=False)
    num_images = 300
    res = []
    _t = {'inference': Timer(), 'im_detect': Timer(), 'misc': Timer()}
    for i in range(num_images):
        im, gt_boxes, gt_masks = dataset.forward()
        _t['im_detect'].tic()
        cat_prob, boxes, seg_pred, masks = im_detect(sess, net, inputs, im,
                                                     test_collect, _t)
        _t['im_detect'].toc()
        _t['misc'].tic()
        cls_scores = cat_prob[:, 1]
        cls_dets = np.hstack((boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)
        segmaps = np.zeros([len(seg_pred), im.shape[0], im.shape[1]])
        img_for_show = copy.deepcopy(im)
        for k in range(len(seg_pred)):
            img_for_single_instance = copy.deepcopy(im)
            segmap = seg_pred[k, :, :, 1]
            segmap = cv2.resize(
                segmap,
                (img_for_single_instance.shape[1], img_for_single_instance.shape[0]),
                interpolation=cv2.INTER_LANCZOS4)
            segmap_masked = segmap * masks[k]
            segmaps[k] = segmap_masked
            if args.show:
                color = color_table[k]
                img_for_show[segmap_masked > 0.5] = \
                    0.8 * color + 0.2 * img_for_show[segmap_masked > 0.5]
        res.append({'gt_masks': gt_masks, 'segmaps': segmaps,
                    'scores': cls_scores, 'img': im})
        _t['misc'].toc()
        if args.show:
            if len(cls_dets) > 0:
                for record in cls_dets:
                    x0 = record[0]
                    y0 = record[1]
                    x1 = record[2]
                    y1 = record[3]
                    cv2.rectangle(img_for_show, (int(x0), int(y0)),
                                  (int(x1), int(y1)), (73, 196, 141), 2)
            gt = np.zeros([gt_masks[0].shape[0], gt_masks[0].shape[1], 3],
                          gt_masks[0].dtype)
            for k in range(len(gt_masks)):
                mask = gt_masks[k].reshape([gt_masks[k].shape[0],
                                            gt_masks[k].shape[1], 1])
                gt += mask * color_table[k].reshape([1, 1, 3])
            cv2.imwrite('seg.jpg', img_for_show)
            cv2.imwrite('img.jpg', im)
            cv2.imwrite('gt.jpg', gt)
            input('Drawn')
        logger.info('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s'
                    .format(i + 1, num_images, _t['inference'].average_time,
                            _t['im_detect'].average_time, _t['misc'].average_time))
    logger.info('Evaluating detections')
    map07 = eval(res, 0.7)
    map05 = eval(res, 0.5)
    logger.info('mAP07:%f mAP05:%f' % (map07, map05))
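eval(res, thr) is not shown here. From the way res is built, each entry pairs per-instance probability maps (segmaps) with the ground-truth masks, so the evaluation presumably binarizes each probability map and scores it by mask IoU against the overlap threshold. A hedged sketch of that core step; mask_iou is a hypothetical helper, not the project's eval:

import numpy as np

def mask_iou(pred_probmap, gt_mask, prob_thresh=0.5):
    """Binarize a predicted probability map and compute IoU with a GT instance mask."""
    pred = pred_probmap > prob_thresh
    gt = gt_mask.astype(bool)
    union = np.logical_or(pred, gt).sum()
    if union == 0:
        return 0.0
    return float(np.logical_and(pred, gt).sum()) / union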