    [227, 0, 144], [2, 140, 210], [255, 225, 2], [255, 255, 255]
], dtype=np.float32)

if __name__ == '__main__':
    model_config = configuration.MODEL_CONFIG
    train_config = configuration.TRAIN_CONFIG
    infer_size = (736, 960)

    g = tf.Graph()
    with g.as_default():
        # Build the test model
        model = BiseNet(model_config, None, 5, 'inference')
        model.build()
        response = model.response
        saver = tf.train.Saver()

        # Dynamically allocate GPU memory
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)
        sess = tf.Session(config=sess_config)

        model_path = tf.train.latest_checkpoint(train_config['train_dir'])
        # global_variables_init_op = tf.global_variables_initializer()
        # local_variables_init_op = tf.local_variables_initializer()
        # sess.run(local_variables_init_op)
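# The truncated array above is a per-class colour palette used to visualise the
# predicted label map. Below is a minimal sketch of how such a palette is
# typically applied to a prediction; `colorize_prediction` is an illustrative
# helper name, not a function defined in this repository.
import numpy as np

def colorize_prediction(pred, palette):
    """Map an (H, W) array of class ids to an (H, W, 3) uint8 RGB image."""
    # NumPy fancy indexing broadcasts the palette lookup over every pixel.
    return palette[pred].astype(np.uint8)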
def main(model_config, train_config):
    os.environ['CUDA_VISIBLE_DEVICES'] = auto_select_gpu()

    # Create training directory which will be used to save: configurations,
    # model files, TensorBoard logs
    train_dir = train_config['train_dir']
    if not osp.isdir(train_dir):
        logging.info('Creating training directory: %s', train_dir)
        mkdir_p(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Set fixed seed for reproducible experiments
        random.seed(train_config['seed'])
        np.random.seed(train_config['seed'])
        tf.set_random_seed(train_config['seed'])

        # Build the training and validation model
        model = BiseNet(model_config, train_config, num_classes, mode="train")
        model.build()
        model_va = BiseNet(model_config, train_config, num_classes,
                           mode="validation")
        model_va.build(reuse=True)

        # Save configurations for future reference
        save_cfgs(train_dir, model_config, train_config)

        learning_rate = _configure_learning_rate(train_config, model.global_step)
        optimizer = _configure_optimizer(train_config, learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)

        # Set up the training ops
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = tf.contrib.layers.optimize_loss(
                loss=model.total_loss,
                global_step=model.global_step,
                learning_rate=learning_rate,
                optimizer=optimizer,
                clip_gradients=train_config['clip_gradients'],
                learning_rate_decay_fn=None,
                summaries=['learning_rate'])

        saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=train_config['max_checkpoints_to_keep'])
        summary_writer = tf.summary.FileWriter(train_dir, g)
        summary_op = tf.summary.merge_all()

        global_variables_init_op = tf.global_variables_initializer()
        local_variables_init_op = tf.local_variables_initializer()
        g.finalize()  # Finalize graph to avoid adding ops by mistake

        # Dynamically allocate GPU memory
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)
        sess = tf.Session(config=sess_config)

        model_path = tf.train.latest_checkpoint(train_config['train_dir'])
        if not model_path:
            sess.run(global_variables_init_op)
            sess.run(local_variables_init_op)
            start_step = 0
            if model_config['frontend_config']['pretrained_dir'] and model.init_fn:
                model.init_fn(sess)
        else:
            logging.info('Restore from last checkpoint: {}'.format(model_path))
            sess.run(local_variables_init_op)
            saver.restore(sess, model_path)
            start_step = tf.train.global_step(sess, model.global_step.name) + 1

        # Training loop
        data_config = train_config['train_data_config']
        total_steps = int(data_config['epoch'] *
                          data_config['num_examples_per_epoch'] /
                          data_config['batch_size'])
        logging.info('Train for {} steps'.format(total_steps))
        for step in range(start_step, total_steps):
            start_time = time.time()
            _, predict_loss, loss = sess.run(
                [train_op, model.loss, model.total_loss])
            duration = time.time() - start_time

            if step % 10 == 0:
                examples_per_sec = data_config['batch_size'] / float(duration)
                time_remain = data_config['batch_size'] * (
                    total_steps - step) / examples_per_sec
                m, s = divmod(time_remain, 60)
                h, m = divmod(m, 60)
                format_str = (
                    '%s: step %d, total loss = %.2f, predict loss = %.2f '
                    '(%.1f examples/sec; %.3f sec/batch; %dh:%02dm:%02ds remains)')
                logging.info(format_str % (datetime.now(), step, loss, predict_loss,
                                           examples_per_sec, duration, h, m, s))

            if step % 10 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % train_config['save_model_every_n_step'] == 0 or (
                    step + 1) == total_steps:
                checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
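# The training scripts call `_configure_learning_rate` and `_configure_optimizer`,
# whose definitions are not shown in this excerpt. Below is a minimal sketch of
# the interface they are assumed to expose; the config keys 'initial_lr',
# 'decay_steps', 'lr_decay_factor', 'optimizer' and 'momentum' are illustrative
# guesses, not keys confirmed by this repository.
import tensorflow as tf

def _configure_learning_rate(train_config, global_step):
    # Exponentially decay the learning rate as training progresses.
    return tf.train.exponential_decay(
        train_config['initial_lr'],
        global_step,
        decay_steps=train_config['decay_steps'],
        decay_rate=train_config['lr_decay_factor'],
        staircase=True)

def _configure_optimizer(train_config, learning_rate):
    # Select an optimizer by name; only two options are sketched here.
    if train_config['optimizer'] == 'momentum':
        return tf.train.MomentumOptimizer(
            learning_rate, momentum=train_config['momentum'])
    return tf.train.AdamOptimizer(learning_rate)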
def main(model_config, train_config):
    # Create training directory which will be used to save: configurations,
    # model files, TensorBoard logs
    train_dir = train_config['train_dir']
    if not osp.isdir(train_dir):
        logging.info('Creating training directory: %s', train_dir)
        mkdir_p(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Set fixed seed for reproducible experiments
        random.seed(train_config['seed'])
        np.random.seed(train_config['seed'])
        tf.set_random_seed(train_config['seed'])

        # Build the training and validation model
        model = BiseNet(model_config, train_config,
                        train_config['num_classes'], mode="train")
        model.build(num_gpus=configuration.num_gpus, reuse=tf.AUTO_REUSE)
        model_va = BiseNet(model_config, train_config,
                           train_config['num_classes'], mode="validation")
        model_va.build(reuse=True)

        # Save configurations for future reference
        save_cfgs(train_dir, model_config, train_config)

        learning_rate = _configure_learning_rate(train_config, model.global_step)
        optimizer = _configure_optimizer(train_config, learning_rate)
        tf.summary.scalar('learning_rate', learning_rate)

        # Set up the training ops: one loss tower per GPU
        tower_grads = []
        for i in range(configuration.num_gpus):
            with tf.device('/gpu:%d' % i):
                name_scope = ('clone_%d' % i) if i else ''
                with tf.name_scope(name_scope) as scope:
                    grads = optimizer.compute_gradients(model.total_loss[i])
                    tower_grads.append(grads)

        with tf.device('/cpu:0'):
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                grads_n_vars = _average_gradients(tower_grads)
                grad_updates = optimizer.apply_gradients(
                    grads_n_vars, global_step=model.global_step)
            model.total_loss = tf.reduce_mean(model.total_loss)

        saver = tf.train.Saver(
            tf.global_variables(),
            max_to_keep=train_config['max_checkpoints_to_keep'])
        summary_writer = tf.summary.FileWriter(train_dir, g)
        summary_op = tf.summary.merge_all()

        global_variables_init_op = tf.global_variables_initializer()
        local_variables_init_op = tf.local_variables_initializer()
        g.finalize()  # Finalize graph to avoid adding ops by mistake

        # For multi-GPU training, 'allow_soft_placement' must be set to True
        sess_config = tf.ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False)
        sess_config.gpu_options.allow_growth = False
        sess = tf.Session(config=sess_config)

        model_path = tf.train.latest_checkpoint(train_config['train_dir'])
        if not model_path:
            sess.run(global_variables_init_op)
            sess.run(local_variables_init_op)
            start_step = 0
            if model_config['frontend_config']['pretrained_dir'] and model.init_fn:
                model.init_fn(sess)
        else:
            logging.info('Restore from last checkpoint: {}'.format(model_path))
            sess.run(local_variables_init_op)
            saver.restore(sess, model_path)
            start_step = tf.train.global_step(sess, model.global_step.name) + 1

        # Training loop
        data_config = train_config['train_data_config']
        total_steps = int(data_config['epoch'] *
                          data_config['num_examples_per_epoch'] /
                          data_config['batch_size'])
        logging.info('Train for {} steps'.format(total_steps))
        for step in range(start_step, total_steps):
            start_time = time.time()
            _, loss = sess.run([grad_updates, model.total_loss])
            duration = time.time() - start_time

            if step % 10 == 0:
                examples_per_sec = data_config['batch_size'] / float(duration)
                time_remain = data_config['batch_size'] * (
                    total_steps - step) / examples_per_sec
                m, s = divmod(time_remain, 60)
                h, m = divmod(m, 60)
                format_str = (
                    '%s: step %d, total loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch; %dh:%02dm:%02ds remains)')
                logging.info(format_str % (datetime.now(), step, loss,
                                           examples_per_sec, duration, h, m, s))

            if step % 10 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % train_config['save_model_every_n_step'] == 0 or (
                    step + 1) == total_steps:
                checkpoint_path = osp.join(train_config['train_dir'], 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
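# `_average_gradients` is referenced by the multi-GPU script above but not
# defined in this excerpt. Below is a minimal sketch of the standard multi-tower
# averaging pattern it is assumed to follow; the actual implementation in the
# repository may differ.
import tensorflow as tf

def _average_gradients(tower_grads):
    """tower_grads: list (one entry per GPU) of lists of (gradient, variable) pairs."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Stack the gradient each tower computed for this variable, then average.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so keep the first tower's variable.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads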
import tensorflow as tf

from models.bisenet import BiseNet
import configuration
import logging

logging.getLogger().setLevel(logging.INFO)

if __name__ == '__main__':
    model_config = configuration.MODEL_CONFIG
    train_config = configuration.TRAIN_CONFIG

    g = tf.Graph()
    with g.as_default():
        # Build the test model
        model = BiseNet(model_config, train_config, 32, 'test')
        model.build()
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter(
            train_config['test_data_config']['test_dir'], g)
        summary_op = tf.summary.merge_all()

        # Dynamically allocate GPU memory
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess_config = tf.ConfigProto(gpu_options=gpu_options)
        sess = tf.Session(config=sess_config)

        model_path = tf.train.latest_checkpoint(train_config['train_dir'])
        config = train_config['test_data_config']
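# The test script is truncated at this point. A typical continuation would
# restore the latest checkpoint before evaluating; a minimal, commented-out
# sketch under that assumption (the evaluation loop itself depends on BiseNet
# internals not shown in this excerpt):
#
#     if model_path:
#         saver.restore(sess, model_path)
#     else:
#         raise ValueError('No checkpoint found in %s' % train_config['train_dir'])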