def train(): """Train.""" with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) dataset = DataSet(FLAGS.batch_size) images, depths, invalid_depths = dataset.csv_inputs(FLAGS.train_file) keep_conv = tf.placeholder(tf.float32) keep_hidden = tf.placeholder(tf.float32) if FLAGS.refine_train: print("refine train.") coarse = model.inference(images, keep_conv, trainable=False) logits = model.inference_refine(images, coarse, keep_conv, keep_hidden) else: print("coarse train.") logits = model.inference(images, keep_conv, keep_hidden) loss = model.loss(logits, depths, invalid_depths) train_op = op.train(loss, global_step, FLAGS.batch_size) init_op = tf.initialize_all_variables() # Session # sess = tf.Session(config=tf.ConfigProto( # log_device_placement=FLAGS.log_device_placement, # device_count={'GPU': 1})) sess = tf.Session(config=tf.ConfigProto( log_device_placement=FLAGS.log_device_placement, gpu_options=tf.GPUOptions(visible_device_list='1'))) merged = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) sess.run(init_op) # parameters coarse_params = {} refine_params = {} if FLAGS.refine_train: for variable in tf.all_variables(): variable_name = variable.name print("parameter: %s" % (variable_name)) if variable_name.find("/") < 0 or variable_name.count( "/") != 1: continue if variable_name.find('coarse') >= 0: coarse_params[variable_name] = variable print("parameter: %s" % (variable_name)) if variable_name.find('fine') >= 0: refine_params[variable_name] = variable else: for variable in tf.trainable_variables(): variable_name = variable.name print("parameter: %s" % (variable_name)) if variable_name.find("/") < 0 or variable_name.count( "/") != 1: continue if variable_name.find('coarse') >= 0: coarse_params[variable_name] = variable if variable_name.find('fine') >= 0: refine_params[variable_name] = variable # define saver print coarse_params saver_coarse = tf.train.Saver(coarse_params) if FLAGS.refine_train: saver_refine = tf.train.Saver(refine_params) # fine tune if FLAGS.fine_tune: coarse_ckpt = tf.train.get_checkpoint_state(FLAGS.coarse_dir) if coarse_ckpt and coarse_ckpt.model_checkpoint_path: print("Pretrained coarse Model Loading.") saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path) print("Pretrained coarse Model Restored.") else: print("No Pretrained coarse Model.") if FLAGS.refine_train: refine_ckpt = tf.train.get_checkpoint_state(FLAGS.refine_dir) if refine_ckpt and refine_ckpt.model_checkpoint_path: print("Pretrained refine Model Loading.") saver_refine.restore(sess, refine_ckpt.model_checkpoint_path) print("Pretrained refine Model Restored.") else: print("No Pretrained refine Model.") # train coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) for step in xrange(FLAGS.max_steps): index = 0 for i in xrange(1000): summary, _, loss_value, logits_val, images_val = sess.run( [merged, train_op, loss, logits, images], feed_dict={ keep_conv: 0.8, keep_hidden: 0.5 }) if index % 10 == 0: print("%s: %d[epoch]: %d[iteration]: train loss %f" % (datetime.now(), step, index, loss_value)) summary_writer.add_summary(summary, 1000 * step + i) assert not np.isnan( loss_value), 'Model diverged with loss = NaN' if index % 500 == 0: if FLAGS.refine_train: output_predict( logits_val, images_val, os.path.join( FLAGS.output_dir, "predict_refine_%05d_%05d" % (step, i))) else: output_predict( logits_val, images_val, os.path.join(FLAGS.output_dir, "predict_%05d_%05d" % (step, i))) index += 1 if step % 5 
== 0 or (step * 1) == FLAGS.max_steps: if FLAGS.refine_train: refine_checkpoint_path = FLAGS.refine_dir + '/model.ckpt' saver_refine.save(sess, refine_checkpoint_path, global_step=step) else: coarse_checkpoint_path = FLAGS.coarse_dir + '/model.ckpt' saver_coarse.save(sess, coarse_checkpoint_path, global_step=step) coord.request_stop() coord.join(threads) sess.close()
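# The train() above reads its configuration from FLAGS. A minimal sketch
# (an assumption, not the repository's actual settings) of the flag
# definitions it expects, using TF1's tf.app.flags; the default values
# here are illustrative only.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_integer('batch_size', 8, 'batch size')
flags.DEFINE_integer('max_steps', 10000000, 'number of epochs to run')
flags.DEFINE_string('train_file', 'train.csv', 'CSV listing image/depth pairs')
flags.DEFINE_string('coarse_dir', 'coarse', 'coarse checkpoint directory')
flags.DEFINE_string('refine_dir', 'refine', 'refine checkpoint directory')
flags.DEFINE_string('log_dir', 'logs', 'TensorBoard summary directory')
flags.DEFINE_string('output_dir', 'output', 'predicted depth output directory')
flags.DEFINE_boolean('refine_train', False, 'train the refine network')
flags.DEFINE_boolean('fine_tune', False, 'restore pretrained checkpoints first')
flags.DEFINE_boolean('log_device_placement', False, 'log device placement')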
def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        dataset = DataSet(BATCH_SIZE)
        # Get a batch of tensors.
        # images is X; depths is Y; invalid_depths is a matrix of 0s and 1s
        # that indicates which pixels in Y have a depth value.
        images, depths, invalid_depths = dataset.csv_inputs(TRAIN_FILE)
        # keep_conv is the dropout keep probability in the conv layers.
        keep_conv = tf.placeholder(tf.float32)
        # TODO(xuguo): what's keep_hidden?
        keep_hidden = tf.placeholder(tf.float32)
        if REFINE_TRAIN:
            # When training the refine network, train the coarse and refine
            # networks together.
            print("refine train.")
            coarse = model.inference(images, keep_conv, trainable=True)
            logits = model.inference_refine(images, coarse, keep_conv, keep_hidden)
        else:
            # When training the coarse network, train only the coarse network.
            # (mpng) this isn't called at all
            print("coarse train.")
            logits = model.inference(images, keep_conv, keep_hidden)
        # Define the loss function:
        # logits: the final output after the FC layer.
        # depths: the ground-truth Y.
        # invalid_depths: the pixels without a depth value.
        loss = model.loss(logits, depths, invalid_depths)
        # Define the training op: Adam optimization.
        train_op = op.train(loss, global_step, BATCH_SIZE)
        # Initialize all variables.
        init_op = tf.global_variables_initializer()  # tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        # parameters
        # The parameter dicts are used to save checkpoints, so training can
        # resume when an exception happens.
        coarse_params = {}
        refine_params = {}
        print('pre-trained parameters -----------------------------------')
        if REFINE_TRAIN:
            for variable in tf.global_variables():  # tf.all_variables()
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                    print("parameter: %s" % (variable_name))
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable

        # define saver
        # print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)
        if REFINE_TRAIN:
            saver_refine = tf.train.Saver(refine_params)

        # fine tune
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            if REFINE_TRAIN:
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    print("Pretrained refine Model Loading.")
                    saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                    print("Pretrained refine Model Restored.")
                else:
                    print("No Pretrained refine Model.")

        # train with multiple threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        loss_curve = []
        # step is the epoch index.
        for step in range(EPOCH):
            for i in range(TOTAL_BATCH):
                _, loss_value, logits_val, images_val = sess.run(
                    [train_op, loss, logits, images],
                    feed_dict={keep_conv: 0.8, keep_hidden: 0.5})
                if i % 10 == 0:
                    loss_curve.append(loss_value)
                    print("%s: %d[epoch]: %d[iteration]: train loss %f" %
                          (datetime.now(), step, i, loss_value))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # Save parameters every 10 epochs.
            if step % 10 == 0 or step == EPOCH:
                with open('loss.txt', 'w') as output:
                    for l in loss_curve:
                        output.write(str(l) + '\n')
                if REFINE_TRAIN:
                    output_predict(logits_val, images_val,
                                   "data/perdict/predict_refine_%05d" % step)
                    # coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    # saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
                    print("saving refine network checkpoint")
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess, refine_checkpoint_path, global_step=step)
                else:
                    output_predict(logits_val, images_val,
                                   "data/perdict/predict_%05d" % step)
                    print("saving coarse network checkpoint")
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
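# The variant above dumps its loss curve to loss.txt every 10 epochs, one
# float per line. A minimal offline plotting sketch (an assumption, not part
# of the original script) using matplotlib:
import matplotlib.pyplot as plt

def plot_loss_curve(path='loss.txt', out='loss_curve.png'):
    # loss.txt holds one loss value per line, in training order.
    with open(path) as f:
        losses = [float(line) for line in f if line.strip()]
    plt.figure()
    plt.plot(losses)
    plt.xlabel('logged iteration (every 10 batches)')
    plt.ylabel('train loss')
    plt.savefig(out)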
def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        dataset = DataSet(BATCH_SIZE)
        images, depths, invalid_depths = dataset.csv_inputs(TRAIN_FILE)
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        if REFINE_TRAIN:
            print("refine train.")
            coarse = model.inference(images, keep_conv, trainable=False)
            logits = model.inference_refine(images, coarse, keep_conv, keep_hidden)
        else:
            print("coarse train.")
            logits = model.inference(images, keep_conv, keep_hidden)
        loss = model.loss(logits, depths, invalid_depths)
        train_op = op.train(loss, global_step, BATCH_SIZE)
        init_op = tf.global_variables_initializer()  # tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        # parameters
        coarse_params = {}
        refine_params = {}
        if REFINE_TRAIN:
            for variable in tf.global_variables():  # tf.all_variables()
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                    print("parameter: %s" % (variable_name))
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable

        # define saver
        print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)
        if REFINE_TRAIN:
            saver_refine = tf.train.Saver(refine_params)

        # fine tune
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            if REFINE_TRAIN:
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    print("Pretrained refine Model Loading.")
                    saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                    print("Pretrained refine Model Restored.")
                else:
                    print("No Pretrained refine Model.")

        # train
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for step in range(MAX_STEPS):
            index = 0
            for i in range(1000):
                _, loss_value, logits_val, images_val = sess.run(
                    [train_op, loss, logits, images],
                    feed_dict={keep_conv: 0.8, keep_hidden: 0.5})
                if index % 10 == 0:
                    print("%s: %d[epoch]: %d[iteration]: train loss %f" %
                          (datetime.now(), step, index, loss_value))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                if index % 500 == 0:
                    if REFINE_TRAIN:
                        output_predict(logits_val, images_val,
                                       "data/predict_refine_%05d_%05d" % (step, i))
                    else:
                        output_predict(logits_val, images_val,
                                       "data/predict_%05d_%05d" % (step, i))
                index += 1
            if step % 5 == 0 or step == MAX_STEPS:
                if REFINE_TRAIN:
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess, refine_checkpoint_path, global_step=step)
                else:
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
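# All of the train() variants above delegate to
# model.loss(logits, depths, invalid_depths). A minimal sketch of what such a
# masked loss can look like, assuming the scale-invariant error of
# Eigen et al. (2014) on 55x74 depth maps, with invalid_depths zeroing out
# pixels that have no ground truth; the repository's actual model.loss may
# differ in details such as the lambda value.
def masked_scale_invariant_loss(logits, depths, invalid_depths, lam=0.5):
    n = 55 * 74
    logits_flat = tf.reshape(logits, [-1, n])
    depths_flat = tf.reshape(depths, [-1, n])
    invalid_flat = tf.reshape(invalid_depths, [-1, n])
    # Per-pixel error, with unlabeled pixels masked to zero.
    d = (logits_flat - depths_flat) * invalid_flat
    sum_square_d = tf.reduce_sum(tf.square(d), 1)
    square_sum_d = tf.square(tf.reduce_sum(d, 1))
    # Mean over the batch of: (1/n) sum d^2 - (lam/n^2) (sum d)^2
    return tf.reduce_mean(sum_square_d / n - lam * square_sum_d / (n * n))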
def test():
    with tf.Graph().as_default():
        dataset = DataSet(BATCH_SIZE)
        if TEST_SINGLE:
            images, depths, invalid_depths, filenames, depth_filenames = \
                dataset.csv_inputs_test_single(SINGLE_TEST_FILE)
        else:
            images, depths, invalid_depths, filenames, depth_filenames = \
                dataset.csv_inputs_test(TEST_FILE)
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        if REFINE_TEST:
            print("refine test.")
            coarse = model.inference(images, keep_conv, trainable=False)
            logits = model.inference_refine(images, coarse, keep_conv,
                                            keep_hidden, trainable=False)
        else:
            print("coarse test.")
            logits = model.inference(images, keep_conv, trainable=False)
        loss = model.loss(logits, depths, invalid_depths)
        init_op = tf.global_variables_initializer()  # tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        # parameters
        coarse_params = {}
        refine_params = {}
        if REFINE_TEST:
            for variable in tf.global_variables():  # tf.all_variables()
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                    print("parameter: %s" % (variable_name))
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.global_variables():  # tf.all_variables()
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable

        # define saver
        print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)
        if REFINE_TEST:
            saver_refine = tf.train.Saver(refine_params)

        # load parameters
        coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
        if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
            print("Pretrained coarse Model Loading.")
            saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
            print("Pretrained coarse Model Restored.")
        else:
            print("No Pretrained coarse Model.")
        if REFINE_TEST:
            refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
            if refine_ckpt and refine_ckpt.model_checkpoint_path:
                print("Pretrained refine Model Loading.")
                saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                print("Pretrained refine Model Restored.")
            else:
                print("No Pretrained refine Model.")

        # test
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        if TEST_SINGLE:
            MAX_TEST_STEPS = test_single_number
        for step in range(MAX_TEST_STEPS):
            true_depths, logits_val, images_val, loss_val, filenames_val, depth_filenames_val = sess.run(
                [depths, logits, images, loss, filenames, depth_filenames],
                feed_dict={keep_conv: 1.0, keep_hidden: 1.0})  # no dropout at test time
            if REFINE_TEST:
                if TEST_SINGLE:
                    output_predict_test_single(
                        true_depths, logits_val, images_val, filenames_val,
                        depth_filenames_val, "output/test/single_test_refine",
                        step * BATCH_SIZE)
                else:
                    output_predict_test(
                        true_depths, logits_val, images_val, filenames_val,
                        depth_filenames_val, "output/test/test_refine",
                        step * BATCH_SIZE)
            else:
                if TEST_SINGLE:
                    output_predict_test_single(
                        true_depths, logits_val, images_val, filenames_val,
                        depth_filenames_val, "output/test/single_test",
                        step * BATCH_SIZE)
                else:
                    output_predict_test(
                        true_depths, logits_val, images_val, filenames_val,
                        depth_filenames_val, "output/test/test",
                        step * BATCH_SIZE)
            print("%s: %d[step]: test loss %f" %
                  (datetime.now(), step, loss_val))
            assert not np.isnan(loss_val), 'Model diverged with loss = NaN'
        coord.request_stop()
        coord.join(threads)
        sess.close()
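# The output_predict* helpers used above are assumed to dump each batch as
# image files. A minimal sketch of such a helper with PIL, normalizing each
# predicted 55x74 depth map to an 8-bit grayscale PNG; the repository's own
# helper may choose different filenames or channels.
import os
import numpy as np
from PIL import Image

def output_predict_sketch(depths_batch, images_batch, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for i, (image, depth) in enumerate(zip(images_batch, depths_batch)):
        # Save the input image as-is.
        Image.fromarray(np.uint8(image)).save(
            os.path.join(output_dir, '%05d_org.png' % i))
        # (H, W, 1) -> (H, W), then normalize to [0, 255] for viewing.
        depth = depth.transpose(2, 0, 1)[0]
        if np.max(depth) != 0:
            depth = (depth / np.max(depth)) * 255.0
        Image.fromarray(np.uint8(depth)).save(
            os.path.join(output_dir, '%05d_depth.png' % i))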
def train():
    ''' Train '''
    with tf.Graph().as_default():
        # global step count
        global_step = tf.Variable(0, trainable=False)
        # NYU Depth Dataset V2: original size (480 x 640 x 3) -> crop -> (460 x 620 x 3)
        image_input = ImageInput('./data/nyu_depth_v2_labeled.mat')
        print("the number of train data: %d" % (len(image_input.images)))
        images = tf.placeholder(tf.float32,
                                [None, FLAGS.crop_size_height,
                                 FLAGS.crop_size_width, FLAGS.image_depth])
        depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        invalid_depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        # graph output
        if FLAGS.refine_train:
            print("refine train.")
            logits = model.inference_refine(images, keep_conv, keep_hidden)
        else:
            print("coarse train.")
            logits = model.inference(images, keep_conv, keep_hidden)
        # loss: uses the graph output and the labels
        loss = model.loss(logits, depths, invalid_depths)
        # training op
        train_op = op.train(loss, global_step)
        # summary
        summary_op = tf.merge_all_summaries()
        # initialization op
        init_op = tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        # saver
        # saver = tf.train.Saver(tf.all_variables())
        sess.run(init_op)

        # save coarse and refine parameters separately
        coarse_params = {}
        refine_params = {}
        if FLAGS.refine_train:
            for variable in tf.all_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    print("ignore.")
                    continue
                scope, name = variable_name.split("/")
                target, _ = name.split(":")
                if variable_name.find('coarse') >= 0:
                    print("coarse parameter: %s" % (variable_name))
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    print("refine parameter: %s" % (variable_name))
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    print("ignore.")
                    continue
                scope, name = variable_name.split("/")
                target, _ = name.split(":")
                if variable_name.find('coarse') >= 0:
                    print("coarse parameter: %s" % (variable_name))
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    print("refine parameter: %s" % (variable_name))
                    refine_params[variable_name] = variable

        # define saver
        saver_coarse = tf.train.Saver(coarse_params)
        saver_refine = tf.train.Saver(refine_params)

        # fine tune
        if FLAGS.fine_tune:
            # load coarse parameters
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            # load refine parameters
            refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
            if refine_ckpt and refine_ckpt.model_checkpoint_path:
                print("Pretrained refine Model Loading.")
                saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                print("Pretrained refine Model Restored.")
            else:
                print("No Pretrained refine Model.")

        # TODO train coarse or refine (change trainable)
        # if not FLAGS.coarse_train:
        #     for val in coarse_params:
        #         print val
        # if not FLAGS.refine_train:
        #     for val in coarse_params:
        #         print val

        # train refine
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # debug
        # set up the summary writer
        # summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)
        # batches = image_input.get_batches(FLAGS.batch_size)
        # d = np.asarray(batches[0][0])
        # print d.shape
        # a = np.asarray(batches[0][1])
        # print a.shape
        # logits_val, logits_fine_val, loss_value = sess.run(
        #     [logits, logits_fine, loss],
        #     feed_dict={images: batches[0][0], depths: batches[0][1],
        #                invalid_depths: batches[0][2],
        #                keep_conv: 1.0, keep_hidden: 1.0})
        # print len(logits_val[0])
        # print len(logits_fine_val[0])
        # print loss_value

        # train repeatedly up to max_step
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            batches = image_input.get_batches(FLAGS.batch_size)
            vals = image_input.get_validation()
            for batch in batches:
                train = batch[0]
                depth = batch[1]
                ignore_depth = batch[2]
                _, loss_value = sess.run(
                    [train_op, loss],
                    feed_dict={images: train, depths: depth,
                               invalid_depths: ignore_depth,
                               keep_conv: 0.8, keep_hidden: 0.5})
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print("%s: %d[epoch]: %d[iteration]: train loss %f: "
                          "%d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]" %
                          (datetime.now(), step, index, loss_value,
                           num_examples_per_step, examples_per_sec, duration))
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    if index % 50 == 0:
                        output_vec, cost_value = sess.run(
                            [logits, loss],
                            feed_dict={images: vals[0], depths: vals[1],
                                       invalid_depths: vals[2],
                                       keep_conv: 1.0, keep_hidden: 1.0})
                        print("%s: %d[epoch]: %d[iteration]: validation loss: %f" %
                              (datetime.now(), step, index, cost_value))
                        if index % 100 == 0:
                            output_dir = "predicts_%05d_%08d" % (step, index)
                            print("predicts output: %s" % output_dir)
                            data_feed_inputs_nyu.output_predict(output_vec, output_dir)
                    previous_time = end_time
                index += 1

            # if index % 100 == 0:
            #     summary_str = sess.run(summary_op,
            #                            feed_dict={images: train, labels: label,
            #                                       keep_conv: 0.8, keep_hidden: 0.5})
            #     # write to the summary
            #     summary_writer.add_summary(summary_str, step)

            if step % 5 == 0 or step == MAX_STEPS:
                if FLAGS.refine_train:
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess, refine_checkpoint_path, global_step=step)
                else:
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train(REFINE_TRAIN):
    BATCH_SIZE = 8
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        dataset = DataSet(BATCH_SIZE)
        keep_conv = tf.placeholder(tf.float32)
        images, depths, invalid_depths, features = dataset.csv_inputs(TRAIN_FILE)
        if REFINE_TRAIN:
            print("refine train.")
            coarse = model.inference(images, trainable=False)
            logits = model.inference_refine(images, coarse, keep_conv)  # ??? what is this parameter?
        else:
            print("coarse train.")
            logits = model.inference(images)
        loss = model.loss(logits, depths, invalid_depths)
        train_op = op.train(loss, global_step, BATCH_SIZE)
        init_op = tf.global_variables_initializer()  # updated from tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))  # do not log device placement
        sess.run(init_op)

        # parameters
        coarse_params = {}  # define a new dict
        refine_params = {}
        if REFINE_TRAIN:
            for variable in tf.global_variables():  # tf.all_variables()
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                    print("parameter: %s" % (variable_name))
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable

        # define saver
        print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)
        if REFINE_TRAIN:
            saver_refine = tf.train.Saver(refine_params)

        # fine tune
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print(coarse_ckpt.model_checkpoint_path)
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
            else:
                print("No Pretrained coarse Model.")
            if REFINE_TRAIN:
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                else:
                    print("No Pretrained refine Model.")

        # train
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        lossli = []
        lossli1 = []
        for step in range(MAX_STEPS):
            index = 0
            lossli = []
            print('-------------------------------')
            for i in range(3000):
                _, loss_value, logits_val, images_val = sess.run(
                    [train_op, loss, logits, images],
                    feed_dict={keep_conv: 0.8})
                if i % 100 == 0:
                    print('[Epoch]:', step, '[iteration]:', i,
                          '[Train losses]:', loss_value)
                    lossli.append(loss_value)
                index += 1
            lossli1.append(np.mean(lossli))
            if step % 5 == 0 or step == MAX_STEPS:
                if REFINE_TRAIN:
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess, refine_checkpoint_path, global_step=step)
                else:
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)

        plt.figure()
        plt.plot(lossli1)
        plt.xlabel("Epoch")
        plt.ylabel("Train_loss")
        plt.title("Train_Loss for Each Epoch")
        # Label before saving, so the labels appear in the saved figure.
        plt.savefig("train_loss.jpg")

        coord.request_stop()  # request all threads to stop
        coord.join(threads)   # wait for all threads to finish
        sess.close()
def test():
    BATCH_SIZE = 1
    with tf.Graph().as_default():
        dataset = DataSet(BATCH_SIZE)
        keep_conv = tf.placeholder(tf.float32)
        images, depths, invalid_depths, features = dataset.csv_inputs(TEST_FILE)
        coarse = model.inference(images, trainable=False)
        logits = model.inference_refine(images, coarse, keep_conv, trainable=False)
        loss1 = model.loss(coarse, depths, invalid_depths)
        loss2 = model.loss(logits, depths, invalid_depths)
        init_op = tf.global_variables_initializer()  # updated from tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))  # do not log device placement
        sess.run(init_op)

        coarse_params = {}  # define a new dict
        refine_params = {}
        for variable in tf.global_variables():  # tf.all_variables()
            variable_name = variable.name
            # print("parameter: %s" % (variable_name))
            if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                continue
            if variable_name.find('coarse') >= 0:
                coarse_params[variable_name] = variable
                # print("parameter: %s" % (variable_name))
            if variable_name.find('fine') >= 0:
                refine_params[variable_name] = variable
        saver_coarse = tf.train.Saver(coarse_params)
        saver_refine = tf.train.Saver(refine_params)

        # fine tune
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                # print(coarse_ckpt.model_checkpoint_path)
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
            refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
            if refine_ckpt and refine_ckpt.model_checkpoint_path:
                # print(refine_ckpt.model_checkpoint_path)
                saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)

        # test
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        index = 0
        ls1 = []
        ls2 = []
        print('\n', '---------Examples---------:')
        for step in range(NumOfTest):
            # print('-----------------------------------------')
            loss_value1, loss_value2, logits_val, coarse_val, images_val, features_ = sess.run(
                [loss1, loss2, logits, coarse, images, features],
                feed_dict={keep_conv: 1})
            ls1.append(loss_value1)
            ls2.append(loss_value2)
            if step % 1 == 0:
                index = index + 1
                print(features_, 'Coarse losses:', loss_value1,
                      'Refine losses:', loss_value2, '\n')
                output_save(coarse_val, logits_val, images_val, index, "data/test")
        ls1m = np.mean(ls1)
        ls2m = np.mean(ls2)
        print('---------Testing Results--------:')
        print('Coarse image mean losses:', ls1m)
        print('Refine image mean losses:', ls2m)
        coord.request_stop()  # request all threads to stop
        coord.join(threads)   # wait for all threads to finish
        sess.close()
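# A minimal entry-point sketch for the train/test pair above; the module-level
# constants they read (TRAIN_FILE, TEST_FILE, COARSE_DIR, REFINE_DIR, ...) are
# assumed to be defined elsewhere in the actual script.
if __name__ == '__main__':
    train(REFINE_TRAIN=True)  # pass False to train only the coarse network
    test()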