def main(_): if gfile.Exists(TRAIN_DIR): gfile.DeleteRecursively(TRAIN_DIR) gfile.MakeDirs(TRAIN_DIR) # locally #train() print("ps: %s" % (DFLAGS.task_index)) ps_hosts = DFLAGS.ps_hosts.split(",") worker_hosts = DFLAGS.worker_hosts.split(",") # Create a cluster from the parameter server and worker hosts. cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) # Create and start a server for the local task. server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index) # training data filename_queue = tf.train.string_input_producer( ["output/data/airquality.csv"]) datas, targets = dataset.mini_batch(filename_queue, BATCH_SIZE) if DFLAGS.job_name == "ps": server.join() elif DFLAGS.job_name == "worker": # Assigns ops to the local worker by default. with tf.device( tf.train.replica_device_setter( worker_device="/job:worker/task:%d" % DFLAGS.task_index, cluster=cluster)): # step num of global global_step = tf.Variable(0, trainable=False) # inference logits = model.inference(datas) debug_value = model.debug(logits) # loss graphのoutputとlabelを利用 loss = model.loss(logits, targets) global_step = tf.Variable(0) #train_op = tf.train.AdagradOptimizer(0.0001).minimize( # loss, global_step=global_step) train_op = op.train(loss, global_step) saver = tf.train.Saver() summary_op = tf.merge_all_summaries() init_op = tf.initialize_all_variables() # Create a "supervisor", which oversees the training process. sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0), logdir="/tmp/train_logs", init_op=init_op, init_feed_dict=None, summary_op=summary_op, saver=saver, global_step=global_step, save_model_secs=60) # The supervisor takes care of session initialization, restoring from # a checkpoint, and closing when done or an error occurs. with sv.managed_session(server.target) as sess: # Loop until the supervisor shuts down or 1000000 steps have completed. 
coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) step = 0 while not sv.should_stop() and step < 1000000: # Run a training step asynchronously. # See `tf.train.SyncReplicasOptimizer` for additional details on how to # perform *synchronous* training. start_time = time.time() _, loss_value, predict_value, targets_eval, step = sess.run( [train_op, loss, debug_value, targets, global_step]) #_, step = sess.run([train_op, global_step]) duration = time.time() - start_time if step % 100 == 0: # mini batch size num_examples_per_step = BATCH_SIZE # examples num per sec examples_per_sec = num_examples_per_step / duration # duration per batch sec_per_batch = float(duration) # time, step num, loss, exampeles num per sec, time per batch format_str = '$s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)' print str(datetime.now()) + ': step' + str( step) + ', loss= ' + str(loss_value) + ' ' + str( examples_per_sec) + ' examples/sec; ' + str( sec_per_batch) + ' sec/batch' print "predict: ", predict_value print "targets: ", targets_eval coord.request_stop() coord.join(threads) sess.close() # Ask for all the services to stop. sv.stop()
def train():
    ''' Train CNN_tiny for a number of steps.

    Cluttered-MNIST variant: loads the distorted-MNIST .npz, builds the
    graph with dropout keep-probability placeholders, and runs an epoch
    loop of mini-batch training, periodically printing train throughput
    and a 5-example test-loss spot check, saving checkpoints to TRAIN_DIR.
    NOTE(review): this file rebinds train() several times; only the last
    definition is in effect at import time.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        #images, labels = data_inputs.distorted_inputs(TF_RECORDS)
        # training data
        mnist = np.load('./data/mnist_sequence1_sample_5distortions5x5.npz')
        trX = mnist['X_train']
        trY = mnist['y_train']
        # X_valid = mnist_cluttered['X_valid']
        # y_valid = mnist_cluttered['y_valid']
        teX = mnist['X_test']
        teY = mnist['y_test']
        # restore NHWC image shape (flattened 40x40 grayscale)
        trX = trX.reshape(-1, 40, 40, 1)
        teX = teX.reshape(-1, 40, 40, 1)
        # % turn from dense to one hot representation
        trY = dense_to_one_hot(trY, n_classes=10)
        # Y_valid = dense_to_one_hot(y_valid, n_classes=10)
        teY = dense_to_one_hot(teY, n_classes=10)
        print("the number of train data: %d" % (len(trX)))
        # create mini_batch
        #datas, targets = trX.(trX, trY, BATCH_SIZE)
        images = tf.placeholder(tf.float32, [None, 40, 40, 1])
        labels = tf.placeholder(tf.float32, [None, 10])
        # dropout keep probabilities for conv / hidden layers
        keep_conv = tf.placeholder("float")
        keep_hidden = tf.placeholder("float")
        # graph output
        logits = model.inference(images, keep_conv, keep_hidden)
        # loss from the graph output and the labels
        #loss = model.loss(logits, labels)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        predict_op = tf.argmax(logits, 1)
        # training operation
        train_op = op.train(loss, global_step)
        # saver
        saver = tf.train.Saver(tf.all_variables())
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)
        # repeat training up to MAX_STEPS epochs
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            for start, end in zip(range(0, len(trX), BATCH_SIZE),
                                  range(BATCH_SIZE, len(trX), BATCH_SIZE)):
                _, loss_value = sess.run(
                    [train_op, loss],
                    feed_dict={images: trX[start:end],
                               labels: trY[start:end],
                               keep_conv: 0.8,
                               keep_hidden: 0.5})
                # every 10th mini-batch: log throughput and spot-check test loss
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    # NOTE(review): the *(step+1) factor makes this count grow
                    # with the epoch number — looks unintended; confirm.
                    num_examples_per_step = BATCH_SIZE * 10 * (step+1)
                    examples_per_sec = num_examples_per_step / duration
                    print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/step]: %f[examples/sec]: %f[sec/iteration]" % (datetime.now(), step, index, loss_value, num_examples_per_step, examples_per_sec, duration))
                    index += 1
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    # evaluate on 5 random test examples (dropout disabled)
                    test_indices = np.arange(len(teX))  # Get A Test Batch
                    np.random.shuffle(test_indices)
                    test_indices = test_indices[0:5]
                    print "="*20
                    print teY[test_indices]
                    predict, cost_value = sess.run(
                        [predict_op, loss],
                        feed_dict={images: teX[test_indices],
                                   labels: teY[test_indices],
                                   keep_conv: 1.0,
                                   keep_hidden: 1.0})
                    print predict
                    print("test loss: %f" % (cost_value))
                    print "="*20
                    previous_time = end_time
                # NOTE(review): index is also incremented inside the logging
                # branch above, so iteration numbers skip ahead every 10 batches.
                index += 1
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # every 100 iterations (original comment said every 1000)
            if index % 100 == 0:
                pass
                summary_str = sess.run(
                    summary_op,
                    feed_dict={images: teX[test_indices],
                               labels: teY[test_indices],
                               keep_conv: 1.0,
                               keep_hidden: 1.0})
                # write the summary
                summary_writer.add_summary(summary_str, step)
            if step % 1 == 0 or (step * 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    Air-quality regression variant: reads mini-batches from a CSV input
    queue, feeds them through placeholders, logs loss/throughput every
    100 steps, and checkpoints every 1000 steps into TRAIN_DIR.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        filename_queue = tf.train.string_input_producer(
            ["data/airquality.csv"])
        datas, targets = load.mini_batch(filename_queue, BATCH_SIZE)
        # placeholders: 5 input features -> 1 regression target
        x = tf.placeholder(tf.float32, shape=[None, 5])
        y = tf.placeholder(tf.float32, shape=[None, 1])
        # graph output
        logits = model.inference(x)
        debug_value = model.debug(logits)
        # loss from the graph output and the labels
        loss = model.loss(logits, y)
        # training operation
        train_op = op.train(loss, global_step)
        # saver
        saver = tf.train.Saver(tf.all_variables())
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)
        print("settion start.")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)
        # checkpoint name stamped with the current time
        # (assumes tdatetime is a module-level datetime — TODO confirm)
        model_name = '/model%s.ckpt' % (tdatetime.strftime('%Y%m%d%H%M%S'))
        # repeat training up to MAX_STEPS
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            # pull one mini-batch out of the input queue, then feed it back in
            a, b = sess.run([datas, targets])
            _, loss_value, predict_value = sess.run(
                [train_op, loss, debug_value], feed_dict={
                    x: a,
                    y: b
                })
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # every 100 steps
            if step % 100 == 0:
                # examples per step = mini batch size
                num_examples_per_step = BATCH_SIZE
                # examples processed per second
                examples_per_sec = num_examples_per_step / duration
                # wall time per batch
                sec_per_batch = float(duration)
                # NOTE(review): format_str is unused and its "$s" is a typo
                # for "%s".
                format_str = '$s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                print str(datetime.now()) + ': step' + str(
                    step) + ', loss= ' + str(loss_value) + ' ' + str(
                        examples_per_sec) + ' examples/sec; ' + str(
                            sec_per_batch) + ' sec/batch'
                print "x", a
                print "ground truth:", b
                print "predict: ", predict_value
            # every 100 steps (summary writing currently disabled)
            if step % 100 == 0:
                pass
                #summary_str = sess.run(summary_op)
                # write the summary
                #summary_writer.add_summary(summary_str, step)
            if step % 1000 == 0 or (step * 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + model_name
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    Caltech-101 spatial-transformer variant: trains from an ImageInput
    file list, keeps two savers (CNN-only "pretrain" params vs all
    trainable params including the spatial transformer), optionally
    restores a pretrained CNN, and checkpoints both sets every epoch.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        image_input = ImageInput('./data/101Caltech_shuffles.txt')
        print("the number of train data: %d" % (len(image_input.image_paths)))
        images = tf.placeholder(tf.float32, [None, 224, 224, 3])
        labels = tf.placeholder(tf.float32, [None, 101])
        # dropout keep probabilities for conv / hidden layers
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        # graph output
        logits, transform_result = model.inference(images, keep_conv, keep_hidden)
        # loss from the graph output and the labels
        # loss = model.loss(logits, labels)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        predict_op = tf.argmax(logits, 1)
        # training operation
        train_op = op.train(loss, global_step)
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        # saver
        #saver = tf.train.Saver(tf.all_variables())
        sess.run(init_op)
        # save the pretrained CNN and the full model separately
        pretrain_params = {}
        train_params = {}
        for variable in tf.trainable_variables():
            variable_name = variable.name
            #print("parameter: %s" %(variable_name))
            scope, name = variable_name.split("/")
            target, _ = name.split(":")
            # everything outside the spatial transformer counts as
            # pretrainable CNN weights
            if variable_name.find('spatial_transformer') < 0:
                print("pretrain parameter: %s" % (variable_name))
                pretrain_params[variable_name] = variable
            print("train parameter: %s" % (variable_name))
            train_params[variable_name] = variable
        saver_cnn = tf.train.Saver(pretrain_params)
        saver_transformers = tf.train.Saver(train_params)
        # pretrained_model
        if FLAGS.fine_tune:
            ckpt = tf.train.get_checkpoint_state(PRETRAIN_DIR)
            if ckpt and ckpt.model_checkpoint_path:
                print("Pretrained Model Loading.")
                saver_cnn.restore(sess, ckpt.model_checkpoint_path)
                print("Pretrained Model Restored.")
            else:
                print("No Pretrained Model.")
        # pretrained model from another type models.
        # # saver
        # print type(tf.all_variables())
        # for variable in tf.trainable_variables():
        #     variable_name = variable.name
        #     variable_value = variable.eval(sess)
        #     if variable_name.find('softmax_linear') < 0 and variable_name.find('spatial_transformer') < 0:
        #         print("trained parameter: %s" %(variable_name))
        #         scope, name = variable_name.split("/")
        #         target, _ = name.split(":")
        #         with tf.variable_scope(scope, reuse=True):
        #             sess.run(tf.get_variable(target).assign(variable_value))
        # trained_model = FLAGS.trained_model
        # print trained_model
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)
        # repeat training up to MAX_STEPS epochs
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            batches = image_input.get_batches(FLAGS.batch_size)
            for batch in batches:
                train = batch[0]
                label = batch[1]
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict={
                                             images: train,
                                             labels: label,
                                             keep_conv: 0.8,
                                             keep_hidden: 0.5
                                         })
                # every 10th mini-batch: log throughput and spot-check 2 examples
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print(
                        "%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]"
                        % (datetime.now(), step, index, loss_value,
                           num_examples_per_step, examples_per_sec, duration))
                    index += 1
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    # test_indices = np.arange(len(teX)) # Get A Test Batch
                    # np.random.shuffle(test_indices)
                    # test_indices = test_indices[0:5]
                    # evaluate the first two examples of the current batch
                    # (dropout disabled)
                    print "=" * 20
                    testx = train[0:2]
                    #print testx
                    testy = label[0:2]
                    print np.argmax(testy[0])
                    print np.argmax(testy[1])
                    output_vec, predict, cost_value = sess.run(
                        [logits, predict_op, loss],
                        feed_dict={
                            images: testx,
                            labels: testy,
                            keep_conv: 1.0,
                            keep_hidden: 1.0
                        })
                    print predict
                    print("test loss: %f" % (cost_value))
                    print "=" * 20
                    previous_time = end_time
                # NOTE(review): index is also incremented inside the logging
                # branch above, so iteration numbers skip ahead.
                index += 1
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
            # every 100 iterations
            if index % 100 == 0:
                pass
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           images: train,
                                           labels: label,
                                           keep_conv: 0.8,
                                           keep_hidden: 0.5
                                       })
                # write the summary
                summary_writer.add_summary(summary_str, step)
            # checkpoint both savers every epoch
            if step % 1 == 0 or (step * 1) == MAX_STEPS:
                pretrain_checkpoint_path = PRETRAIN_DIR + '/model.ckpt'
                train_checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver_cnn.save(sess, pretrain_checkpoint_path, global_step=step)
                saver_transformers.save(sess, train_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    Air-quality regression variant (duplicate of an earlier train()
    definition in this file; the later binding wins at import time).
    Reads mini-batches from a CSV input queue, feeds them through
    placeholders, logs every 100 steps, checkpoints every 1000 steps.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        filename_queue = tf.train.string_input_producer(["data/airquality.csv"])
        datas, targets = load.mini_batch(filename_queue, BATCH_SIZE)
        # placeholders: 5 input features -> 1 regression target
        x = tf.placeholder(tf.float32, shape=[None, 5])
        y = tf.placeholder(tf.float32, shape=[None, 1])
        # graph output
        logits = model.inference(x)
        debug_value = model.debug(logits)
        # loss from the graph output and the labels
        loss = model.loss(logits, y)
        # training operation
        train_op = op.train(loss, global_step)
        # saver
        saver = tf.train.Saver(tf.all_variables())
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)
        print("settion start.")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)
        # checkpoint name stamped with the current time
        # (assumes tdatetime is a module-level datetime — TODO confirm)
        model_name = '/model%s.ckpt' % (tdatetime.strftime('%Y%m%d%H%M%S'))
        # repeat training up to MAX_STEPS
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            # pull one mini-batch out of the input queue, then feed it back in
            a, b = sess.run([datas, targets])
            _, loss_value, predict_value = sess.run([train_op, loss, debug_value],
                                                    feed_dict={x: a, y: b})
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # every 100 steps
            if step % 100 == 0:
                # examples per step = mini batch size
                num_examples_per_step = BATCH_SIZE
                # examples processed per second
                examples_per_sec = num_examples_per_step / duration
                # wall time per batch
                sec_per_batch = float(duration)
                # NOTE(review): format_str is unused and its "$s" is a typo
                # for "%s".
                format_str = '$s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                print str(datetime.now()) + ': step' + str(step) + ', loss= '+ str(loss_value) + ' ' + str(examples_per_sec) + ' examples/sec; ' + str(sec_per_batch) + ' sec/batch'
                print "x", a
                print "ground truth:", b
                print "predict: ", predict_value
            # every 100 steps (summary writing currently disabled)
            if step % 100 == 0:
                pass
                #summary_str = sess.run(summary_op)
                # write the summary
                #summary_writer.add_summary(summary_str, step)
            if step % 1000 == 0 or (step * 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + model_name
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    TFRecords variant: reads distorted input batches from TF_RECORDS via
    the input pipeline (no placeholders), logs loss/throughput every 100
    steps, writes summaries every 100 steps, checkpoints every 1000 steps.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        images, labels = data_inputs.distorted_inputs(TF_RECORDS)
        # graph output
        logits = model.inference(images)
        # loss from the graph output and the labels
        loss = model.loss(logits, labels)
        # training operation
        train_op = op.train(loss, global_step)
        # saver
        saver = tf.train.Saver(tf.all_variables())
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)
        # checkpoint name stamped with the current time
        # (assumes tdatetime is a module-level datetime — TODO confirm)
        model_name = '/model%s.ckpt' % (tdatetime.strftime('%Y%m%d%H%M%S'))
        # repeat training up to MAX_STEPS
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # every 100 steps
            if step % 100 == 0:
                # examples per step = mini batch size
                num_examples_per_step = BATCH_SIZE
                # examples processed per second
                examples_per_sec = num_examples_per_step / duration
                # wall time per batch
                sec_per_batch = float(duration)
                # NOTE(review): format_str is unused and its "$s" is a typo
                # for "%s".
                format_str = '$s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                print str(datetime.now()) + ': step' + str(
                    step) + ', loss= ' + str(loss_value) + ' ' + str(
                        examples_per_sec) + ' examples/sec; ' + str(
                            sec_per_batch) + ' sec/batch'
            # every 100 steps
            if step % 100 == 0:
                pass
                summary_str = sess.run(summary_op)
                # write the summary
                summary_writer.add_summary(summary_str, step)
            if step % 1000 == 0 or (step * 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + model_name
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train

    NYU Depth V2 coarse/refine variant: trains either the coarse network
    or the refinement network (selected by FLAGS.refine_train), keeps the
    coarse and refine parameter sets in separate savers, optionally
    restores both from checkpoints, and saves the active set every epoch.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # NYU Dataset V2 original size(480 x 640 x 3) -> crop -> (460 x 620 x 3)
        image_input = ImageInput('./data/nyu_depth_v2_labeled.mat')
        print("the number of train data: %d" % (len(image_input.images)))
        images = tf.placeholder(tf.float32, [None, FLAGS.crop_size_height, FLAGS.crop_size_width, FLAGS.image_depth])
        # target depth maps and their validity masks (1 x 55 x 74)
        depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        invalid_depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        # dropout keep probabilities for conv / hidden layers
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        # graph output: pick coarse-only or refine network
        if FLAGS.refine_train:
            print("refine train.")
            logits = model.inference_refine(images, keep_conv, keep_hidden)
        else:
            print("coarse train.")
            logits = model.inference(images, keep_conv, keep_hidden)
        # loss from the graph output and the labels
        loss = model.loss(logits, depths, invalid_depths)
        # training operation
        train_op = op.train(loss, global_step)
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        # saver
        #saver = tf.train.Saver(tf.all_variables())
        sess.run(init_op)
        # save coarse and refine parameters separately; variables are routed
        # by the 'coarse'/'fine' substring in their scope name
        coarse_params = {}
        refine_params = {}
        if FLAGS.refine_train:
            for variable in tf.all_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                # skip anything not exactly one scope deep (e.g. optimizer slots)
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    print("ignore.")
                    continue
                scope, name = variable_name.split("/")
                target, _ = name.split(":")
                if variable_name.find('coarse') >= 0:
                    print("coarse parameter: %s" % (variable_name))
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    print("refine parameter: %s" % (variable_name))
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" %(variable_name))
                if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                    print("ignore.")
                    continue
                scope, name = variable_name.split("/")
                target, _ = name.split(":")
                if variable_name.find('coarse') >= 0:
                    print("coarse parameter: %s" %(variable_name))
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    print("refine parameter: %s" %(variable_name))
                    refine_params[variable_name] = variable
        # define saver
        saver_coarse = tf.train.Saver(coarse_params)
        saver_refine = tf.train.Saver(refine_params)
        # fine tune
        if FLAGS.fine_tune:
            # load coarse paramteters
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            # load refine parameters
            refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
            if refine_ckpt and refine_ckpt.model_checkpoint_path:
                print("Pretrained refine Model Loading.")
                saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                print("Pretrained refine Model Restored.")
            else:
                print("No Pretrained refine Model.")
        # TODO train coarse or refine (change trainable)
        #if not FLAGS.coarse_train:
        #    for val in coarse_params:
        #        print val
        #if not FLAGS.refine_train:
        #    for val in coarse_params:
        #        print val
        # train refine
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # debug
        # set up the summary writer
        #summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)
        #batches = image_input.get_batches(FLAGS.batch_size)a
        #d = np.asarray(batches[0][0])
        #print d.shape
        #a = np.asarray(batches[0][1])
        #print a.shape
        #logits_val, logits_fine_val, loss_value = sess.run([logits, logits_fine, loss], feed_dict={images: batches[0][0], depths: batches[0][1], invalid_depths: batches[0][2], keep_conv: 1.0, keep_hidden: 1.0})
        #print len(logits_val[0])
        #print len(logits_fine_val[0])
        #print loss_value
        # repeat training up to MAX_STEPS epochs
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            batches = image_input.get_batches(FLAGS.batch_size)
            vals = image_input.get_validation()
            for batch in batches:
                train = batch[0]
                depth = batch[1]
                ignore_depth = batch[2]
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict={images: train,
                                                    depths: depth,
                                                    invalid_depths: ignore_depth,
                                                    keep_conv: 0.8,
                                                    keep_hidden: 0.5})
                # every 10th mini-batch: log throughput
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]" % (datetime.now(), step, index, loss_value, num_examples_per_step, examples_per_sec, duration))
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                # every 50th mini-batch: validation loss
                if index % 50 == 0:
                    output_vec, cost_value = sess.run([logits, loss],
                                                      feed_dict={images: vals[0],
                                                                 depths: vals[1],
                                                                 invalid_depths: vals[2],
                                                                 keep_conv: 1.0,
                                                                 keep_hidden: 1.0})
                    print("%s: %d[epoch]: %d[iteration]: validation loss: %f" % (datetime.now(), step, index, cost_value))
                # every 100th mini-batch: dump predicted depth maps
                # NOTE(review): output_vec is only assigned in the %50 branch;
                # relies on 100 being a multiple of 50.
                if index % 100 == 0:
                    output_dir = "predicts_%05d_%08d" % (step, index)
                    print("predicts output: %s" % output_dir)
                    data_feed_inputs_nyu.output_predict(output_vec, output_dir)
                previous_time = end_time
                index += 1
            # if index % 100 == 0:
            #     pass
            #     summary_str = sess.run(summary_op, feed_dict={images: train, labels: label, keep_conv: 0.8, keep_hidden: 0.5})
            #     # write the summary
            #     summary_writer.add_summary(summary_str, step)
            # checkpoint the active parameter set every epoch
            # if step % 5 == 0 or (step * 1) == MAX_STEPS:
            if FLAGS.refine_train:
                refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                saver_refine.save(sess, refine_checkpoint_path, global_step=step)
            else:
                coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                saver_coarse.save(sess, coarse_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    Cluttered-MNIST spatial-transformer variant (later duplicate of an
    earlier definition; this binding shadows it). Loads the distorted
    MNIST .npz, trains with dropout, logs every 10th batch with a
    5-example test spot check, and checkpoints every epoch.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        #images, labels = data_inputs.distorted_inputs(TF_RECORDS)
        # training data
        mnist = np.load('./data/mnist_sequence1_sample_5distortions5x5.npz')
        trX = mnist['X_train']
        trY = mnist['y_train']
        # X_valid = mnist_cluttered['X_valid']
        # y_valid = mnist_cluttered['y_valid']
        teX = mnist['X_test']
        teY = mnist['y_test']
        # restore NHWC image shape (flattened 40x40 grayscale)
        trX = trX.reshape(-1, 40, 40, 1)
        teX = teX.reshape(-1, 40, 40, 1)
        # % turn from dense to one hot representation
        trY = dense_to_one_hot(trY, n_classes=10)
        trY = trY.reshape(-1, 10)
        # Y_valid = dense_to_one_hot(y_valid, n_classes=10)
        teY = dense_to_one_hot(teY, n_classes=10)
        teY = teY.reshape(-1, 10)
        print("the number of train data: %d" % (len(trX)))
        # create mini_batch
        #datas, targets = trX.(trX, trY, BATCH_SIZE)
        images = tf.placeholder(tf.float32, [None, 40, 40, 1])
        labels = tf.placeholder(tf.float32, [None, 10])
        # dropout keep probabilities for conv / hidden layers
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        # graph output
        logits = model.inference(images, keep_conv, keep_hidden)
        # loss from the graph output and the labels
        #loss = model.loss(logits, labels)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        predict_op = tf.argmax(logits, 1)
        # training operation
        train_op = op.train(loss, global_step)
        # saver
        saver = tf.train.Saver(tf.all_variables())
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)
        # repeat training up to MAX_STEPS epochs
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            for start, end in zip(range(0, len(trX), BATCH_SIZE),
                                  range(BATCH_SIZE, len(trX), BATCH_SIZE)):
                _, loss_value = sess.run(
                    [train_op, loss],
                    feed_dict={
                        images: trX[start:end],
                        labels: trY[start:end],
                        keep_conv: 0.8,
                        keep_hidden: 0.5
                    })
                # every 10th mini-batch: log throughput and spot-check test loss
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    # NOTE(review): the *(step+1) factor makes this count grow
                    # with the epoch number — looks unintended; confirm.
                    num_examples_per_step = BATCH_SIZE * 10 * (step + 1)
                    examples_per_sec = num_examples_per_step / duration
                    print(
                        "%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/step]: %f[examples/sec]: %f[sec/iteration]"
                        % (datetime.now(), step, index, loss_value,
                           num_examples_per_step, examples_per_sec, duration))
                    index += 1
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    # evaluate on 5 random test examples (dropout disabled)
                    test_indices = np.arange(len(teX))  # Get A Test Batch
                    np.random.shuffle(test_indices)
                    test_indices = test_indices[0:5]
                    print "=" * 20
                    print teY[test_indices]
                    predict, cost_value = sess.run(
                        [predict_op, loss],
                        feed_dict={
                            images: teX[test_indices],
                            labels: teY[test_indices],
                            keep_conv: 1.0,
                            keep_hidden: 1.0
                        })
                    print predict
                    print("test loss: %f" % (cost_value))
                    print "=" * 20
                    previous_time = end_time
                # NOTE(review): index is also incremented inside the logging
                # branch above, so iteration numbers skip ahead.
                index += 1
                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'
            # every 100 iterations (original comment said 1000)
            if index % 100 == 0:
                pass
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           images: trX[start:end],
                                           labels: trY[start:end],
                                           keep_conv: 0.8,
                                           keep_hidden: 0.5
                                       })
                # write the summary
                summary_writer.add_summary(summary_str, step)
            if step % 1 == 0 or (step * 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    Caltech-101 spatial-transformer variant (duplicate of an earlier
    definition in this file). Trains from an ImageInput file list, keeps
    two savers (CNN-only "pretrain" params vs all trainable params), can
    restore a pretrained CNN, and checkpoints both sets every epoch.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        image_input = ImageInput('./data/101Caltech_shuffles.txt')
        print("the number of train data: %d" % (len(image_input.image_paths)))
        images = tf.placeholder(tf.float32, [None, 224, 224, 3])
        labels = tf.placeholder(tf.float32, [None, 101])
        # dropout keep probabilities for conv / hidden layers
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        # graph output
        logits, transform_result = model.inference(images, keep_conv, keep_hidden)
        # loss from the graph output and the labels
        # loss = model.loss(logits, labels)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        predict_op = tf.argmax(logits, 1)
        # training operation
        train_op = op.train(loss, global_step)
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        # saver
        #saver = tf.train.Saver(tf.all_variables())
        sess.run(init_op)
        # save the pretrained CNN and the full model separately
        pretrain_params = {}
        train_params = {}
        for variable in tf.trainable_variables():
            variable_name = variable.name
            #print("parameter: %s" %(variable_name))
            scope, name = variable_name.split("/")
            target, _ = name.split(":")
            # everything outside the spatial transformer counts as
            # pretrainable CNN weights
            if variable_name.find('spatial_transformer') < 0:
                print("pretrain parameter: %s" %(variable_name))
                pretrain_params[variable_name] = variable
            print("train parameter: %s" %(variable_name))
            train_params[variable_name] = variable
        saver_cnn = tf.train.Saver(pretrain_params)
        saver_transformers = tf.train.Saver(train_params)
        # pretrained_model
        if FLAGS.fine_tune:
            ckpt = tf.train.get_checkpoint_state(PRETRAIN_DIR)
            if ckpt and ckpt.model_checkpoint_path:
                print("Pretrained Model Loading.")
                saver_cnn.restore(sess, ckpt.model_checkpoint_path)
                print("Pretrained Model Restored.")
            else:
                print("No Pretrained Model.")
        # pretrained model from another type models.
        # # saver
        # print type(tf.all_variables())
        # for variable in tf.trainable_variables():
        #     variable_name = variable.name
        #     variable_value = variable.eval(sess)
        #     if variable_name.find('softmax_linear') < 0 and variable_name.find('spatial_transformer') < 0:
        #         print("trained parameter: %s" %(variable_name))
        #         scope, name = variable_name.split("/")
        #         target, _ = name.split(":")
        #         with tf.variable_scope(scope, reuse=True):
        #             sess.run(tf.get_variable(target).assign(variable_value))
        # trained_model = FLAGS.trained_model
        # print trained_model
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)
        # repeat training up to MAX_STEPS epochs
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            batches = image_input.get_batches(FLAGS.batch_size)
            for batch in batches:
                train = batch[0]
                label = batch[1]
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict={images: train,
                                                    labels: label,
                                                    keep_conv: 0.8,
                                                    keep_hidden: 0.5})
                # every 10th mini-batch: log throughput and spot-check 2 examples
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]" % (datetime.now(), step, index, loss_value, num_examples_per_step, examples_per_sec, duration))
                    index += 1
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    # test_indices = np.arange(len(teX)) # Get A Test Batch
                    # np.random.shuffle(test_indices)
                    # test_indices = test_indices[0:5]
                    # evaluate the first two examples of the current batch
                    # (dropout disabled)
                    print "="*20
                    testx = train[0:2]
                    #print testx
                    testy = label[0:2]
                    print np.argmax(testy[0])
                    print np.argmax(testy[1])
                    output_vec, predict, cost_value = sess.run([logits, predict_op, loss],
                                                               feed_dict={images: testx,
                                                                          labels: testy,
                                                                          keep_conv: 1.0,
                                                                          keep_hidden: 1.0})
                    print predict
                    print("test loss: %f" % (cost_value))
                    print "="*20
                    previous_time = end_time
                # NOTE(review): index is also incremented inside the logging
                # branch above, so iteration numbers skip ahead.
                index += 1
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # every 100 iterations
            if index % 100 == 0:
                pass
                summary_str = sess.run(summary_op,
                                       feed_dict={images: train,
                                                  labels: label,
                                                  keep_conv: 0.8,
                                                  keep_hidden: 0.5})
                # write the summary
                summary_writer.add_summary(summary_str, step)
            # checkpoint both savers every epoch
            if step % 1 == 0 or (step * 1) == MAX_STEPS:
                pretrain_checkpoint_path = PRETRAIN_DIR + '/model.ckpt'
                train_checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver_cnn.save(sess, pretrain_checkpoint_path, global_step=step)
                saver_transformers.save(sess, train_checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    ''' Train CNN_tiny for a number of steps.

    TFRecords variant (duplicate of an earlier definition in this file).
    Reads distorted input batches from TF_RECORDS via the input pipeline,
    logs every 100 steps, writes summaries every 100 steps, and
    checkpoints every 1000 steps.
    '''
    with tf.Graph().as_default():
        # global step counter
        global_step = tf.Variable(0, trainable=False)
        # training data
        images, labels = data_inputs.distorted_inputs(TF_RECORDS)
        # graph output
        logits = model.inference(images)
        # loss from the graph output and the labels
        loss = model.loss(logits, labels)
        # training operation
        train_op = op.train(loss, global_step)
        # saver
        saver = tf.train.Saver(tf.all_variables())
        # summaries
        summary_op = tf.merge_all_summaries()
        # initialization operation
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # set up the summary writer
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)
        # checkpoint name stamped with the current time
        # (assumes tdatetime is a module-level datetime — TODO confirm)
        model_name = '/model%s.ckpt' % (tdatetime.strftime('%Y%m%d%H%M%S'))
        # repeat training up to MAX_STEPS
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            # every 100 steps
            if step % 100 == 0:
                # examples per step = mini batch size
                num_examples_per_step = BATCH_SIZE
                # examples processed per second
                examples_per_sec = num_examples_per_step / duration
                # wall time per batch
                sec_per_batch = float(duration)
                # NOTE(review): format_str is unused and its "$s" is a typo
                # for "%s".
                format_str = '$s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                print str(datetime.now()) + ': step' + str(step) + ', loss= '+ str(loss_value) + ' ' + str(examples_per_sec) + ' examples/sec; ' + str(sec_per_batch) + ' sec/batch'
            # every 100 steps
            if step % 100 == 0:
                pass
                summary_str = sess.run(summary_op)
                # write the summary
                summary_writer.add_summary(summary_str, step)
            if step % 1000 == 0 or (step * 1) == MAX_STEPS:
                checkpoint_path = TRAIN_DIR + model_name
                saver.save(sess, checkpoint_path, global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()
def train():
    """Train the NYU-Depth-v2 coarse/refine depth-prediction network.

    Builds placeholders for images, target depths and an invalid-depth mask,
    runs either the coarse network or the refine network (FLAGS.refine_train),
    optionally restores pretrained coarse/refine checkpoints
    (FLAGS.fine_tune), then trains for MAX_STEPS epochs over mini-batches
    produced by ImageInput, checkpointing the active sub-network every epoch.

    NOTE(review): this is the second `def train()` in the file; at import time
    it shadows the earlier definition with the same name.
    """
    with tf.Graph().as_default():
        # Global step counter, incremented by the train op.
        global_step = tf.Variable(0, trainable=False)
        # NYU Dataset V2 original size(480 x 640 x 3) -> crop -> (460 x 620 x 3)
        image_input = ImageInput('./data/nyu_depth_v2_labeled.mat')
        print("the number of train data: %d" % (len(image_input.images)))
        # Input placeholders.  Depth targets/masks are fed as
        # (batch, 1, 55, 74) — presumably the coarse network's output
        # resolution; confirm against model.inference.
        images = tf.placeholder(tf.float32, [
            None, FLAGS.crop_size_height, FLAGS.crop_size_width,
            FLAGS.image_depth
        ])
        depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        invalid_depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        # Dropout keep probabilities (conv layers / hidden layers).
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)
        # Graph output: pick the refine or the coarse network.
        if FLAGS.refine_train:
            print("refine train.")
            logits = model.inference_refine(images, keep_conv, keep_hidden)
        else:
            print("coarse train.")
            logits = model.inference(images, keep_conv, keep_hidden)
        # Loss computed from the graph output and the labels.
        loss = model.loss(logits, depths, invalid_depths)
        # Training operation.
        train_op = op.train(loss, global_step)
        # Merged summaries.
        summary_op = tf.merge_all_summaries()
        # Variable-initialization op.
        init_op = tf.initialize_all_variables()
        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        # saver
        #saver = tf.train.Saver(tf.all_variables())
        sess.run(init_op)
        # Partition variables into coarse and refine groups so each
        # sub-network gets its own Saver/checkpoint.  Only variables named
        # exactly "<scope>/<name>" (one slash) are considered; the substring
        # tests on 'coarse'/'fine' do the classification.
        coarse_params = {}
        refine_params = {}
        if FLAGS.refine_train:
            for variable in tf.all_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count(
                        "/") != 1:
                    print("ignore.")
                    continue
                scope, name = variable_name.split("/")
                target, _ = name.split(":")
                if variable_name.find('coarse') >= 0:
                    print("coarse parameter: %s" % (variable_name))
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    print("refine parameter: %s" % (variable_name))
                    refine_params[variable_name] = variable
        else:
            # Coarse-only training inspects trainable variables instead of
            # all variables; the classification logic is otherwise identical.
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count(
                        "/") != 1:
                    print("ignore.")
                    continue
                scope, name = variable_name.split("/")
                target, _ = name.split(":")
                if variable_name.find('coarse') >= 0:
                    print("coarse parameter: %s" % (variable_name))
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    print("refine parameter: %s" % (variable_name))
                    refine_params[variable_name] = variable
        # define saver
        saver_coarse = tf.train.Saver(coarse_params)
        saver_refine = tf.train.Saver(refine_params)
        # fine tune: restore previously-trained parameters, if any.
        if FLAGS.fine_tune:
            # load coarse paramteters
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print("Pretrained coarse Model Loading.")
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
                print("Pretrained coarse Model Restored.")
            else:
                print("No Pretrained coarse Model.")
            # load refine parameters
            refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
            if refine_ckpt and refine_ckpt.model_checkpoint_path:
                print("Pretrained refine Model Loading.")
                saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)
                print("Pretrained refine Model Restored.")
            else:
                print("No Pretrained refine Model.")

        # TODO train coarse or refine (change trainable)
        #if not FLAGS.coarse_train:
        #    for val in coarse_params:
        #        print val
        #if not FLAGS.refine_train:
        #    for val in coarse_params:
        #        print val

        # train refine
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # debug
        # Set up the summary writer.
        #summary_writer = tf.train.SummaryWriter(TRAIN_DIR, graph_def=sess.graph_def)
        #batches = image_input.get_batches(FLAGS.batch_size)a
        #d = np.asarray(batches[0][0])
        #print d.shape
        #a = np.asarray(batches[0][1])
        #print a.shape
        #logits_val, logits_fine_val, loss_value = sess.run([logits, logits_fine, loss], feed_dict={images: batches[0][0], depths: batches[0][1], invalid_depths: batches[0][2], keep_conv: 1.0, keep_hidden: 1.0})
        #print len(logits_val[0])
        #print len(logits_fine_val[0])
        #print loss_value

        # Train repeatedly until MAX_STEPS (each `step` is one epoch over
        # the batches).
        for step in xrange(MAX_STEPS):
            start_time = time.time()
            previous_time = start_time
            index = 0
            batches = image_input.get_batches(FLAGS.batch_size)
            vals = image_input.get_validation()
            for batch in batches:
                # batch = (images, depths, invalid-depth mask)
                train = batch[0]
                depth = batch[1]
                ignore_depth = batch[2]
                _, loss_value = sess.run(
                    [train_op, loss],
                    feed_dict={
                        images: train,
                        depths: depth,
                        invalid_depths: ignore_depth,
                        keep_conv: 0.8,
                        keep_hidden: 0.5
                    })
                # Every 10 mini-batches: report throughput since the last
                # report.
                if index % 10 == 0:
                    end_time = time.time()
                    duration = end_time - previous_time
                    num_examples_per_step = BATCH_SIZE * 10
                    examples_per_sec = num_examples_per_step / duration
                    print(
                        "%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]"
                        % (datetime.now(), step, index, loss_value,
                           num_examples_per_step, examples_per_sec, duration))
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                # Every 50 mini-batches: evaluate on the validation split
                # (dropout disabled via keep probabilities of 1.0).
                if index % 50 == 0:
                    output_vec, cost_value = sess.run(
                        [logits, loss],
                        feed_dict={
                            images: vals[0],
                            depths: vals[1],
                            invalid_depths: vals[2],
                            keep_conv: 1.0,
                            keep_hidden: 1.0
                        })
                    print("%s: %d[epoch]: %d[iteration]: validation loss: %f"
                          % (datetime.now(), step, index, cost_value))
                    # Every 100 mini-batches: dump the validation predictions.
                    # (100 % 50 == 0, so output_vec is always fresh here.)
                    if index % 100 == 0:
                        output_dir = "predicts_%05d_%08d" % (step, index)
                        print("predicts output: %s" % output_dir)
                        data_feed_inputs_nyu.output_predict(
                            output_vec, output_dir)
                # NOTE(review): previous_time is reassigned on every
                # iteration from the end_time captured at the last
                # `index % 10` report, so the measured duration window may
                # not be exactly 10 iterations — confirm intent.
                previous_time = end_time
                index += 1

            # if index % 100 == 0:
            #     pass
            #     summary_str = sess.run(summary_op, feed_dict={images: train, labels: label, keep_conv: 0.8, keep_hidden: 0.5})
            #     # Write the summary.
            #     summary_writer.add_summary(summary_str, step)

            # Checkpoint the active sub-network once per epoch.
            # if step % 5 == 0 or (step * 1) == MAX_STEPS:
            if FLAGS.refine_train:
                refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                saver_refine.save(sess, refine_checkpoint_path,
                                  global_step=step)
            else:
                coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                saver_coarse.save(sess, coarse_checkpoint_path,
                                  global_step=step)
        coord.request_stop()
        coord.join(threads)
        sess.close()