def evaluate():
    """Evaluate checkpoints in a loop until ``FLAGS.run_once`` stops it.

    Builds the inference graph on the tfrecords input, creates a Saver that
    maps checkpoint names onto the graph's variables, and calls
    ``eval_once`` repeatedly, sleeping ``FLAGS.eval_interval_secs`` seconds
    between rounds.
    """
    with tf.Graph().as_default():
        # Load the test data.
        # NOTE(review): the file name contains "train" -- confirm this is the
        # intended evaluation set.
        images, labels = data_inputs.inputs('data/train_kirin_norm_32.tfrecords')
        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)

        # Trainable variables are restored from their moving-average shadow
        # names; every other variable is restored under its own op name.
        # The loop must cover *all* variables: iterating only the trainable
        # ones made the else-branch unreachable.
        trainable_variables = tf.trainable_variables()
        variables_to_restore = {}
        for v in tf.all_variables():
            if v in trainable_variables:
                restore_name = variable_averages.average_name(v)
            else:
                restore_name = v.op.name
            variables_to_restore[restore_name] = v
        saver = tf.train.Saver(variables_to_restore)

        summary_op = tf.merge_all_summaries()
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                                graph_def=graph_def)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def train():
    """Train the model on MNIST and print training/validation/test accuracy.

    Runs 10000 steps with mini-batches of 50 examples. Every 100 steps the
    accuracy on the current batch and on the validation set is printed;
    after training the accuracy on the test set is printed.
    """
    tr, va, te = read_dataset('../mnist.pkl.gz')
    binarizer = LabelBinarizer().fit(range(10))

    x = tf.placeholder(tf.float32, [None, 784])
    y = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    preds = model.inference(x, keep_prob)
    _, total_loss = model.loss(preds, y)
    acc = model.evaluation(preds, y)

    # learning rate: 0.1
    train_op = model.training(total_loss, 0.1)

    init = tf.initialize_all_variables()
    sess = tf.Session()
    try:
        sess.run(init)
        for i in xrange(10000):
            batch_xs, batch_ys = tr.next_batch(50)
            # Binarize once per step; both the accuracy check and the
            # training step use the same labels.
            batch_labels = binarizer.transform(batch_ys)
            if i % 100 == 0:
                # Dropout is disabled (keep_prob 1.0) when measuring accuracy.
                train_acc = acc.eval(
                    feed_dict={x: batch_xs, y: batch_labels, keep_prob: 1.0},
                    session=sess)
                print("step: {0}, training accuracy {1}".format(i, train_acc))
                validation_accuracy = getAccuracy(
                    x, y, keep_prob, binarizer, acc, va, sess)
                print("Validation accuracy : {0}".format(validation_accuracy))
            train_op.run(
                feed_dict={x: batch_xs, y: batch_labels, keep_prob: 0.5},
                session=sess)

        test_accuracy = getAccuracy(x, y, keep_prob, binarizer, acc, te, sess)
        # A single formatted string prints the same way under the print
        # statement and the print function (the old call printed a tuple).
        print("Test accuracy : {0}".format(test_accuracy))
    finally:
        # Release the session's resources even if training fails.
        sess.close()
def train():
    """Build the training graph and run ``input.FLAGS.max_steps`` steps.

    Progress is printed every step, summaries are written every 10 steps
    and a checkpoint is saved every 25 steps into ``input.FLAGS.train_dir``.
    """
    flags = input.FLAGS
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        image, label = input.get_input(LABEL_PATH, LABEL_FORMAT,
                                       IMAGE_PATH, IMAGE_FORMAT)
        logits = model.inference(image)
        loss = model.loss(logits, label)
        train_op = model.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=flags.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(flags.train_dir,
                                                graph_def=sess.graph_def)

        progress_format = "%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)"
        for step in xrange(flags.max_steps):
            started = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - started

            assert not np.isnan(loss_value), "Model diverged with loss = NaN"

            # Progress is reported on every step.
            examples_per_sec = flags.batch_size / duration
            print(progress_format % (datetime.now(), step, loss_value,
                                     examples_per_sec, float(duration)))

            if step % 10 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Save the model checkpoint periodically.
            if step % 25 == 0:
                saver.save(sess,
                           os.path.join(flags.train_dir, "model.ckpt"),
                           global_step=step)
def evaluate(network, checkpoint_dir):
    """Run one evaluation pass of a CIFAR-10 network and return its result.

    Args:
        network: 1 selects the ``cifar10`` module; any other value selects
            ``cifar10_2``.
        checkpoint_dir: forwarded unchanged to ``eval_once``.

    Returns:
        Whatever ``eval_once`` returns for this graph.
    """
    # Both modules are used through the same inputs()/inference() calls.
    net = cifar10 if network == 1 else cifar10_2

    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        use_test_data = FLAGS.eval_data == 'test'
        images, labels = net.inputs(eval_data=use_test_data)
        # inference() returns five values; only the logits are needed here.
        logits, _, _, _, _ = net.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables.
        averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        return eval_once(network, saver, summary_writer, top_k_op,
                         summary_op, checkpoint_dir)
def __init__(self):
    """Create the processor's state, load the model and start it.

    NOTE(review): presumably ImageProcessor derives from threading.Thread,
    since ``self.start()`` is called last -- confirm against the class header.
    """
    super(ImageProcessor, self).__init__()
    # Flag, event and in-memory byte buffer held by this instance.
    self.terminated = False
    self.event = threading.Event()
    self.stream = io.BytesIO()
    # Result of model.inference(), kept on the instance for later use.
    self.inference = model.inference()
    # Started only after every attribute above has been assigned.
    self.start()
def evaluate():
    """Evaluate the model, once or periodically depending on FLAGS.run_once.

    The moving-average versions of the variables are restored for
    evaluation. Between rounds the loop sleeps
    ``FLAGS.eval_interval_secs`` seconds.
    """
    with tf.Graph().as_default():
        use_test_data = FLAGS.eval_data == 'test'
        images, labels = model.inputs(eval_data=use_test_data)
        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Saver that loads the moving averages of the variables.
        averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(
            FLAGS.eval_dir,
            graph_def=tf.get_default_graph().as_graph_def())

        # Always evaluate at least once, then keep going unless run_once.
        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Eval model for a number of steps.

    Builds the evaluation graph, restores the moving-average versions of the
    variables and calls ``eval_once`` in a loop, sleeping
    ``FLAGS.eval_interval_secs`` seconds between rounds until
    ``FLAGS.run_once`` ends it.
    """
    # tf.Graph itself is not a context manager; as_default() returns the
    # context manager that installs the graph and yields it as ``g``.
    with tf.Graph().as_default() as g:
        # Get images and labels for model.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = model.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def train():
    """Train the model that maps gray images to color images.

    Restores the newest checkpoint from ``FLAGS.checkpoint_dir`` when one
    exists, then runs ``FLAGS.max_steps`` steps. Losses are printed every 10
    steps, summaries are written every 100 steps, and a checkpoint is saved
    every 1000 steps and on the final step.
    """
    with tf.Graph().as_default(), tf.device("/gpu:0"):
        global_step = tf.Variable(0, trainable=False)

        dirs_gray, dirs_color = make_data_directory_list()
        gray_images, color_images = input.read_dirs(dirs_gray, dirs_color,
                                                    is_train=True)

        inferenced = model.inference(gray_images)
        raw_loss, total_loss = model.loss(inferenced, color_images)
        train_op = get_train_op(raw_loss, total_loss, global_step)

        summary_op = tf.merge_all_summaries()
        # Only the trainable variables are saved and restored.
        saver = tf.train.Saver(tf.trainable_variables())
        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("restore from {}".format(ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        progress_format = ('%s: step %d, raw_loss = %.2f, total_loss = %.2f '
                           '(%.1f examples/sec; %.3f sec/batch)')
        final_step = FLAGS.max_steps - 1
        for step in xrange(FLAGS.max_steps):
            started = time.time()
            _, value_raw_loss, value_total_loss = sess.run(
                [train_op, raw_loss, total_loss])
            duration = time.time() - started

            if step % 10 == 0:
                examples_per_sec = FLAGS.batch_size / duration
                print(progress_format % (datetime.now(), step, value_raw_loss,
                                         value_total_loss, examples_per_sec,
                                         float(duration)))

            if step % 100 == 0:
                summary_writer.add_summary(sess.run(summary_op), step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or step == final_step:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=step)
def train():
    """Train the model, optionally resuming from the latest checkpoint.

    When ``FLAGS.resume_training`` is set and a checkpoint exists in
    ``FLAGS.train_dir``, the variables are restored from it and the step
    counter continues from the step encoded in the checkpoint file name.
    Progress is printed every 10 steps, summaries are written every 50
    steps, and a checkpoint is saved every 100 steps and on the final step.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = model.distorted_inputs()
        logits = model.inference(images)
        loss = model.loss(logits, labels)
        train_op = model.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # Initialize BEFORE restoring: running the initializer after
        # saver.restore() would overwrite the restored values.
        sess.run(init)

        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if FLAGS.resume_training and ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Checkpoint files are named "<prefix>-<step>".
            current_step = int(ckpt.model_checkpoint_path
                               .split('/')[-1].split('-')[-1])
        else:
            current_step = 0

        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(SUMMARY_DIR,
                                                graph_def=sess.graph_def)

        # The previous implicit concatenation dropped the separating spaces.
        format_str = ('%s: step %d, loss = %.2f '
                      '(%.1f examples/sec; %.3f sec/batch)')

        for step in xrange(current_step, FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 50 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def eval_h5(conf, ckpt):
    """
    Evaluate the model on the training and test streams and print the PSNR.

    Args:
        conf: configuration dictionary; "cw" and "iw" are read here, and the
            dictionary is also passed on to the helper functions
        ckpt: restore from ckpt
    """
    cw = conf["cw"]
    iw = conf["iw"]

    # Prepare data
    tr_stream, te_stream = tools.prepare_data(conf)
    try:
        with tf.Graph().as_default(), tf.device(
                "/cpu:0" if FLAGS.dev_assign else None):
            # Placeholders: one input/target pair per tower.
            Xs = [tf.placeholder(tf.float32, [None, iw, iw, 1],
                                 name="X_%02d" % i)
                  for i in range(FLAGS.num_gpus)]
            Ys = [tf.placeholder(tf.float32,
                                 [None, iw - 2 * cw, iw - 2 * cw, 1],
                                 name="Y_%02d" % i)
                  for i in range(FLAGS.num_gpus)]

            # Build one model tower per device.
            y_splits = []
            for i in range(FLAGS.num_gpus):
                with tf.device(("/gpu:%d" % i) if FLAGS.dev_assign else None):
                    with tf.name_scope(
                            "%s_%02d" % (FLAGS.tower_name, i)) as scope:
                        # This constructs the entire model but shares the
                        # variables across all towers.
                        y_split = model.inference(Xs[i], conf)
                        y_splits.append(y_split)
                        # The loss value is not used for evaluation; the
                        # call is kept so the graph is built as before.
                        model.loss(y_split, Ys[i], conf, scope)

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()
            y = tf.concat(0, y_splits, name="y")

            # Tensorflow boilerplate
            sess, _, _, _ = tools.tf_boilerplate(None, conf, ckpt)

            # Evaluation
            psnr_tr = eval_epoch(Xs, Ys, y, sess, tr_stream, cw)
            psnr_te = eval_epoch(Xs, Ys, y, sess, te_stream, cw)
            print("approx psnr_tr=%.3f" % psnr_tr)
            print("approx psnr_te=%.3f" % psnr_te)
    finally:
        # Close the data streams even when graph construction or the
        # evaluation raises.
        tr_stream.close()
        te_stream.close()
def run_training():
    """Train the cats-vs-dogs classifier for up to MAX_STEP steps.

    Loss and accuracy are printed and summaries written every 50 steps; a
    checkpoint is saved every 2000 steps and on the last step. The queue
    runner threads are stopped and the session closed on exit.
    """
    # you need to change the directories to yours.
    train_dir = '/home/kevin/tensorflow/cats_vs_dogs/data/train/'
    logs_train_dir = '/home/kevin/tensorflow/cats_vs_dogs/logs/train/'

    train, train_label = input_data.get_files(train_dir)
    train_batch, train_label_batch = input_data.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
    train_loss = model.losses(train_logits, train_label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    train_accuracy = model.evaluation(train_logits, train_label_batch)

    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    last_step = MAX_STEP - 1
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, loss_value, accuracy_value = sess.run(
                [train_op, train_loss, train_accuracy])

            if step % 50 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                      % (step, loss_value, accuracy_value * 100.0))
                train_writer.add_summary(sess.run(summary_op), step)

            if step % 2000 == 0 or step == last_step:
                saver.save(sess,
                           os.path.join(logs_train_dir, 'model.ckpt'),
                           global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
def main(argv=None):
    """Predict the class of every image in ``images_to_predict``.

    Restores the model from ``FLAGS.checkpoint``, runs one batch through it
    and prints, per image, the top-1 class and the raw network outputs.
    Errors are reported with a traceback instead of propagating.

    Args:
        argv: unused.
    """
    try:
        with tf.Graph().as_default():
            # Load all images from disk
            data_set = data_input.read_image_batches_without_labels_from_file_list(
                images_to_predict, FLAGS)

            # Inference model
            images_placeholder, labels_placeholder = utils.create_placeholder_inputs(
                data_set.batch_size, FLAGS.image_height, FLAGS.image_width)
            logits = model.inference(images_placeholder, data_set.batch_size,
                                     FLAGS.num_classes)

            # Max. value is our prediction
            prediction = tf.argmax(logits, 1)

            # Add the variable initializer Op.
            init = tf.initialize_all_variables()
            saver = tf.train.Saver()

            # Create a session for running Ops on the Graph.
            with tf.Session() as sess:
                sess.run(init)

                # Created outside the inner try so the cleanup in `finally`
                # never refers to an unbound name.
                coord = tf.train.Coordinator()
                threads = []
                try:
                    # Start the queue runners.
                    threads = tf.train.start_queue_runners(sess=sess,
                                                           coord=coord)

                    # Restore variables from disk.
                    print("Restore session from checkpoint {0}\n".format(
                        FLAGS.checkpoint))
                    saver.restore(sess, FLAGS.checkpoint)

                    # Predict
                    feed_dict = utils.create_feed_data(
                        sess, images_placeholder, labels_placeholder, data_set)
                    predictions, outputs = sess.run([prediction, logits],
                                                    feed_dict=feed_dict)

                    # Print results
                    for i in range(0, data_set.size):
                        print("{0} top-1-class: {1}".format(
                            images_to_predict[i], predictions[i]))
                        print("Out = {0}\n".format(", ".join(
                            "{:0.2f}".format(value) for value in outputs[i])))
                finally:
                    print("\nWaiting for all threads...")
                    coord.request_stop()
                    coord.join(threads)
    except Exception:
        # Top-level boundary: report the failure. KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        traceback.print_exc()
    finally:
        print("\nDone.\n")
def eval_sam(conf):
    """
    Evaluate against the entire test set of images.

    Every file found under conf["path_eval"] is resized by the factor
    conf["sr"], shaved, and passed to ``infer``, which is given the output
    path ``tmp/<name>_HR.bmp``.

    Args:
        conf: configuration dictionary
    """
    path_te = conf["path_eval"]
    iw = conf["iw"]
    sr = conf["sr"]
    fns_te = preproc._get_filenames(path_te)

    with tf.Graph().as_default(), tf.device(
            "/cpu:0" if FLAGS.dev_assign else None):
        # Placeholders
        Xs = [tf.placeholder(tf.float32, [None, iw, iw, 1], name="X_%02d" % i)
              for i in range(FLAGS.num_gpus)]

        # One model tower per device; variables are shared between towers.
        y_splits = []
        for i in range(FLAGS.num_gpus):
            with tf.device(("/gpu:%d" % i) if FLAGS.dev_assign else None):
                with tf.name_scope("%s_%02d" % (FLAGS.tower_name, i)):
                    y_split, _ = model.inference(Xs[i], conf)
                    y_splits.append(y_split)
                    tf.get_variable_scope().reuse_variables()
        y = tf.concat(0, y_splits, name="y")

        # Restore
        saver = tf.train.Saver(tf.trainable_variables())
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        try:
            ckpt_state = tf.train.get_checkpoint_state(conf["path_tmp"])
            if ckpt_state:
                ckpt_path = ckpt_state.model_checkpoint_path
                print("checkpoint found: %s" % ckpt_path)
                saver.restore(sess, ckpt_path)
            else:
                # NOTE(review): execution continues without restored
                # weights; confirm whether this should abort instead.
                print("checkpoint not found!")
                time.sleep(2)

            # Iterate over each image, and calculate error
            for fn in fns_te:
                lr = preproc.imresize(sm.imread(fn), float(sr))
                lr = preproc.shave(lr, sr)  # border = sr
                # File name without directory, cut at its first dot.
                fn_ = fn.split("/")[-1].split(".")[0]
                out_name = os.path.join("tmp", fn_ + "_HR.bmp")
                infer(lr, Xs, y, sess, conf, out_name)
        finally:
            # Release the session's resources even if inference fails.
            sess.close()
def main(_):
    """Train the steering model and log/checkpoint along the way.

    Runs EPOCH epochs of STEP_PER_EPOCH steps. The summary is written on
    every step, the loss on a validation batch is printed every 10 steps of
    an epoch, and a checkpoint is written to LOG_DIR every 32 steps of an
    epoch.
    """
    with tf.Graph().as_default():
        session_config = tf.ConfigProto()
        session_config.gpu_options.allocator_type = 'BFC'
        sess = tf.InteractiveSession(config=session_config)

        x_image = tf.placeholder(tf.float32, shape=[None, 66, 200, 3],
                                 name="x_image")
        y_label = tf.placeholder(tf.float32, shape=[None, 1], name="y_label")
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")

        y_pred = model.inference(x_image, keep_prob)
        _, losses, total_loss = loss(y_pred, y_label)
        train_op = train(total_loss)

        merged_summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter('train', sess.graph)
        saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)

        if not os.path.exists(LOG_DIR):
            os.makedirs(LOG_DIR)
        checkpoint_path = os.path.join(LOG_DIR, "steering.ckpt")

        sess.run(tf.global_variables_initializer())
        udacity_data.read_data()

        for epoch in range(EPOCH):
            epoch_offset = epoch * STEP_PER_EPOCH
            for i in range(STEP_PER_EPOCH):
                steps = epoch_offset + i

                train_xs, train_ys = udacity_data.load_train_batch(BATCH_SIZE)
                train_feed = {x_image: train_xs, y_label: train_ys,
                              keep_prob: 0.7}
                _, summary = sess.run([train_op, merged_summary_op],
                                      feed_dict=train_feed)

                if i % 10 == 0:
                    # Loss on a validation batch, with keep_prob set to 1.0.
                    val_xs, val_ys = udacity_data.load_val_batch(BATCH_SIZE)
                    val_feed = {x_image: val_xs, y_label: val_ys,
                                keep_prob: 1.0}
                    loss_value = losses.eval(feed_dict=val_feed)
                    print("Epoch: %d, Step: %d, Loss: %g"
                          % (epoch, steps, loss_value))

                # write logs at every iteration
                summary_writer.add_summary(summary, steps)

                if i % 32 == 0:
                    # Re-create the directory if it vanished during training.
                    if not os.path.exists(LOG_DIR):
                        os.makedirs(LOG_DIR)
                    saver.save(sess, checkpoint_path)
def test():
    """Evaluate the model, once or periodically per input.FLAGS.run_once.

    The moving-average versions of the variables are restored for
    evaluation. Between rounds the loop sleeps
    ``input.FLAGS.eval_interval_secs`` seconds.
    """
    flags = input.FLAGS
    with tf.Graph().as_default():
        image, label = input.get_input(LABEL_PATH, LABEL_FORMAT,
                                       IMAGE_PATH, IMAGE_FORMAT)
        logits = model.inference(image)
        top_k_op = tf.nn.in_top_k(logits, label, 1)

        # Saver that loads the moving averages of the variables.
        averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        # Get summaries for TENSOR BOARD
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(
            flags.eval_dir,
            graph_def=tf.get_default_graph().as_graph_def())

        # Always evaluate at least once, then keep going unless run_once.
        evaluate_model(saver, summary_writer, top_k_op, summary_op)
        while not flags.run_once:
            time.sleep(flags.eval_interval_secs)
            evaluate_model(saver, summary_writer, top_k_op, summary_op)
def test_one_image():
    """Test one image with the saved models and parameters.

    Loads one image via ``get_one_image(test_dir)``, restores the newest
    checkpoint from ``train_logs_dir`` and prints whether the image is
    classified as a cat (class 0) or a dog (class 1) together with the
    softmax probability. Returns without predicting when no checkpoint is
    found.
    """
    test_image = get_one_image(test_dir)

    with tf.Graph().as_default():
        batch_size = 1
        n_classes = 2

        # The image enters the graph through the placeholder, so the value
        # passed in feed_dict is what the network actually sees (previously
        # the placeholder was fed but not connected to anything).
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(x)
        image = tf.reshape(image, [1, 208, 208, 3])

        logit = model.inference(image, batch_size, n_classes)
        logit = tf.nn.softmax(logit)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(train_logs_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                # Without restored weights the variables are uninitialized,
                # so there is nothing meaningful to run.
                print('No checkpoint file found')
                return

            # Checkpoint files are named "<prefix>-<global_step>".
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

            prediction = sess.run(logit, feed_dict={x: test_image})
            max_index = np.argmax(prediction)
            # prediction has one row; index the scalar instead of formatting
            # a 1-element array.
            if max_index == 0:
                print('This is a cat with possibility %.6f' % prediction[0, 0])
            else:
                print('This is a dog with possibility %.6f' % prediction[0, 1])
def evaluate():
    """Run the restored model on gray images and write the results as PNGs.

    Runs the model 10 times and writes every output image to
    ``FLAGS.dir_out`` as ``<n>.png``, numbered from 1.

    Raises:
        ValueError: when no checkpoint is found in FLAGS.checkpoint_dir.
    """
    with tf.Graph().as_default(), tf.device("/cpu:0"):
        dirs_gray, dirs_color = make_data_directory_list()
        gray_images = input.read_dirs(dirs_gray, list_color_dir=None,
                                      is_train=False)

        inferenced = model.inference(gray_images)
        int_images = tf.image.convert_image_dtype(inferenced, dtype=tf.uint8,
                                                  saturate=True)

        sess = tf.Session(config=tf.ConfigProto())

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            raise ValueError("invalid checkpoint")
        tf.train.Saver().restore(sess, ckpt.model_checkpoint_path)

        tf.train.start_queue_runners(sess=sess)

        written = 0
        for _ in range(10):
            for rgb_image in sess.run(int_images):
                written += 1
                # The image is converted from RGB to BGR before writing.
                bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
                out_path = os.path.join(FLAGS.dir_out,
                                        "{}.png".format(written))
                cv2.imwrite(out_path, bgr_image)
def run_training():
    """ Train the Listnr model for a number of steps """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Input pipeline, model, loss and accuracy.
        frames, labels = tm.inputs(FLAGS.batch_size,
                                   NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
        logits = md.inference(frames)
        looss = md.loss(logits, labels)
        accuracy = md.accuracy(logits, labels)

        # Op that trains the model with one batch of examples and updates
        # the model parameters.
        train_op = train(looss, global_step)

        saver = tf.train.Saver(tf.all_variables(),
                               max_to_keep=FLAGS.num_epochs)
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # run the training
        steps_per_epoch = int(
            NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size)
        max_steps = FLAGS.num_epochs * steps_per_epoch

        losses_batches = []
        accuracies_batches = []
        progress_format = ('%s: step %d, loss = %.2f, train_acc = %.2f '
                           '(%.1f examples/sec; %.3f sec/batch)')

        for step in range(max_steps + 1):
            started = time.time()
            _, loss_value, acc_value = sess.run([train_op, looss, accuracy])
            duration = time.time() - started

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                examples_per_sec = FLAGS.batch_size / duration
                print(progress_format % (datetime.now(), step, loss_value,
                                         acc_value, examples_per_sec,
                                         float(duration)))
                summary_writer.add_summary(sess.run(summary_op), step)

            losses_batches.append(loss_value)
            accuracies_batches.append(acc_value)

            # Save the model checkpoint periodically.
            should_save = ((step - 1) % steps_per_epoch == 0
                           or (step + 1) == max_steps
                           or _shutdown)
            if should_save:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=step)
                # save accuracy and loss
                np.save(os.path.join(FLAGS.train_dir, 'tr_loss'),
                        np.array(losses_batches))
                np.save(os.path.join(FLAGS.train_dir, 'tr_accuracy'),
                        np.array(accuracies_batches))
                print('Saving model: ', (step - 1) / steps_per_epoch)

            if _shutdown:
                break

    print('Listnr training finished!')
def main(argv=None):
    """Evaluate a checkpoint on a TFRecord file and report the errors.

    Each record is classified from five differently cropped views of the
    image. For every misclassified record its index, the predicted label and
    the true label are printed; up to 100 misclassified images are written
    as a 10x10 collage to ``errors.jpg`` next to this file. Finally the
    accuracy and the labels with at least 5 errors are printed.

    Args:
        argv: unused.
    """
    example = tf.placeholder(tf.string)
    features = tf.parse_single_example(example, features={
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string)
    })
    decode = tf.image.decode_jpeg(features['image_raw'], channels=3)

    def _view(side):
        # Center crop/pad to side x side, then resize to 96 x 96.
        cropped = tf.image.resize_image_with_crop_or_pad(decode, side, side)
        return tf.image.resize_images(cropped, [96, 96])

    image1 = _view(88)
    image2 = _view(96)
    image3 = _view(104)
    image4 = _view(100)
    image5 = _view(92)
    inputs = tf.stack([
        tf.image.per_image_standardization(view)
        for view in (image1, image2, image3, image4, image5)
    ])
    labels = tf.expand_dims(features['label'], axis=0)

    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
    import model

    logits = tf.nn.softmax(model.inference(inputs, FLAGS.num_classes))
    # Best class per view, then the view with the highest score decides.
    values, indices = tf.nn.top_k(logits)
    _, top_value = tf.nn.top_k(tf.transpose(values))
    answer = tf.gather(indices, tf.squeeze(top_value))

    size = 10  # the collage is size x size tiles
    with tf.Session() as sess:
        tf.train.Saver(tf.trainable_variables()).restore(
            sess, FLAGS.checkpoint_path)

        ok, ng = 0, 0
        errors = {}
        error_images = []
        records = tf.python_io.tf_record_iterator(FLAGS.data_file)
        for i, record in enumerate(records):
            labels_value, answer_value, image_value = sess.run(
                [labels, answer, image2], feed_dict={example: record})
            expected, predicted = labels_value[0], answer_value[0]
            if expected == predicted:
                ok += 1
                continue
            print('{:04d}: {:3d} - {:3d}'.format(i, predicted, expected))
            errors[expected] = errors.get(expected, 0) + 1
            ng += 1
            if len(error_images) < size * size:
                error_images.append(image_value)

        if error_images:
            # Pad with blank tiles so every row has `size` images; with fewer
            # than size*size errors the concat would otherwise fail on empty
            # or ragged rows.
            blank = tf.zeros_like(error_images[0])
            tiles = error_images + [blank] * (size * size - len(error_images))
            collage = tf.concat([
                tf.concat(tiles[row * size:(row + 1) * size], 1)
                for row in range(size)
            ], 0)
            collage_path = os.path.join(os.path.dirname(__file__),
                                        'errors.jpg')
            with open(collage_path, 'wb') as f:
                f.write(sess.run(tf.image.encode_jpeg(collage)))

    total = ok + ng
    if total:
        print('{}/{} ({:.3f} %)'.format(ok, total, 100.0 * ok / total))
    else:
        # Avoid dividing by zero when the data file holds no records.
        print('0/0 (no records found)')
    print('errors:')
    for k, v in sorted(errors.items(), key=lambda x: x[1], reverse=True):
        if v >= 5:
            print('{:3d}: {:3d}'.format(k, v))
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.get_inputs()

        # Logits plus the weights and biases of both fully connected layers.
        logits, fc1_w, fc2_w, fc1_b, fc2_b = cifar10.inference(images)

        # Base loss plus L2 regularization of the fully connected
        # parameters, weighted by 5e-4.
        base_loss = cifar10.loss(logits, labels)
        regularizers = (tf.nn.l2_loss(fc1_w) + tf.nn.l2_loss(fc1_b) +
                        tf.nn.l2_loss(fc2_w) + tf.nn.l2_loss(fc2_b))
        loss = base_loss + 5e-4 * regularizers

        # Op that trains the model with one batch of examples and updates
        # the model parameters.
        train_op = cifar10.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return
                now = time.time()
                elapsed = now - self._start_time
                self._start_time = now

                loss_value = run_values.results
                examples_per_sec = (FLAGS.log_frequency * FLAGS.batch_size
                                    / elapsed)
                sec_per_batch = float(elapsed / FLAGS.log_frequency)
                print('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)' % (datetime.now(), self._step, loss_value,
                                      examples_per_sec, sec_per_batch))

        hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement,
            allow_soft_placement=True)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def train():
    """Train aPascal for a number of steps.

    Prints the training configuration, builds the graph, resumes from the
    newest checkpoint in ``FLAGS.train_dir`` when one exists (otherwise
    optionally loads pretrained base variables from
    ``FLAGS.pretrained_dir``), then trains up to ``FLAGS.max_steps`` with
    periodic logging, testing and checkpointing.
    """
    print('[Training Configuration]')
    print('\tTrain dir: %s' % FLAGS.train_dir)
    print('\tTraining max steps: %d' % FLAGS.max_steps)
    print('\tSteps per displaying info: %d' % FLAGS.display)
    print('\tSteps per testing: %d' % FLAGS.test_interval)
    print('\tSteps per saving checkpoints: %d' % FLAGS.checkpoint_interval)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)

    with tf.Graph().as_default():
        init_step = 0
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for aPascal.
        train_images, train_labels = model.distorted_inputs('train')
        test_images, test_labels = model.inputs('eval')

        # The model is built on placeholders so the same graph can be fed
        # with either training or test batches.
        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, model.IMAGE_WIDTH, model.IMAGE_WIDTH, 3])
        labels = tf.placeholder(tf.int32, [FLAGS.batch_size, model.NUM_ATTRS])
        probs = model.inference(images)

        # Calculate loss. (cross_entropy loss)
        loss, acc = model.loss_acc(probs, labels)
        tf.scalar_summary("accuracy", acc)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op, lr = model.train(loss, global_step)

        # Merge every summary registered so far into the training summary;
        # the test summaries below are created afterwards.
        train_summary_op = tf.merge_all_summaries()

        # Loss and accuracy summary used in test phase
        loss_summary = tf.scalar_summary("test/loss", loss)
        acc_summary = tf.scalar_summary("test/accuracy", acc)
        test_summary_op = tf.merge_summary([loss_summary, acc_summary])

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('\tRestore from %s' % ckpt.model_checkpoint_path)
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Checkpoint files are named "<prefix>-<step>".
            init_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found. Start from the scratch.')
            # When finetuning, load every trainable variable except the last
            # 2 * NUM_ATTRS ones from the pretrained model.
            if FLAGS.finetune:
                base_variables = tf.trainable_variables()[:-2 * model.NUM_ATTRS]
                base_saver = tf.train.Saver(base_variables, max_to_keep=10000)
                ckpt = tf.train.get_checkpoint_state(FLAGS.pretrained_dir)
                print('Initial checkpoint: ' + ckpt.model_checkpoint_path)
                base_saver.restore(sess, ckpt.model_checkpoint_path)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        if not os.path.exists(FLAGS.train_dir):
            os.mkdir(FLAGS.train_dir)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)

        # Training!!
        for step in xrange(init_step, FLAGS.max_steps):
            start_time = time.time()
            try:
                train_images_val, train_labels_val = sess.run(
                    [train_images, train_labels])
                _, lr_value, loss_value, acc_value, train_summary_str = sess.run(
                    [train_op, lr, loss, acc, train_summary_op],
                    feed_dict={images: train_images_val,
                               labels: train_labels_val})
            except tf.python.framework.errors.InvalidArgumentError:
                # Debugging aid: drop into an interactive shell.
                # NOTE(review): afterwards the loop continues with the values
                # of the previous step (or none on the first step).
                embed()
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % FLAGS.display == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: (Training) step %d, loss=%.4f, acc=%.4f, '
                              'lr=%f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    acc_value, lr_value, examples_per_sec,
                                    sec_per_batch))
                summary_writer.add_summary(train_summary_str, step)

            if step % FLAGS.test_interval == 0:
                test_images_val, test_labels_val = sess.run(
                    [test_images, test_labels])
                loss_value, acc_value, test_summary_str = sess.run(
                    [loss, acc, test_summary_op],
                    feed_dict={images: test_images_val,
                               labels: test_labels_val})
                format_str = ('%s: (Test) step %d, loss=%.4f, acc=%.4f')
                print(format_str % (datetime.now(), step, loss_value,
                                    acc_value))
                summary_writer.add_summary(test_summary_str, step)

            # Save the model checkpoint periodically.
            if (step % FLAGS.checkpoint_interval == 0
                    or (step + 1) == FLAGS.max_steps):
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def _confusion_counts(labels, preds):
    """Return a (num_attr, 4) integer array of per-attribute [tp, fp, tn, fn]."""
    is_pos = labels == 1
    is_neg = labels == 0
    said_pos = preds == 1
    said_neg = preds == 0
    return np.column_stack([
        (is_pos & said_pos).sum(axis=0),  # tp
        (is_neg & said_pos).sum(axis=0),  # fp
        (is_neg & said_neg).sum(axis=0),  # tn
        (is_pos & said_neg).sum(axis=0),  # fn
    ])


def evaluate():
    """Evaluate the latest checkpoint and report per-attribute binary metrics.

    Restores the newest checkpoint found in ``FLAGS.checkpoint_dir``, runs the
    model over ``total_cnt`` examples of the selected split, accumulates a
    confusion matrix (tp, fp, tn, fn) for each of ``model.NUM_ATTRS``
    attributes, then prints a table of accuracy / precision / recall / F1 and
    writes the same table to ``FLAGS.output``.

    Returns None. Returns early (after closing the session) when no
    checkpoint is found.
    """
    print('[Testing Configuration]')
    print('\tCheckpoint Dir: %s' % FLAGS.checkpoint_dir)
    print('\tDataset: %s data' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tOutput: %s' % FLAGS.output)

    with open(DATASET_ATTRIBUTE_PATH, 'r') as fd:
        # The attribute name is the second whitespace-separated field of a line.
        attribute_list = [line.strip().split()[1] for line in fd]

    batch_size = FLAGS.batch_size
    num_attr = model.NUM_ATTRS

    with tf.Graph().as_default():
        test_images, test_labels = model.inputs(
            'train' if FLAGS.train_data else 'test', False)
        test_probs = model.inference(test_images)

        if FLAGS.train_data:
            total_cnt = data_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
        else:
            total_cnt = data_input.NUM_EXAMPLES_PER_EPOCH_FOR_TEST

        # Start running operations on the Graph.
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=False))
        # try/finally so the session is released on every exit path,
        # including the early return when no checkpoint exists.
        try:
            sess.run(init)

            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found')
                return
            print('\tRestore from %s' % ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Open IPython shell, if flag set.
            if FLAGS.embed:
                embed()

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # One row per attribute, columns are tp, fp, tn, fn.
            counts = np.zeros((num_attr, 4), dtype=np.int64)
            try:
                idx = 0
                while idx < total_cnt:
                    print('Current Idx: ' + str(idx))
                    end = min(idx + batch_size, total_cnt)
                    # The last batch may be only partly needed.
                    this_batch_size = end - idx
                    probs_val, labels_val = sess.run([test_probs, test_labels])
                    preds = np.around(probs_val)
                    counts += _confusion_counts(
                        np.asarray(labels_val)[:this_batch_size, :num_attr],
                        preds[:this_batch_size, :num_attr])
                    idx = end
            finally:
                # Stop the input threads and wait for them before the session
                # they use is closed.
                coord.request_stop()
                coord.join(threads, stop_grace_period_secs=10)
        finally:
            sess.close()

    tp_t, fp_t, tn_t, fn_t = counts.sum(axis=0).tolist()
    result_ll = counts.tolist()

    accuracy, precision, recall, f1 = ([], [], [], [])
    for tp, fp, tn, fn in result_ll:
        assert total_cnt == (tp + fp + tn + fn)
        accuracy.append((tp + tn) / float(total_cnt))
        precision.append(tp / float(tp + fp) if tp + fp else 0.0)
        recall.append(tp / float(tp + fn) if tp + fn else 0.0)
        if precision[-1] == 0.0 or recall[-1] == 0.0:
            f1.append(0.0)
        else:
            # Harmonic mean of precision and recall.
            f1.append(2 / (1 / precision[-1] + 1 / recall[-1]))

    # The "(Total)" row shows summed counts but macro-averaged metrics.
    accuracy_t = np.average(accuracy)
    precision_t = np.average(precision)
    recall_t = np.average(recall)
    f1_t = np.average(f1)

    # Build the table once; it is both printed and written to disk.
    format_str = '%-15s %7d %7d %7d %7d %.5f %.5f %.5f %.5f'
    table = ['Attribute\tTP\tFP\tTN\tFN\tAcc.\tPrec.\tRecall\tF1-score']
    for i in range(num_attr):
        tp, fp, tn, fn = result_ll[i]
        table.append(format_str % (
            attribute_list[i].replace(' ', '-'), tp, fp, tn, fn,
            accuracy[i], precision[i], recall[i], f1[i]))
    table.append(format_str % (
        '(Total)', tp_t, fp_t, tn_t, fn_t,
        accuracy_t, precision_t, recall_t, f1_t))

    for line in table:
        print(line)

    with open(FLAGS.output, 'w') as fd:
        fd.writelines(line + '\n' for line in table)
def run_predictions():
    """Predict labels for new images.

    Restores the latest checkpoint from ``FLAGS.logdir``, runs the model on
    every file in ``FLAGS.predictdir`` and writes one CSV row per image to
    ``predict.csv``: the decoded label, its confidence, and the arg-max class
    and confidence of each of the six prediction heads.

    Returns None. Returns early without writing anything when no checkpoint
    is found.
    """
    with tf.Graph().as_default():
        # Create images pipeline.
        path = tf.placeholder(tf.string)
        image = tf.read_file(path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize_image_with_crop_or_pad(image, 54, 54)
        image = tf.image.per_image_standardization(image)

        # Build a Graph that computes predictions from the inference model.
        logits = model.inference(image, 1.0)

        # The Op to return predictions.
        predict = model.predict(logits)

        # The op for initializing the variables.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # Create a saver to restore the model from the latest checkpoint.
        saver = tf.train.Saver()

        with tf.Session() as sess:
            # Initialize the session.
            sess.run(init_op)

            # Restore the latest model snapshot.
            ckpt = tf.train.get_checkpoint_state(FLAGS.logdir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found!')
                return
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Model restored: {}'.format(ckpt.model_checkpoint_path))

            # Head 0 is the "counter"; heads 1..5 are the digit positions.
            run_ops = [predict[k] for k in range(6)]

            with open('predict.csv', 'w') as out:
                out.write(
                    'image,label,label_pct,counter,counter_pct,y1,y1_pct,y2,y2_pct,y3,y3_pct,y4,y4_pct,y5,y5_pct\n'
                )
                # Loop through files in the predict directory.
                for filename in os.listdir(FLAGS.predictdir):
                    # join() works whether or not predictdir ends with a separator.
                    image_path = os.path.join(FLAGS.predictdir, filename)
                    outputs = sess.run(run_ops, feed_dict={path: image_path})

                    # Arg-max class per head and its score. Scores are
                    # exponentiated below, so they are presumably
                    # log-probabilities with a leading batch axis of 1 --
                    # TODO confirm against model.predict.
                    picks = [np.argmax(p) for p in outputs]
                    scores = [p[0][i] for p, i in zip(outputs, picks)]

                    counter = picks[0]
                    if counter == 0:
                        label = 'X'
                        used = 1
                    elif counter == 6:
                        label = '+'
                        used = 1
                    else:
                        # counter in 1..5: the label is the first `counter`
                        # digits. (The one-digit case previously formatted
                        # p1[i1], an array lookup, instead of the digit i1.)
                        label = ''.join(str(d) for d in picks[1:counter + 1])
                        used = counter + 1
                    # Joint confidence: counter score plus each used digit score.
                    label_pct = np.exp(np.sum(scores[:used]))

                    fields = [image_path, label, '%0.1f' % (100 * label_pct)]
                    for pick, score in zip(picks, scores):
                        fields.append(str(pick))
                        fields.append('%0.1f' % (100 * np.exp(score)))
                    out.write(','.join(fields))
                    out.write('\n')
def train():
    '''
    Train CNN_tiny for a number of steps.

    Loads the cluttered-MNIST arrays from a local ``.npz`` file, builds the
    graph on placeholders, then runs ``MAX_STEPS`` epochs of mini-batch
    training. Every 10 iterations it logs throughput and evaluates five
    random test examples; a checkpoint is written after every epoch.
    Queue threads and the session are released even if training raises.
    '''
    with tf.Graph().as_default():
        # Global step counter.
        global_step = tf.Variable(0, trainable=False)

        # Training / test data.
        mnist = np.load('./data/mnist_sequence1_sample_5distortions5x5.npz')
        trX = mnist['X_train']
        trY = mnist['y_train']
        teX = mnist['X_test']
        teY = mnist['y_test']
        trX = trX.reshape(-1, 40, 40, 1)
        teX = teX.reshape(-1, 40, 40, 1)

        # Turn from dense to one-hot representation.
        trY = dense_to_one_hot(trY, n_classes=10)
        teY = dense_to_one_hot(teY, n_classes=10)
        print("the number of train data: %d" % (len(trX)))

        images = tf.placeholder(tf.float32, [None, 40, 40, 1])
        labels = tf.placeholder(tf.float32, [None, 10])
        keep_conv = tf.placeholder("float")
        keep_hidden = tf.placeholder("float")

        # Graph output.
        logits = model.inference(images, keep_conv, keep_hidden)

        # Loss from the graph output and the labels.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        predict_op = tf.argmax(logits, 1)

        # Training operation.
        train_op = op.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init_op = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)

        try:
            # One pass over the training data per step.
            for step in xrange(MAX_STEPS):
                previous_time = time.time()
                batches_since_log = 0
                index = 0
                # "+ 1" on the end range so the last full batch is included.
                batch_bounds = zip(
                    range(0, len(trX), BATCH_SIZE),
                    range(BATCH_SIZE, len(trX) + 1, BATCH_SIZE))
                for start, end in batch_bounds:
                    _, loss_value = sess.run(
                        [train_op, loss],
                        feed_dict={images: trX[start:end],
                                   labels: trY[start:end],
                                   keep_conv: 0.8,
                                   keep_hidden: 0.5})
                    assert not np.isnan(loss_value), \
                        'Model diverged with loss = NaN'
                    batches_since_log += 1

                    if index % 10 == 0:
                        end_time = time.time()
                        duration = end_time - previous_time
                        # Throughput covers only the batches run since the
                        # previous log line, independent of the epoch number.
                        num_examples_per_step = BATCH_SIZE * batches_since_log
                        examples_per_sec = num_examples_per_step / duration
                        print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/step]: %f[examples/sec]: %f[sec/iteration]" % (
                            datetime.now(), step, index, loss_value,
                            num_examples_per_step, examples_per_sec,
                            duration / batches_since_log))

                        # Evaluate five randomly chosen test examples.
                        test_indices = np.arange(len(teX))
                        np.random.shuffle(test_indices)
                        test_indices = test_indices[0:5]

                        print("=" * 20)
                        print(teY[test_indices])
                        predict, cost_value = sess.run(
                            [predict_op, loss],
                            feed_dict={images: teX[test_indices],
                                       labels: teY[test_indices],
                                       keep_conv: 1.0,
                                       keep_hidden: 1.0})
                        print(predict)
                        print("test loss: %f" % (cost_value))
                        print("=" * 20)
                        previous_time = end_time
                        batches_since_log = 0

                    # Exactly one increment per batch.
                    index += 1

                    # Every 100 iterations, write a summary computed on the
                    # most recently sampled test batch.
                    if index % 100 == 0:
                        summary_str = sess.run(
                            summary_op,
                            feed_dict={images: teX[test_indices],
                                       labels: teY[test_indices],
                                       keep_conv: 1.0,
                                       keep_hidden: 1.0})
                        summary_writer.add_summary(summary_str, step)

                # Checkpoint after every epoch.
                checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver.save(sess, checkpoint_path, global_step=step)
        finally:
            coord.request_stop()
            coord.join(threads)
            sess.close()
def train():
    '''
    Train CNN_tiny for a number of steps.

    Loads the cluttered-MNIST arrays from a local ``.npz`` file, builds the
    graph on placeholders, then runs ``MAX_STEPS`` epochs of mini-batch
    training. Every 10 iterations it logs throughput and evaluates five
    random test examples; every 100 iterations it writes a summary computed
    on the current training batch; a checkpoint is written after every
    epoch. Queue threads and the session are released even if training
    raises.
    '''
    with tf.Graph().as_default():
        # Global step counter.
        global_step = tf.Variable(0, trainable=False)

        # Training / test data.
        mnist = np.load('./data/mnist_sequence1_sample_5distortions5x5.npz')
        trX = mnist['X_train']
        trY = mnist['y_train']
        teX = mnist['X_test']
        teY = mnist['y_test']
        trX = trX.reshape(-1, 40, 40, 1)
        teX = teX.reshape(-1, 40, 40, 1)

        # Turn from dense to one-hot representation.
        trY = dense_to_one_hot(trY, n_classes=10)
        trY = trY.reshape(-1, 10)
        teY = dense_to_one_hot(teY, n_classes=10)
        teY = teY.reshape(-1, 10)
        print("the number of train data: %d" % (len(trX)))

        images = tf.placeholder(tf.float32, [None, 40, 40, 1])
        labels = tf.placeholder(tf.float32, [None, 10])
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)

        # Graph output.
        logits = model.inference(images, keep_conv, keep_hidden)

        # Loss from the graph output and the labels.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        predict_op = tf.argmax(logits, 1)

        # Training operation.
        train_op = op.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.merge_all_summaries()
        init_op = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)

        try:
            # One pass over the training data per step.
            for step in xrange(MAX_STEPS):
                previous_time = time.time()
                batches_since_log = 0
                index = 0
                # "+ 1" on the end range so the last full batch is included.
                batch_bounds = zip(
                    range(0, len(trX), BATCH_SIZE),
                    range(BATCH_SIZE, len(trX) + 1, BATCH_SIZE))
                for start, end in batch_bounds:
                    train_feed = {
                        images: trX[start:end],
                        labels: trY[start:end],
                        keep_conv: 0.8,
                        keep_hidden: 0.5
                    }
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=train_feed)
                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    batches_since_log += 1

                    if index % 10 == 0:
                        end_time = time.time()
                        duration = end_time - previous_time
                        # Throughput covers only the batches run since the
                        # previous log line, independent of the epoch number.
                        num_examples_per_step = BATCH_SIZE * batches_since_log
                        examples_per_sec = num_examples_per_step / duration
                        print(
                            "%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/step]: %f[examples/sec]: %f[sec/iteration]"
                            % (datetime.now(), step, index, loss_value,
                               num_examples_per_step, examples_per_sec,
                               duration / batches_since_log))

                        # Evaluate five randomly chosen test examples.
                        test_indices = np.arange(len(teX))
                        np.random.shuffle(test_indices)
                        test_indices = test_indices[0:5]

                        print("=" * 20)
                        print(teY[test_indices])
                        predict, cost_value = sess.run(
                            [predict_op, loss],
                            feed_dict={
                                images: teX[test_indices],
                                labels: teY[test_indices],
                                keep_conv: 1.0,
                                keep_hidden: 1.0
                            })
                        print(predict)
                        print("test loss: %f" % (cost_value))
                        print("=" * 20)
                        previous_time = end_time
                        batches_since_log = 0

                    # Exactly one increment per batch.
                    index += 1

                    # Every 100 iterations, write a summary computed on the
                    # current training batch.
                    if index % 100 == 0:
                        summary_str = sess.run(summary_op,
                                               feed_dict=train_feed)
                        summary_writer.add_summary(summary_str, step)

                # Checkpoint after every epoch.
                checkpoint_path = TRAIN_DIR + '/model.ckpt'
                saver.save(sess, checkpoint_path, global_step=step)
        finally:
            coord.request_stop()
            coord.join(threads)
            sess.close()
def main():
    """Train the segmentation model, resuming from LOG_DIR when possible.

    Loads the data through ``md``, builds the graph on fixed-size
    placeholders, restores (or initializes) the non-Adam variables via a
    SessionManager, then runs MAX_STEPS further iterations. Every
    SAVE_INTERVAL steps it logs train metrics, evaluates one test batch,
    writes both summaries and saves a checkpoint.
    """
    training_images, training_labels, test_images, test_labels = md.Load_Data(
        mat)

    train_x, train_y = training_images, training_labels
    if REMOVE_EMPTY:
        # Drop the samples that md.is_empty flags.
        blank_rows = md.is_empty(training_images, training_labels)
        train_x = np.delete(training_images, blank_rows, 0)
        train_y = np.delete(training_labels, blank_rows, 0)
    training_data = md.GetData(train_x, train_y)
    test_data = md.GetData(test_images, test_labels)

    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, [BATCH_SIZE, 256, 256, 1])
        labels = tf.placeholder(tf.int64, [BATCH_SIZE, 256, 256])
        is_training = tf.placeholder(tf.bool)

        if AUGMENT_IMAGE:
            images, labels = md.image_augmentation(images, labels)

        logits = md.inference(images, is_training)
        loss = md.loss_calc(logits=logits, labels=labels)
        train_op, global_step = md.training(loss=loss, learning_rate=1e-04)
        accuracy = md.evaluation(logits=logits, labels=labels)
        dice = md.get_dice(logits=logits, labels=labels)

        summary = tf.summary.merge_all()
        init = tf.global_variables_initializer()

        # Variables with 'Adam' in their name are left out of the
        # checkpoint and re-initialized on every start instead.
        model_vars = [v for v in tf.global_variables() if 'Adam' not in v.name]
        saver = tf.train.Saver(model_vars)

        manager = tf.train.SessionManager()
        with manager.prepare_session("",
                                     init_op=init,
                                     saver=saver,
                                     checkpoint_dir=LOG_DIR) as sess:
            adam_vars = [v for v in tf.global_variables() if 'Adam' in v.name]
            sess.run(tf.variables_initializer(adam_vars))

            train_writer = tf.summary.FileWriter(LOG_DIR + "/Train",
                                                 sess.graph)
            test_writer = tf.summary.FileWriter(LOG_DIR + "/Test")

            global_step_value = sess.run(global_step)
            print("Last trained iteration was: ", global_step_value)

            first_step = global_step_value + 1
            for step in range(first_step, first_step + MAX_STEPS):
                print("Iteration: ", step)

                batch_x, batch_y = training_data.next_batch(BATCH_SIZE)
                fetched = sess.run(
                    [dice, train_op, loss, accuracy, summary],
                    feed_dict={
                        images: batch_x,
                        labels: batch_y,
                        is_training: True
                    })
                train_dice_value = fetched[0]
                train_loss_value = fetched[2]
                train_accuracy_value = fetched[3]
                train_summary_str = fetched[4]

                if step % SAVE_INTERVAL != 0:
                    continue

                print("Train Loss: ", train_loss_value)
                print("Train accuracy: ", train_accuracy_value)
                print("Train dice: ", train_dice_value)
                train_writer.add_summary(train_summary_str, step)
                train_writer.flush()

                # One test batch, evaluated with is_training switched off.
                batch_x, batch_y = test_data.next_batch(BATCH_SIZE)
                (test_dice_value, test_loss_value, test_accuracy_value,
                 test_summary_str) = sess.run(
                     [dice, loss, accuracy, summary],
                     feed_dict={
                         images: batch_x,
                         labels: batch_y,
                         is_training: False
                     })

                print("Test Loss: ", test_loss_value)
                print("Test accuracy: ", test_accuracy_value)
                print("Test dice: ", test_dice_value)
                test_writer.add_summary(test_summary_str, step)
                test_writer.flush()

                saver.save(sess, CHECKPOINT_FL, global_step=step)
                print("Session Saved")
                print("================")
def run_training():
    """Train the model from a queue-based input pipeline.

    Builds the batch pipeline, the model, its loss, train op and accuracy
    op, then runs up to MAX_STEP steps. Every 50 steps the loss and
    accuracy are printed and a summary is written; every 2000 steps, and on
    the final step, a checkpoint is saved under the log directory. Queue
    threads and the session are always released on exit.
    """
    # NOTE: machine-specific locations -- change these to your own.
    train_dir = '/home/maqunfei/PycharmProjects/catvsdog/data/train/train'
    logs_train_dir = '/home/maqunfei/PycharmProjects/catvsdog/logs/train/train'

    print('test 11')
    # Collect the parsed training data.
    file_list, label_list = input_data.getfiles(train_dir)
    print('test 21')

    # Ops that yield one batch each time they are evaluated.
    image_batch, label_batch = input_data.get_batch(
        file_list, label_list, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    # Model output, loss, optimizer step and accuracy on the batch.
    logits = model.inference(image_batch, BATCH_SIZE, N_CLASSES)
    loss_op = model.losses(logits, label_batch)
    train_op = model.trainning(loss_op, learning_rate)
    accuracy_op = model.evaluation(logits, label_batch)

    # Merge all summaries and prepare the writer that persists them.
    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    final_step = MAX_STEP - 1
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            # Each run of train_op also evaluates the batch ops it depends on.
            _, tra_loss, tra_acc = sess.run([train_op, loss_op, accuracy_op])

            if step % 50 == 0:
                # Every 50 steps: report loss/accuracy and log a summary.
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %
                      (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or step == final_step:
                # Every 2000 steps (and at the end): save a checkpoint.
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()
print(len(pkl))
# Build the feature-extraction graph in a fresh Graph and open a Session
# configured by `config`.
with tf.Graph().as_default(), tf.Session(config=config) as sess:
    # One float32 image placeholder per view, each of shape
    # (None, 227, 227, 3).
    img_search = tf.placeholder('float32', shape=(None, 227, 227, 3))
    img_street = tf.placeholder('float32', shape=(None, 227, 227, 3))
    img_aerial = tf.placeholder('float32', shape=(None, 227, 227, 3))
    # Cross-view feature computed from all three placeholders, then
    # normalized; feature_normalize takes and returns a list.
    # NOTE(review): meaning of the positional args `1` and `False` is not
    # visible here -- confirm against model.inference_crossview.
    cross_feature = model.inference_crossview([img_search, img_street, img_aerial] , 1, FLAGS.feature, False)
    norm_cross_pred = model.feature_normalize([cross_feature])
    cross_pred = norm_cross_pred[0]
    # Single-view feature computed from the search image only.
    feature = model.inference(img_search, 1, FLAGS.feature)
    norm_pred = model.feature_normalize([feature])
    pred = norm_pred[0]
    saver = tf.train.Saver()
    # Restore weights from FLAGS.model_dir when set, otherwise from the
    # default path derived from FLAGS.feature.
    if FLAGS.model_dir:
        saver.restore(sess, FLAGS.model_dir)
    else:
        saver.restore(sess, 'model/{}/model_final'.format(FLAGS.feature))
    pkl_list = {}
    if True:
        output_dir = os.path.join(output_root, "cross")
def train(verbose=False):
    """Build the train/test graph and run the training loop.

    Every step runs one training update and dumps the debug images. Every
    10 steps throughput is printed. Every 100 steps predictions and targets
    are printed, a summary is written and one test batch is evaluated.
    Every 5000 steps, and on the last step, a checkpoint is saved to
    ``FLAGS.train_dir``.

    Args:
        verbose: when true, print how many train and test losses were
            collected.

    Raises:
        AssertionError: if the training loss becomes NaN. Queue threads and
            the session are released before the error propagates.
    """
    with tf.Graph().as_default():
        # Global step number.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        dataset = DataSet()

        # Training set.
        print("The number of training images is: %d" %
              (dataset.cnt_samples(FLAGS.traincsv)))
        images, labels = dataset.csv_inputs(FLAGS.traincsv, FLAGS.batch_size,
                                            distorted=True)
        images_debug = datasets.debug(images)

        # Test set: a fixed number of examples per evaluation.
        test_cnt = 100
        images_test, labels_test = dataset.test_inputs(FLAGS.testcsv, test_cnt)
        images_test_debug = datasets.debug(images_test)

        num_classes = FLAGS.num_classes
        restore_logits = not FLAGS.fine_tune

        # Inference. Only element 0 of each returned value is fetched below.
        logits = model.inference(images, num_classes, for_training=True,
                                 restore_logits=restore_logits)
        logits_test = model.inference(images_test, num_classes,
                                      for_training=False,
                                      restore_logits=restore_logits,
                                      reuse=True, dropout_keep_prob=1.0)

        # Losses are registered in collections by the model helpers.
        model.loss(logits, labels, batch_size=FLAGS.batch_size)
        model.loss_test(logits_test, labels_test, batch_size=test_cnt)
        losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
        losses_test = tf.get_collection(slim.losses.LOSSES_COLLECTION_TEST)

        # Total training loss includes the regularization terms.
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n(losses + regularization_losses,
                              name='total_loss')
        total_loss_test = tf.add_n(losses_test, name='total_loss_test')

        # Moving averages of the individual and total losses. The training
        # average op is built but not attached to total_loss; it is kept so
        # the set of graph variables stays unchanged.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])
        loss_averages_test = tf.train.ExponentialMovingAverage(
            0.9, name='avg_test')
        loss_averages_op_test = loss_averages_test.apply(
            losses_test + [total_loss_test])

        if verbose:
            print("=" * 10)
            print("loss length:")
            print(len(losses))
            print(len(losses_test))
            print("=" * 10)

        total_loss = tf.identity(total_loss)
        tf.summary.scalar("loss", total_loss)

        # Evaluating the test loss also updates its moving average.
        with tf.control_dependencies([loss_averages_op_test]):
            total_loss_test = tf.identity(total_loss_test)
        tf.summary.scalar("loss_eval", total_loss_test)

        # Snapshot of the summaries registered so far.
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)

        # Batch-normalization update ops.
        batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)

        # Train operation (may add operation summaries to `summaries`).
        train_op = train_operation.train(total_loss, global_step, summaries,
                                         batchnorm_updates)

        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge(summaries)
        init = tf.global_variables_initializer()

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options))
        sess.run(init)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # try/finally so the input threads and the session are released
        # even when the loop raises (e.g. the NaN assertion below).
        try:
            if FLAGS.fine_tune:
                ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print('%s: Pre-trained model restored from %s' %
                          (datetime.now(), ckpt.model_checkpoint_path))
                    saver.restore(sess, ckpt.model_checkpoint_path)

            summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                                   sess.graph)

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                (_, logits_eval, loss_value, labels_eval,
                 images_debug_eval) = sess.run(
                     [train_op, logits[0], total_loss, labels, images_debug])
                duration = time.time() - start_time

                dataset.output_images(images_debug_eval, "debug", "train")

                assert not np.isnan(loss_value), \
                    'Model diverged with loss = NaN'

                if step % 10 == 0:
                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('train %s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, duration))

                if step % 100 == 0:
                    print("predict:")
                    print(type(logits_eval))
                    print(logits_eval.shape)
                    print(logits_eval.argmax(1))
                    print("target:")
                    print(labels_eval)

                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                    test_start_time = time.time()
                    (logits_test_eval, total_loss_test_val, labels_test_eval,
                     images_test_debug_eval) = sess.run(
                         [logits_test[0], total_loss_test, labels_test,
                          images_test_debug])
                    test_duration = time.time() - test_start_time

                    dataset.output_images(images_test_debug_eval,
                                          "debug_test", "test")

                    print("test predict:")
                    print(type(logits_test_eval))
                    print(logits_test_eval.shape)
                    print(logits_test_eval.argmax(1))
                    print("test target:")
                    print(labels_test_eval)
                    test_examples_per_sec = test_cnt / float(test_duration)
                    format_str_test = ('test %s: step %d, loss = %.2f, (%.1f examples/sec; %.3f sec/batch)')
                    print(format_str_test % (datetime.now(), step,
                                             total_loss_test_val,
                                             test_examples_per_sec,
                                             test_duration))

                # Save the model checkpoint periodically.
                if step % 5000 == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        finally:
            coord.request_stop()
            coord.join(threads)
            sess.close()
def get_batch():
    """Fetch one batch from a fresh OCRIter and return it as NumPy arrays.

    Returns:
        (image_batch1, label_batch1): the images and labels produced by
        ``OCRIter(batch_size, img_h, img_w).iter()``, each passed through
        ``np.array``.
    """
    data_batch = OCRIter(batch_size, img_h, img_w)
    image_batch, label_batch = data_batch.iter()
    #label_batch1 = np.reshape(np.transpose(label_batch),[-1])
    #label_batch1 = tf.one_hot(label_batch1,65)
    image_batch1 = np.array(image_batch)
    label_batch1 = np.array(label_batch)
    return image_batch1, label_batch1


# Graph construction: the model yields seven logits tensors from the image
# placeholder; each gets its own loss and its own train op.
train_logits1, train_logits2, train_logits3, train_logits4, train_logits5, train_logits6, train_logits7 = model.inference(
    image_holder, keep_prob)
train_loss1, train_loss2, train_loss3, train_loss4, train_loss5, train_loss6, train_loss7 = model.losses(
    train_logits1, train_logits2, train_logits3, train_logits4, train_logits5, train_logits6, train_logits7, label_holder)
train_op1, train_op2, train_op3, train_op4, train_op5, train_op6, train_op7 = model.trainning(
    train_loss1, train_loss2, train_loss3, train_loss4, train_loss5, train_loss6, train_loss7, learning_rate)
# A single accuracy op computed from all seven logits and the labels.
train_acc = model.evaluation(train_logits1, train_logits2, train_logits3, train_logits4, train_logits5, train_logits6, train_logits7, label_holder)
# Image summary of the fed input images.
input_image = tf.summary.image('input', image_holder)
#tf.summary.histogram('label',label_holder) # histogram of the labels, used when testing the training code; see: http://geek.csdn.net/news/detail/197155
img, label = read_and_decode("train.tfrecords") #使用shuffle_batch可以随机打乱输入 img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=64, capacity=2000, min_after_dequeue=1000) x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x') #tensor y_ = tf.placeholder(tf.int32, shape=[ None, ], name='y_') regularizer = tf.contrib.layers.l2_regularizer(0.0001) logits = model.inference(x, regularizer) #(小处理)将logits乘以1赋值给logits_eval,定义name,方便在后续调用模型时通过tensor名字调用输出tensor b = tf.constant(value=1, dtype=tf.float32) logits_eval = tf.multiply(logits, b, name='logits_eval') loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_) train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss) correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_) acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) coord = tf.train.Coordinator() # 开启多线程
def run_training(continue_run):
    """Restore a fixed checkpoint and evaluate it on the validation data.

    Despite its name, this routine performs no training: it loads the
    'images_test' / 'masks_test' arrays from ``base_data.hdf5`` in
    ``log_dir``, builds the inference graph, restores a hard-coded
    checkpoint, runs ``do_eval`` over the validation arrays and logs the
    resulting dice score.

    Args:
        continue_run: accepted for interface compatibility; not used.
    """
    logging.info('EXPERIMENT NAME: %s' % exp_config.experiment_name)
    already_created_recursion = False
    print("ALready created recursion : " + str(already_created_recursion))

    # Copy the validation arrays into memory, then release the file handle.
    with h5py.File(os.path.join(log_dir, 'base_data.hdf5'), 'r') as base_data:
        images_val = np.array(base_data['images_test'])
        labels_val = np.array(base_data['masks_test'])

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    if not os.path.exists(validation_res_path):
        os.makedirs(validation_res_path)

    # The model is built into the default Graph. This single session is
    # used throughout, so `config` actually takes effect and the session is
    # closed on exit. (A second, unconfigured tf.Session() used to shadow
    # it and was never closed.)
    with tf.Session(config=config) as sess:
        # Generate placeholders for the images and labels.
        image_tensor_shape = [exp_config.batch_size] + list(
            exp_config.image_size) + [1]
        mask_tensor_shape = [exp_config.batch_size] + list(
            exp_config.image_size)
        print("Exp config image size : " + str(exp_config.image_size))

        images_placeholder = tf.placeholder(tf.float32,
                                            shape=image_tensor_shape,
                                            name='images')
        labels_placeholder = tf.placeholder(tf.uint8,
                                            shape=mask_tensor_shape,
                                            name='labels')
        learning_rate_placeholder = tf.placeholder(tf.float32, shape=[])
        crf_learning_rate_placeholder = tf.placeholder(tf.float32, shape=[])
        training_time_placeholder = tf.placeholder(tf.bool, shape=[])

        tf.summary.scalar('learning_rate', learning_rate_placeholder)

        # Build a Graph that computes predictions from the inference model.
        logits = model.inference(images_placeholder,
                                 exp_config.model_handle,
                                 training=training_time_placeholder,
                                 nlabels=exp_config.nlabels)

        # Add the variable initializer Op.
        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=2)

        # Initialize, then overwrite with the checkpointed weights.
        sess.run(init)
        saver.restore(
            sess,
            '/scratch_net/biwirender02/cany/scribble/logdir/unet2D_ws_spot_blur_crf/recursion_1_model_best_dice.ckpt-31099'
        )

        # Evaluate against the validation set.
        logging.info('Validation Data Eval:')
        eval_val_loss = model.evaluation(
            logits,
            labels_placeholder,
            images_placeholder,
            nlabels=exp_config.nlabels,
            loss_type=exp_config.loss_type,
            weak_supervision=True,
            cnn_threshold=exp_config.cnn_threshold,
            include_bg=False)

        [val_loss, val_dice] = do_eval(sess, eval_val_loss,
                                       images_placeholder,
                                       labels_placeholder,
                                       training_time_placeholder,
                                       images_val, labels_val,
                                       exp_config.batch_size)

        logging.info(
            'Found new best dice on validation set! - {} - '.format(
                val_dice))
def run_training():
    """Train the model on the two CK+ image folders and checkpoint it.

    Both image batches are passed to ``model.inference``; the loss and the
    accuracy are computed against the labels of the first folder only.
    Summaries are written every 50 steps, checkpoints every 800 steps and at
    the final step, all under ``logs_train_dir``.
    """
    # You need to change the directories to yours.
    s_train_dir = '/home/hrz/projects/tensorflow/emotion/ck+/CK+YuanTu'
    T_train_dir = '/home/hrz/projects/tensorflow/emotion/ck+/CK+X_mid'
    logs_train_dir = '/home/hrz/projects/tensorflow/emotion/ck+'

    s_train, s_train_label = input_data.get_files(s_train_dir)
    s_train_batch, s_train_label_batch = input_data.get_batch(
        s_train, s_train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    T_train, T_train_label = input_data.get_files(T_train_dir)
    T_train_batch, T_train_label_batch = input_data.get_batch(
        T_train, T_train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    train_logits = model.inference(s_train_batch, T_train_batch,
                                   BATCH_SIZE, N_CLASSES)
    train_loss = model.losses(train_logits, s_train_label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    train__acc = model.evaluation(train_logits, s_train_label_batch)

    summary_op = tf.summary.merge_all()  # merged summary op

    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()  # checkpoint writer
    coord = tf.train.Coordinator()  # coordinates the input threads
    threads = []

    try:
        sess.run(tf.global_variables_initializer())
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break

            if step % 50 == 0:
                # Fetch the summary in the same run as the training step so
                # the logged values belong to the step that is printed.
                _, tra_loss, tra_acc, summary_str = sess.run(
                    [train_op, train_loss, train__acc, summary_op])
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%'
                      % (step, tra_loss, tra_acc * 100.0))
                train_writer.add_summary(summary_str, step)
            else:
                _, tra_loss, tra_acc = sess.run(
                    [train_op, train_loss, train__acc])

            if step % 800 == 0 or (step + 1) == MAX_STEP:
                # Save the current model and weights to logs_train_dir;
                # global_step is the current iteration number.
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Release threads, event file and session on every exit path.
        coord.request_stop()
        coord.join(threads)
        train_writer.close()
        sess.close()
# Multi-GPU ("tower") training setup: one loss per GPU, gradients averaged
# across towers and applied once.
# NOTE(review): `sess`, `img`, `label`, `gpu_num`, `batch_size`, `height`,
# `width`, `is_training` and `average_gradients` come from outside this
# fragment.

# Non-trainable step counter; apply_gradients below receives it as
# global_step, and it drives the learning-rate schedule.
global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
# Staircase decay starting at 0.001, multiplied by 0.8 every 10000 steps.
learning_rate = tf.train.exponential_decay(0.001, global_step, 10000, 0.8, staircase=True)
# train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
opt = tf.train.AdamOptimizer(learning_rate)
tower_grads = []
for i in xrange(gpu_num):
    with tf.device('/gpu:%d' % i):
        with tf.name_scope('GPU_%d' % i) as scope:
            # Each tower gets its own shuffled batch.
            img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=batch_size, capacity=5000, min_after_dequeue=1000)
            label_batch = tf.reshape(label_batch, [batch_size, height, width, 1])
            img_batch = tf.reshape(img_batch, [batch_size, height, width, 3])
            # NOTE(review): `size` is not used anywhere in this fragment.
            size = np.array([batch_size, height, width])
            annotation_pred = model.inference(img_batch, is_training)
            # Mean squared error between prediction and label map.
            loss = tf.reduce_mean(tf.square(tf.subtract(annotation_pred, label_batch)))
            # Switch the variable scope to reuse mode for the following towers.
            tf.get_variable_scope().reuse_variables()
            grads = opt.compute_gradients(loss)
            tower_grads.append(grads)
# Combine the per-tower gradients and apply them in a single update.
grads = average_gradients(tower_grads)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
# ckpt=tf.train.get_checkpoint_state('E:/Tianchi/my_network/model/')
# if ckpt and ckpt.model_checkpoint_path:
#     saver.restore(sess,ckpt.model_checkpoint_path)
#     print('model restored')
def eval_te(conf):
    """
    Evaluate against the entire test set of images.

    Args:
        conf: configuration dictionary. Keys read here: "path_eval", "iw",
            "sr", "cw", "save_sr_imgs" and "path_tmp" (checkpoint directory).
    Returns:
        (avg_psnr, avg_bl_psnr): mean PSNR of the model output and mean PSNR
        of the low-resolution baseline, both against the ground truth.
    Raises:
        ZeroDivisionError: if no test images are found under "path_eval".
    """
    path_te = conf["path_eval"]
    iw = conf["iw"]
    sr = conf["sr"]
    cw = conf["cw"]
    save = conf["save_sr_imgs"]
    fns_te = preproc._get_filenames(path_te)

    def psnr_against(reference, estimate):
        # PSNR for a 255 peak value, computed in float32 to avoid integer
        # wrap-around in the difference.
        diff = reference.astype(np.float32) - estimate.astype(np.float32)
        mse = np.mean(diff ** 2)
        return 20 * np.log10(255.0 / np.sqrt(mse))

    with tf.Graph().as_default(), tf.device("/cpu:0" if FLAGS.dev_assign else None):
        # Placeholders, one per tower
        Xs = [tf.placeholder(tf.float32, [None, iw, iw, 1], name="X_%02d" % i)
              for i in range(FLAGS.num_gpus)]
        y_splits = []
        for i in range(FLAGS.num_gpus):
            with tf.device(("/gpu:%d" % i) if FLAGS.dev_assign else None):
                with tf.name_scope("%s_%02d" % (FLAGS.tower_name, i)):
                    y_split, _ = model.inference(Xs[i], conf)
                    y_splits.append(y_split)
                    tf.get_variable_scope().reuse_variables()
        y = tf.concat(0, y_splits, name="y")

        # Restore
        saver = tf.train.Saver(tf.trainable_variables())
        sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=FLAGS.log_device_placement)
        )
        try:
            ckpt = tf.train.get_checkpoint_state(conf["path_tmp"])
            if ckpt:
                ckpt = ckpt.model_checkpoint_path
                print("checkpoint found: %s" % ckpt)
                saver.restore(sess, ckpt)
            else:
                print("checkpoint not found!")
                # Leave the warning on screen for a moment.
                time.sleep(2)

            # Iterate over each image, and calculate error
            avg_psnr, avg_bl_psnr = 0.0, 0.0
            for fn in fns_te:
                lr, gt = preproc.lr_hr(sm.imread(fn), sr)
                fn_ = fn.split("/")[-1].split(".")[0]
                out_name = os.path.join("tmp", fn_ + "_HR.png") if save else None
                hr = infer(lr, Xs, y, sess, conf, out_name)

                # Evaluate: drop the first cw rows/columns, then crop to the
                # size of the prediction so the arrays line up.
                gt = gt[cw:, cw:]
                gt = gt[: hr.shape[0], : hr.shape[1]]
                psnr = psnr_against(gt, hr)
                avg_psnr += psnr

                # Baseline: the low-resolution input, cropped the same way.
                lr = lr[cw:, cw:]
                lr = lr[: hr.shape[0], : hr.shape[1]]
                bl_psnr = psnr_against(gt, lr)
                avg_bl_psnr += bl_psnr

                print("hr PSNR: %.3f, lr PSNR % .3f for %s" % (psnr, bl_psnr, fn.split("/")[-1]))
        finally:
            # The original never closed the session.
            sess.close()

        avg_psnr /= len(fns_te)
        avg_bl_psnr /= len(fns_te)
        print("average test PSNR: %.3f" % avg_psnr)
        print("average baseline PSNR: %.3f" % avg_bl_psnr)
        return avg_psnr, avg_bl_psnr
def run_training():
    """Train MNIST for a number of steps.

    Optionally copies the input files from ``FLAGS.input_path`` with
    ``gsutil`` into a fresh temporary directory, builds the graph, trains for
    ``FLAGS.max_steps`` steps, writes summaries every 100 steps and saves a
    checkpoint plus train/validation/test evaluation every 1000 steps and at
    the last step.

    Raises:
        subprocess.CalledProcessError: if the ``gsutil`` copy fails.
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST. If input_path is specified, download the data from GCS to
    # the folder expected by read_data_sets.
    data_dir = tempfile.mkdtemp()
    if FLAGS.input_path:
        files = [os.path.join(FLAGS.input_path, file_name)
                 for file_name in INPUT_FILES]
        subprocess.check_call(['gsutil', '-m', '-q', 'cp', '-r'] + files +
                              [data_dir])
    # The original passed data_dir as a second print() argument, so the
    # "%s" was never substituted.
    print('Data dir %s' % data_dir)
    data_sets = read_data_sets(data_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = model.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = model.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = model.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = model.evaluation(logits, labels_placeholder)

        # Build the summary operation based on the TF collection of Summaries.
        # Remove this if once Tensorflow 0.12 is standard.
        try:
            summary_op = tf.contrib.deprecated.merge_all_summaries()
        except AttributeError:
            summary_op = tf.merge_all_summaries()

        # Add the variable initializer Op.
        # Remove this if once Tensorflow 0.12 is standard.
        try:
            init = tf.global_variables_initializer()
        except AttributeError:
            init = tf.initialize_all_variables()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Instantiate a SummaryWriter to output summaries and the Graph.
        # Remove this if once Tensorflow 0.12 is standard.
        try:
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
        except AttributeError:
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                    sess.graph)

        try:
            # And then after everything is built:
            # Run the Op to initialize the variables.
            sess.run(init)

            # Start the training loop.
            for step in xrange(FLAGS.max_steps):
                start_time = time.time()

                # Fill a feed dictionary with the actual set of images and
                # labels for this particular training step.
                feed_dict = fill_feed_dict(data_sets.train,
                                           images_placeholder,
                                           labels_placeholder)

                # Run one step of the model. The value returned for
                # `train_op` is discarded; `loss_value` is the loss.
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict=feed_dict)

                duration = time.time() - start_time

                # Write the summaries and print an overview fairly often.
                if step % 100 == 0:
                    # Print status to stdout.
                    print('Step %d: loss = %.2f (%.3f sec)' %
                          (step, loss_value, duration))
                    # Update the events file.
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                # Save a checkpoint and evaluate the model periodically.
                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_file = os.path.join(FLAGS.train_dir,
                                                   'checkpoint')
                    saver.save(sess, checkpoint_file, global_step=step)
                    # Evaluate against the training set.
                    print('Training Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_sets.train)
                    # Evaluate against the validation set.
                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_sets.validation)
                    # Evaluate against the test set.
                    print('Test Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, data_sets.test)
        finally:
            # Release the event file and the session on every exit path.
            summary_writer.close()
            sess.close()
'Directory containing Benchmark Dataset(img_placeholder, data_list, and tag_list.' ) flags.DEFINE_string('train_dir', '/home/ardiya/HashtagPrediction', 'Directory with the training data.') flags.DEFINE_string('train_file', 'harrison_train.tfrecords', 'Filename of the training data') flags.DEFINE_string('test_file', 'harrison_test.tfrecords', 'Filename of the test data') if __name__ == '__main__': with tf.Graph().as_default(): tf.logging.set_verbosity(tf.logging.INFO) batch_x, batch_labels, batch_y = input.inputs( filename=os.path.join(FLAGS.train_dir, FLAGS.test_file)) logits = model.inference(batch_x, is_training=False) batch_y = tf.cast(batch_y, tf.int64) names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ 'accuracy': slim.metrics.streaming_accuracy(logits, batch_y), 'eval/precision/1': slim.metrics.streaming_sparse_precision_at_k( logits, batch_labels, 1), 'eval/precision/5': slim.metrics.streaming_sparse_precision_at_k( logits, batch_labels, 5), 'eval/precision/10': slim.metrics.streaming_sparse_precision_at_k( logits, batch_labels, 10),
MAX_STEP = 10000 # 一般大于10K learning_rate = 0.0001 # 一般小于0.0001 # 获取批次batch train_dir = 'C:/Users/Guo/Desktop/flower_world-master/input_data' # 训练样本的读入路径 logs_train_dir = r'C:\Users\Guo\Desktop\flower_world-master\save' # logs存储路径 # train, train_label = input_data.get_files(train_dir) train, train_label, val, val_label = input_data.get_files(train_dir, 0.3) # 训练数据及标签 train_batch, train_label_batch = input_data.get_batch(train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY) # 测试数据及标签 val_batch, val_label_batch = input_data.get_batch(val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY) # 训练操作定义 train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES) train_loss = model.losses(train_logits, train_label_batch) train_op = model.trainning(train_loss, learning_rate) train_acc = model.evaluation(train_logits, train_label_batch) # 测试操作定义 test_logits = model.inference(val_batch, BATCH_SIZE, N_CLASSES) test_loss = model.losses(test_logits, val_label_batch) test_acc = model.evaluation(test_logits, val_label_batch) # 这个是log汇总记录 summary_op = tf.summary.merge_all() # 产生一个会话 sess = tf.Session() # 产生一个writer来写log文件
def run_training():
    '''
    Build the graph, then train and periodically validate the model.

    Restores the latest checkpoint from FLAGS.save_dir when one exists,
    otherwise initializes all variables. Training stops when the step count
    reaches a multiple of FLAGS.max_steps (a checkpoint is saved first) or
    when the input queue is exhausted. Training and validation accuracy are
    sampled every 100 steps and plotted to accuracy.png afterwards.
    '''
    with tf.Graph().as_default():
        images, labels = inputs(batch_size=FLAGS.batch_size,
                                num_epochs=FLAGS.num_epochs,
                                filename=os.path.join(FLAGS.train_dir,
                                                      TRAIN_FILE))

        logits = model.inference(images, reuse=False,
                                 bn_epsilon=FLAGS.bn_epsilon, dim=FLAGS.dim)
        labels = tf.to_int64(labels)

        regularization_loss = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        ce_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=labels))
        # tf.add_n takes ONE list of tensors; its second positional argument
        # is `name`. The original passed the regularization losses as that
        # name, so they were never part of the sum.
        loss = tf.add_n([ce_loss] + regularization_loss)

        pred = tf.nn.softmax(logits)
        train_acc = accuracy(pred, labels)
        train_op = tf.train.AdamOptimizer(
            learning_rate=FLAGS.learning_rate).minimize(loss)

        # Validation
        vali_images, vali_labels = inputs(batch_size=FLAGS.batch_size,
                                          num_epochs=4 * FLAGS.num_epochs,
                                          filename=os.path.join(
                                              FLAGS.train_dir,
                                              VALIDATION_FILE))
        vali_logits = model.inference(vali_images, reuse=True,
                                      bn_epsilon=FLAGS.bn_epsilon,
                                      dim=FLAGS.dim)
        vali_labels = tf.to_int64(vali_labels)
        vali_pred = tf.nn.softmax(vali_logits)
        vali_accuracy = accuracy(vali_pred, vali_labels)

        local_init_op = tf.local_variables_initializer()
        init_op = tf.group(tf.global_variables_initializer(), local_init_op)

        sess = tf.Session()
        saver = tf.train.Saver()

        if not os.path.exists(FLAGS.save_dir):
            os.makedirs(FLAGS.save_dir)
        save_path = os.path.join(FLAGS.save_dir, FLAGS.weights_file)

        print('Trying to restore checkpoint')
        last_chk_path = tf.train.latest_checkpoint(
            checkpoint_dir=FLAGS.save_dir)
        restored = False
        if last_chk_path is not None:
            try:
                saver.restore(sess, save_path=last_chk_path)
                restored = True
            except (ValueError, tf.errors.OpError) as err:
                # Keep the original best-effort fallback, but say why.
                print('Could not restore %s: %s' % (last_chk_path, err))
        if restored:
            print('Restored checkpoint from:', last_chk_path)
            # Local variables are not stored in the checkpoint, so they
            # still need initializing after a restore.
            sess.run(local_init_op)
        else:
            print('No checkpoint found. Initializing variables..')
            sess.run(init_op)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        tr_acc_list = []
        val_acc_list = []
        iters_list = []
        step = 0
        try:
            while not coord.should_stop():
                start_time = time.time()
                _, loss_value, acc = sess.run([train_op, loss, train_acc])
                duration = time.time() - start_time

                # Print an overview fairly often.
                if step % 100 == 0:
                    vali_acc = sess.run(vali_accuracy)
                    print(
                        'Step %d: loss = %.2f Train Accuracy = %.3f Vali Accuracy = %.3f (%.3f sec)'
                        % (step, loss_value, acc, vali_acc, duration))
                    tr_acc_list.append(acc)
                    val_acc_list.append(vali_acc)
                    iters_list.append(step)

                step += 1
                if step % FLAGS.max_steps == 0:
                    saver.save(sess, save_path=save_path, global_step=step)
                    break
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.' %
                  (FLAGS.num_epochs, step))
        finally:
            coord.request_stop()
            coord.join(threads)
            sess.close()

        plt.figure(figsize=(10, 10))
        plt.plot(iters_list, tr_acc_list)
        plt.plot(iters_list, val_acc_list)
        plt.legend(['training accuracy', 'validation accuracy'],
                   loc='upper left')
        plt.savefig('accuracy.png')
def run_train():
    """
    Build the multi-digit graph and train it on batches from svhn_input.

    Every 10 steps the loss and both accuracy summaries are written for the
    current training batch and for a fixed test set of FLAGS.TEST_SIZE
    samples; every 500 steps a checkpoint is saved.

    Takes no arguments and returns nothing.
    """
    with tf.Graph().as_default() as graph:
        global_step = tf.train.get_or_create_global_step()

        input_train_placeholder = tf.placeholder(
            dtype=tf.float32,
            shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
            name='input_train_placeholder')
        # One label placeholder per digit position.
        list_labels = [
            tf.placeholder(dtype=tf.int32, shape=[None],
                           name='labels_{}'.format(i))
            for i in range(NUM_DIGITS)
        ]

        list_logits = model.inference(images=input_train_placeholder)
        batch_loss = model.loss(list_logits, list_labels)
        batch_loss_summary = tf.summary.scalar('total_loss', batch_loss)
        optimizer = model.train(batch_loss, global_step)

        # Accuracy
        batch_accuracy = model.get_accurary(list_logits, list_labels)
        batch_absolute_accuracy = model.get_absolute_accurary(list_logits,
                                                              list_labels)
        batch_accuracy_summary = tf.summary.scalar('batch_accuracy',
                                                   batch_accuracy)
        batch_absolute_accuracy_summary = tf.summary.scalar(
            'batch_absolute_accuracy', batch_absolute_accuracy)
        summary_ops = [batch_loss_summary, batch_accuracy_summary,
                       batch_absolute_accuracy_summary]

        init = tf.global_variables_initializer()

        train_writer = tf.summary.FileWriter(
            'summary_logs/train_{}'.format(name_logging()), graph=graph)
        test_writer = tf.summary.FileWriter(
            'summary_logs/test_{}'.format(name_logging()), graph=graph)

        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.5
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)

        def make_feed(batch_images, batch_labels):
            # Column i of the label matrix feeds the placeholder of digit i.
            # Driven by list_labels, so it follows NUM_DIGITS instead of a
            # hard-coded five entries.
            feed = {input_train_placeholder: batch_images}
            for digit, placeholder in enumerate(list_labels):
                feed[placeholder] = batch_labels[:, digit]
            return feed

        def write_summaries(writer, feed, at_step):
            for summary in sess.run(summary_ops, feed_dict=feed):
                writer.add_summary(summary, at_step)

        try:
            with tf.Session(config=config) as sess:
                # Run the initializer
                sess.run(init)
                step = 0

                test_images, test_labels = svhn_input.get_test(
                    size=FLAGS.TEST_SIZE)
                # Append a trailing channel axis of size 1.
                test_images = test_images.reshape(
                    list(test_images.shape) + [1])

                for images, labels in svhn_input.get_batch(
                        batch_size=BATCH_SIZE, num_epoch=FLAGS.NUM_EPOCH):
                    step += 1
                    images = images.reshape(list(images.shape) + [1])
                    train_feed = make_feed(images, labels)
                    sess.run(optimizer, feed_dict=train_feed)

                    if step % 10 == 0:
                        print(step)
                        write_summaries(train_writer, train_feed, step)
                        write_summaries(test_writer,
                                        make_feed(test_images, test_labels),
                                        step)

                    if step % 500 == 0:
                        path = saver.save(
                            sess,
                            save_path=CHECKPOINT_DIR + name_logging() + '/' + PREFIX,
                            global_step=step)
                        print('Saved model at {}'.format(path))
        finally:
            # Flush and release both event files.
            train_writer.close()
            test_writer.close()
def train():
    '''
    Train the coarse or the refine network on the NYU Depth V2 data.

    FLAGS.refine_train selects which network is built and which saver writes
    the checkpoints; FLAGS.fine_tune restores previously saved coarse and
    refine parameters first. One pass over image_input.get_batches() is run
    per step of the outer loop (MAX_STEPS passes in total), and a checkpoint
    is written every fifth pass and after the final one.
    '''
    with tf.Graph().as_default():
        # Global step counter.
        global_step = tf.Variable(0, trainable=False)

        # NYU Dataset V2 original size(480 x 640 x 3) -> crop -> (460 x 620 x 3)
        image_input = ImageInput('./data/nyu_depth_v2_labeled.mat')
        print("the number of train data: %d" % (len(image_input.images)))

        images = tf.placeholder(
            tf.float32,
            [None, FLAGS.crop_size_height, FLAGS.crop_size_width,
             FLAGS.image_depth])
        depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        invalid_depths = tf.placeholder(tf.float32, [None, 1, 55, 74])
        keep_conv = tf.placeholder(tf.float32)
        keep_hidden = tf.placeholder(tf.float32)

        # Output of the graph.
        if FLAGS.refine_train:
            print("refine train.")
            logits = model.inference_refine(images, keep_conv, keep_hidden)
        else:
            print("coarse train.")
            logits = model.inference(images, keep_conv, keep_hidden)

        # Loss computed from the graph output and the labels.
        loss = model.loss(logits, depths, invalid_depths)
        # Training op.
        train_op = op.train(loss, global_step)
        # Summary op (only referenced by disabled summary code).
        summary_op = tf.merge_all_summaries()
        # Initialization op.
        init_op = tf.initialize_all_variables()

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        coord = tf.train.Coordinator()
        threads = []
        try:
            sess.run(init_op)

            # Coarse and refine parameters are saved separately.
            coarse_params = {}
            refine_params = {}
            if FLAGS.refine_train:
                candidates = tf.all_variables()
            else:
                candidates = tf.trainable_variables()
            for variable in candidates:
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                # Only names of the form "<scope>/<name>" are collected.
                if variable_name.count("/") != 1:
                    print("ignore.")
                    continue
                if 'coarse' in variable_name:
                    print("coarse parameter: %s" % (variable_name))
                    coarse_params[variable_name] = variable
                # NOTE: 'fine' also matches names containing 'refine'.
                if 'fine' in variable_name:
                    print("refine parameter: %s" % (variable_name))
                    refine_params[variable_name] = variable

            # define saver
            saver_coarse = tf.train.Saver(coarse_params)
            saver_refine = tf.train.Saver(refine_params)

            # fine tune
            if FLAGS.fine_tune:
                # load coarse parameters
                coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
                if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                    print("Pretrained coarse Model Loading.")
                    saver_coarse.restore(sess,
                                         coarse_ckpt.model_checkpoint_path)
                    print("Pretrained coarse Model Restored.")
                else:
                    print("No Pretrained coarse Model.")
                # load refine parameters
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    print("Pretrained refine Model Loading.")
                    saver_refine.restore(sess,
                                         refine_ckpt.model_checkpoint_path)
                    print("Pretrained refine Model Restored.")
                else:
                    print("No Pretrained refine Model.")

            # TODO train coarse or refine (change trainable)

            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # Repeat training for MAX_STEPS passes over the data.
            for step in xrange(MAX_STEPS):
                previous_time = time.time()
                index = 0
                batches = image_input.get_batches(FLAGS.batch_size)
                vals = image_input.get_validation()
                for batch in batches:
                    batch_images = batch[0]
                    batch_depths = batch[1]
                    batch_invalid = batch[2]
                    _, loss_value = sess.run(
                        [train_op, loss],
                        feed_dict={images: batch_images,
                                   depths: batch_depths,
                                   invalid_depths: batch_invalid,
                                   keep_conv: 0.8, keep_hidden: 0.5})

                    if index % 10 == 0:
                        # Timing covers the iterations since the last report.
                        end_time = time.time()
                        duration = end_time - previous_time
                        num_examples_per_step = BATCH_SIZE * 10
                        examples_per_sec = num_examples_per_step / duration
                        print("%s: %d[epoch]: %d[iteration]: train loss %f: %d[examples/iteration]: %f[examples/sec]: %f[sec/iteration]" % (datetime.now(), step, index, loss_value, num_examples_per_step, examples_per_sec, duration))
                        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                        previous_time = end_time

                    if index % 50 == 0:
                        # Validation pass with dropout disabled.
                        output_vec, cost_value = sess.run(
                            [logits, loss],
                            feed_dict={images: vals[0], depths: vals[1],
                                       invalid_depths: vals[2],
                                       keep_conv: 1.0, keep_hidden: 1.0})
                        print("%s: %d[epoch]: %d[iteration]: validation loss: %f" % (datetime.now(), step, index, cost_value))

                    if index % 100 == 0:
                        # Every multiple of 100 is also a multiple of 50, so
                        # output_vec is fresh here.
                        output_dir = "predicts_%05d_%08d" % (step, index)
                        print("predicts output: %s" % output_dir)
                        data_feed_inputs_nyu.output_predict(output_vec,
                                                            output_dir)

                    index += 1

                # The original tested (step * 1) == MAX_STEPS, which is never
                # true inside xrange(MAX_STEPS), so the final pass was only
                # saved when it happened to be a multiple of 5.
                if step % 5 == 0 or (step + 1) == MAX_STEPS:
                    if FLAGS.refine_train:
                        refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                        saver_refine.save(sess, refine_checkpoint_path,
                                          global_step=step)
                    else:
                        coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                        saver_coarse.save(sess, coarse_checkpoint_path,
                                          global_step=step)
        finally:
            # Stop the queue threads and release the session on every exit.
            coord.request_stop()
            coord.join(threads)
            sess.close()
def run_training():
    """Train on the training split and periodically evaluate on validation.

    The model is built on the placeholders ``x`` / ``y_`` so the same ops can
    be fed either a training batch or a validation batch. (The original built
    the model directly on ``train_batch``, which left the placeholders
    disconnected: every "validation" number was computed on training data.)

    Summaries go to ``logs_train_dir`` every 50 steps and to ``logs_val_dir``
    every 200 steps; a checkpoint is saved every 2000 steps and at the end.
    """
    # You need to change the directories to yours.
    train_dir = 'D:/1shibie/RD/data/train/'
    logs_train_dir = 'D:/1shibie/RD/logs/train/'
    logs_val_dir = 'D:/1shibie/RD/logs//val/'

    train, train_label, val, val_label = input_train_val_split.get_files(
        train_dir, RATIO)
    train_batch, train_label_batch = input_train_val_split.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    val_batch, val_label_batch = input_train_val_split.get_batch(
        val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    # Placeholders take the dtypes of the queue outputs, so the model sees
    # exactly the tensor types it was previously built on.
    x = tf.placeholder(train_batch.dtype,
                       shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(train_label_batch.dtype, shape=[BATCH_SIZE])

    logits = model.inference(x, BATCH_SIZE, N_CLASSES)
    loss = model.losses(logits, y_)
    train_op = model.trainning(loss, learning_rate)
    acc = model.evaluation(logits, y_)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                last_step = (step + 1) == MAX_STEP

                tra_images, tra_labels = sess.run(
                    [train_batch, train_label_batch])
                train_feed = {x: tra_images, y_: tra_labels}
                _, tra_loss, tra_acc = sess.run([train_op, loss, acc],
                                                feed_dict=train_feed)

                if step % 50 == 0:
                    print(
                        'Step %d, train loss = %.2f, train accuracy = %.2f%%'
                        % (step, tra_loss, tra_acc * 100.0))
                    summary_str = sess.run(summary_op, feed_dict=train_feed)
                    train_writer.add_summary(summary_str, step)

                if step % 200 == 0 or last_step:
                    val_images, val_labels = sess.run(
                        [val_batch, val_label_batch])
                    val_feed = {x: val_images, y_: val_labels}
                    val_loss, val_acc = sess.run([loss, acc],
                                                 feed_dict=val_feed)
                    print(
                        '** Step %d, val loss = %.2f, val accuracy = %.2f%% **'
                        % (step, val_loss, val_acc * 100.0))
                    summary_str = sess.run(summary_op, feed_dict=val_feed)
                    val_writer.add_summary(summary_str, step)

                if step % 2000 == 0 or last_step:
                    checkpoint_path = os.path.join(logs_train_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
                    # Checkpoint steps are also validation steps, so
                    # val_feed holds the batch fetched just above.
                    test_loss, test_acc = sess.run([loss, acc],
                                                   feed_dict=val_feed)
                    print('test loss=%.2f,test accuracy=%.2f%%' %
                          (test_loss, test_acc * 100))
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
            train_writer.close()
            val_writer.close()
# Debug script: builds the same input pipeline and model as the training
# code, then prints the largest label value found in one label batch.
s_train_dir = '/home/hrz/projects/tensorflow/emotion/ck+/CK+YuanTu'
T_train_dir = '/home/hrz/projects/tensorflow/emotion/ck+/CK+X_mid'
logs_train_dir = '/home/hrz/projects/tensorflow/emotion/ck+'

s_train, s_train_label = input_data.get_files(s_train_dir)
#print(s_train)
s_train_batch, s_train_label_batch = input_data.get_batch(s_train, s_train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
#print(s_train_label_batch)
T_train, T_train_label = input_data.get_files(T_train_dir)
T_train_batch, T_train_label_batch = input_data.get_batch(T_train, T_train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
#print(len(s_train),len(s_train_label),len(T_train),len(T_train_label))
#print(s_train_batch,s_train_label_batch,T_train_batch,s_train_label_batch)

train_logits = model.inference(s_train_batch,T_train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, s_train_label_batch)
train_op = model.trainning(train_loss, learning_rate)
#correct_prediction = tf.equal(tf.argmax(train_logits,1),tf.argmax(y_,1))

# Maximum label value in the batch (a quick check of the label range).
labels_max = tf.reduce_max(s_train_label_batch)
sess = tf.Session()
# NOTE(review): no variable initializer or queue runners are started in this
# fragment; if input_data.get_batch is queue-based this run may block -
# confirm.
print(sess.run(labels_max))
def train():
    """Train the classifier on one-hot batches and log summaries.

    Every 100th step evaluates accuracy on a validation batch and writes it
    to the validation log; the other steps run one training step and write
    training summaries (with a full run trace when i % 200 == 199).
    """
    train_dir = './data'
    logs_train_dir = './logs/train/'
    logs_val_dir = './logs/val/'

    train, train_label, val, val_label = input_train_val_split.get_files(train_dir, RATIO)
    train_batch, train_label_batch = input_train_val_split.get_batch_OneHot(train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY, CHANNEL)
    val_batch, val_label_batch = input_train_val_split.get_batch_OneHot(val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY, CHANNEL)

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, CHANNEL], name='x-input')
        y_ = tf.placeholder(tf.float32, shape=[BATCH_SIZE, N_CLASSES], name='y-input')
    keep_prob = tf.placeholder(tf.float32)

    # To run nicely in jupyter notebook
    #sess = tf.InteractiveSession()
    hidden5 = model.inference(
        images = x,
        IMG_W = IMG_W,
        IMG_H = IMG_H,
        keep_prob = keep_prob,
        BATCH_SIZE = BATCH_SIZE,
        N_CLASSES = N_CLASSES,
        CHANNEL = CHANNEL)

    # Probabilities - output from model (not the same as logits)
    y = tf.nn.softmax(hidden5)

    # Loss and optimizer
    with tf.name_scope('cross_entropy'):
        # Per-example cross-entropy computed from the raw model output.
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=hidden5)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    # Setup to test accuracy of model
    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            # tf.equal: returns the truth value of (x == y) element-wise.
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            # Fraction of correct predictions in the batch.
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Merge all the summaries and write them out to the log directories.
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        test_writer = tf.summary.FileWriter(logs_val_dir)

        # Initialize all global variables
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess = sess, coord = coord)

        # bTrain=True: feed the most recently fetched training batch.
        # bTrain=False: feed the most recently fetched validation batch.
        def feed_dict(bTrain):
            """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
            # Reads tra_images / val_images from the enclosing scope, so the
            # matching batch must have been fetched before this is called.
            if bTrain:
                xs, ys = tra_images, tra_labels
                k = dropout
            else:
                xs, ys = val_images, val_labels
                k = 1.0
            return {x: xs, y_: ys, keep_prob: k}

        # Train model
        # NOTE(review): the source had lost its indentation; the pairing of
        # the `else` below with `if i % 100 == 0` (and the nesting of the
        # checkpoint test inside it) is reconstructed - confirm against the
        # original file.
        for i in range(max_steps):
            if i % 100 == 0:
                # Record validation summaries and accuracy.
                val_images, val_labels = sess.run([val_batch, val_label_batch])
                summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
                test_writer.add_summary(summary, i)
                print('Accuracy at step %s: %s' % (i, acc))
                if i % 2000 == 0 or (i + 1) == max_steps:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step = i)
            else:
                # Record train set summaries, and train
                tra_images, tra_labels = sess.run([train_batch, train_label_batch])
                if i % 200 == 199:
                    # Also record execution stats for this step.
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True), options=run_options, run_metadata=run_metadata)
                    train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                    train_writer.add_summary(summary, i)
                else:
                    # Record a summary
                    summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
                    train_writer.add_summary(summary, i)

        train_writer.close()
        test_writer.close()
        coord.request_stop()
        coord.join(threads)
def run_training(continue_run):
    """Build the inference graph, optionally restore a checkpoint, and advance the recursion once.

    Loads the scribble data, builds the placeholders and the inference graph
    inside a new session, restores the newest checkpoint when ``continue_run``
    is true, then predicts and post-processes the next ground truth,
    increments the recursion counter and runs the random-walk step.

    Args:
        continue_run: If true, try to resume from the newest checkpoint found
            in ``log_dir``. When no usable checkpoint is found, continue mode
            is disabled and the freshly initialised variables are used.
    """
    logging.info('EXPERIMENT NAME: %s', exp_config.experiment_name)

    # Load data
    base_data, recursion_data, recursion = acdc_data.load_and_maybe_process_scribbles(
        scribble_file=sys_config.project_root + exp_config.scribble_data,
        target_folder=log_dir,
        percent_full_sup=exp_config.percent_full_sup,
        scr_ratio=exp_config.length_ratio)

    # NOTE: the former outer ``try ... except Exception: raise`` wrapper only
    # re-raised unchanged, so it has been dropped.
    loaded_previous_recursion = False
    init_checkpoint_path = None

    if continue_run:
        logging.info('!!!!!!!!!!!!!!!!!!!!!!!!!!!! Continuing previous run !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        # ``except Exception`` (not a bare ``except``) so that Ctrl-C and
        # SystemExit are not mistaken for "checkpoint not found".
        try:
            try:
                init_checkpoint_path = utils.get_latest_model_checkpoint_path(
                    log_dir, 'recursion_{}_model.ckpt'.format(recursion))
            except Exception:
                # Fall back to the checkpoint of the previous recursion.
                init_checkpoint_path = utils.get_latest_model_checkpoint_path(
                    log_dir, 'recursion_{}_model.ckpt'.format(recursion - 1))
                loaded_previous_recursion = True
            logging.info('Checkpoint path: %s', init_checkpoint_path)
            # plus 1 b/c otherwise starts with eval
            init_step = int(init_checkpoint_path.split('/')[-1].split('-')[-1]) + 1
            start_epoch = int(init_step/(len(base_data['images_train'])/4))
            logging.info('Latest step was: %d', init_step)
            logging.info('Continuing with epoch: %d', start_epoch)
        except Exception:
            logging.warning('!!! Did not find init checkpoint. Maybe first run failed. Disabling continue mode...')
            continue_run = False
        logging.info('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')

    if loaded_previous_recursion:
        logging.info("Data file exists for recursion {} "
                     "but checkpoints only present up to recursion {}".format(recursion, recursion - 1))
        logging.info("Likely means postprocessing was terminated")
        recursion_data = acdc_data.load_different_recursion(recursion_data, -1)
        recursion -= 1

    # load images and validation data
    images_train = np.array(base_data['images_train'])
    scribbles_train = np.array(base_data['scribbles_train'])
    images_val = np.array(base_data['images_test'])
    labels_val = np.array(base_data['masks_test'])

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        # Generate placeholders for the images and labels.
        image_tensor_shape = [exp_config.batch_size] + list(exp_config.image_size) + [1]
        mask_tensor_shape = [exp_config.batch_size] + list(exp_config.image_size)
        images_placeholder = tf.placeholder(tf.float32, shape=image_tensor_shape, name='images')
        labels_placeholder = tf.placeholder(tf.uint8, shape=mask_tensor_shape, name='labels')
        learning_rate_placeholder = tf.placeholder(tf.float32, shape=[])
        training_time_placeholder = tf.placeholder(tf.bool, shape=[])
        tf.summary.scalar('learning_rate', learning_rate_placeholder)

        # Build a Graph that computes predictions from the inference model.
        logits = model.inference(images_placeholder,
                                 exp_config.model_handle,
                                 training=training_time_placeholder,
                                 nlabels=exp_config.nlabels)
        summary = tf.summary.merge_all()

        # Initialise all variables, then overwrite them from the checkpoint
        # when resuming.
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=2)
        if continue_run:
            saver.restore(sess, init_checkpoint_path)

        logging.info('RECURSION {0}'.format(recursion))

        # Have reached end of recursion
        recursion_data = predict_next_gt(data=recursion_data,
                                         images_train=images_train,
                                         images_placeholder=images_placeholder,
                                         training_time_placeholder=training_time_placeholder,
                                         logits=logits,
                                         sess=sess)
        recursion_data = postprocess_gt(data=recursion_data,
                                        images_train=images_train,
                                        scribbles_train=scribbles_train)
        recursion += 1

        # random walk - if it already has been random walked it won't redo
        recursion_data = acdc_data.random_walk_epoch(recursion_data,
                                                     exp_config.rw_beta,
                                                     exp_config.rw_threshold,
                                                     exp_config.random_walk)
def run_training():
    """Train the classifier with a train/validation split, logging summaries and checkpoints.

    Training batches are pulled from the input queues and fed through the
    ``x`` / ``y_`` placeholders with ``training=True``. Every 50 steps the
    training summary is written and progress printed; every 200 steps (and on
    the last step) one validation batch is evaluated; every 20 steps (and on
    the last step) a checkpoint is saved to ``logs_train_dir``.
    """
    # you need to change the directories to yours.
    train_dir = 'C:/MIGUEL/ML/dogs and cats/train/'
    logs_train_dir = 'C:/MIGUEL/ML/dogs and cats/logs/train/'
    logs_val_dir = 'C:/MIGUEL/ML/dogs and cats/logs/val/'

    train, train_label, val, val_label = input_train_val_split.get_files(train_dir, RATIO)
    train_batch, train_label_batch = input_train_val_split.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    val_batch, val_label_batch = input_train_val_split.get_batch(
        val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMG_W, IMG_H, 3], name='x')
    y_ = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name='y')
    training = tf.placeholder_with_default(False, shape=(), name='training')

    logits = model.inference(x, BATCH_SIZE, N_CLASSES, training)
    loss = model.losses(logits, y_)
    acc = model.evaluation(logits, y_)
    train_op = model.trainning(loss, learning_rate)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        saver = tf.train.Saver()
        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        val_writer = tf.summary.FileWriter(logs_val_dir, sess.graph)

        bn_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # Expose the main tensors/ops through a named collection
        # (bn_update_ops is deliberately not part of it).
        for op in (x, y_, training, train_op, loss, acc, summary_op, logits):
            tf.add_to_collection("retrain_ops", op)

        try:
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break

                # ``step`` runs 0 .. MAX_STEP - 1, so the last iteration is
                # ``step + 1 == MAX_STEP`` (``step == MAX_STEP`` never holds).
                is_last_step = (step + 1) == MAX_STEP

                tra_images, tra_labels = sess.run([train_batch, train_label_batch])
                train_feed = {x: tra_images, y_: tra_labels, training: True}

                if step % 50 != 0:
                    _, lossValue, accValue, _ = sess.run(
                        [train_op, loss, acc, bn_update_ops], feed_dict=train_feed)
                else:
                    _, lossValue, accValue, _, summary_str = sess.run(
                        [train_op, loss, acc, bn_update_ops, summary_op], feed_dict=train_feed)
                    train_writer.add_summary(summary_str, step)
                    print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, lossValue, accValue*100.0))

                if step % 200 == 0 or is_last_step:
                    val_images, val_labels = sess.run([val_batch, val_label_batch])
                    # ``training`` is left at its default (False) for validation.
                    val_loss, val_acc, summary_str = sess.run(
                        [loss, acc, summary_op],
                        feed_dict={x: val_images, y_: val_labels})
                    val_writer.add_summary(summary_str, step)
                    print('** Step %d, val loss = %.2f, val accuracy = %.2f%% **' % (step, val_loss, val_acc*100.0))

                if step % 20 == 0 or is_last_step:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
            # Flush pending event files before the session goes away.
            train_writer.close()
            val_writer.close()
def train():
    """Train the model from TFRecord queues, evaluating every tenth step.

    Steps divisible by 10 run one evaluation batch (dropout disabled) and
    write its summary. All other steps run one training batch; on steps where
    ``step % 100 == 99`` and on the final step the training summary is
    written and a checkpoint is saved to ``FLAGS.train_dir``, otherwise the
    loss and throughput are printed.

    Raises:
        AssertionError: if the training loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get train images and labels
        float_image, label = tfrecord.train_data_read(tfrecord_path=FLAGS.train_data)
        images, labels = tfrecord.create_batch(float_image, label, count_num=FLAGS.train_num)

        # Get evaluate images and labels
        eval_float_image, eval_label = tfrecord.eval_data_read(tfrecord_path=FLAGS.eval_data_dir)
        eval_images, eval_labels = tfrecord.create_batch(
            eval_float_image, eval_label, count_num=FLAGS.eval_num)

        # Model inference
        x = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, FLAGS.image_width, FLAGS.image_height, FLAGS.image_channels],
            name='x_input')
        y_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None], name='y_input')
        keep_prob = tf.placeholder(tf.float32)
        logits = model.inference(x, keep_prob)

        # loss computing
        loss = model.loss(logits, y_)
        # accuracy computation
        accuracy = model.accuracy(logits, y_)
        # train model
        train_op = model.train(loss, global_step)
        # save model
        saver = tf.train.Saver(tf.global_variables())
        # merge all summaries
        summary_op = tf.summary.merge_all()
        # initialize all variables (non-deprecated spelling of
        # tf.initialize_all_variables, matching tf.global_variables above)
        init = tf.global_variables_initializer()

        # Run session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # start queue runners
        tf.train.start_queue_runners(sess=sess)

        # Pass the graph itself; the ``graph_def=`` keyword is deprecated.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        def make_feed(batch_images, batch_labels, keep):
            """Return the feed dict for one batch with the given keep probability."""
            return {
                x: batch_images,
                y_: np.reshape(batch_labels, (FLAGS.batch_size, -1)),
                keep_prob: keep,
            }

        for step in xrange(FLAGS.max_steps):
            if step % 10 == 0:
                # Evaluation step: no parameter update, dropout disabled.
                imgs, lbls = sess.run([eval_images, eval_labels])
                summary_str, acc = sess.run(
                    [summary_op, accuracy], feed_dict=make_feed(imgs, lbls, 1.0))
                summary_writer.add_summary(summary_str, step)
                print('%s: step %d, accuracy = %.3f' % (datetime.now(), step, acc))
                continue

            imgs, lbls = sess.run([images, labels])
            train_feed = make_feed(imgs, lbls, FLAGS.keep_prob)

            if step % 100 == 99 or (step + 1) == FLAGS.max_steps:
                summary_str, _ = sess.run([summary_op, train_op], feed_dict=train_feed)
                summary_writer.add_summary(summary_str, step)
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
            else:
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss], feed_dict=train_feed)
                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

        # Flush any buffered summaries to disk.
        summary_writer.close()
def _play_notification(path):
    """Play the audio file at ``path`` for ten seconds, then stop playback."""
    pygame.mixer.init()
    pygame.mixer.music.load(path)
    pygame.mixer.music.play()
    time.sleep(10)
    pygame.mixer.music.stop()


def run_training():
    """Train the model from the input queue and play a sound when finished.

    Every 10 steps progress is printed and a summary written; every 500 steps
    and on the last step a checkpoint is saved. Training stops early once the
    loss is exactly zero after step 300. ``./failed.mp3`` is played when the
    input queue raises ``OutOfRangeError``; ``./successful.mp3`` is played
    only when the loop ends without an exception.
    """
    train_dir = './data/train/'
    logs_train_dir = './logs/train/'

    train, train_label = input_data.get_files(train_dir)
    train_batch, train_label_batch = input_data.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
    train_loss = model.losses(train_logits, train_label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    train__acc = model.evaluation(train_logits, train_label_batch)

    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])

            if step % 10 == 0:
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' %
                      (step, tra_loss, tra_acc * 100.0))
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            if step % 500 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            if step > 300 and tra_loss == 0.00:
                # Loss reached exactly zero: save and ask the loop to stop.
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                coord.request_stop()
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
        _play_notification(r'./failed.mp3')
    else:
        # Previously this lived in ``finally`` and therefore also played
        # right after the failure sound and on unexpected exceptions.
        _play_notification(r'./successful.mp3')
    finally:
        # Always release the queue threads and the session, even on errors.
        coord.request_stop()
        coord.join(threads)
        sess.close()
def main(argv=None):
    """Restore a checkpoint and print top-1 / top-2 accuracy and a confusion matrix.

    Args:
        argv: Unused; accepted so the function can serve as an app entry point.

    Any exception raised while evaluating is printed as a traceback instead
    of being propagated.
    """
    try:
        with tf.Graph().as_default():
            # Load all images from disk
            data_set = data_input.read_image_batches_with_labels_from_path(FLAGS.eval_dir, FLAGS)

            # Inference model
            images_placeholder, labels_placeholder = utils.create_placeholder_inputs(
                data_set.batch_size, FLAGS.image_height, FLAGS.image_width)
            logits = model.inference(images_placeholder, data_set.batch_size, FLAGS.num_classes)

            # Accuracy
            correct = tf.nn.in_top_k(logits, labels_placeholder, 1)
            eval_correct_k_1 = tf.reduce_sum(tf.cast(correct, tf.int32))
            correct = tf.nn.in_top_k(logits, labels_placeholder, 2)
            eval_correct_k_2 = tf.reduce_sum(tf.cast(correct, tf.int32))

            # Prediction used for confusion matrix
            prediction = tf.argmax(logits, 1)

            # Add the variable initializer Op.
            init = tf.initialize_all_variables()
            saver = tf.train.Saver()

            # Create a session for running Ops on the Graph.
            with tf.Session() as sess:
                sess.run(init)

                # Start the queue runners before entering ``try`` so that
                # ``coord`` and ``threads`` are always bound in ``finally``.
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    # Restore variables from disk.
                    print("Restore session from checkpoint {0}".format(FLAGS.checkpoint))
                    saver.restore(sess, FLAGS.checkpoint)

                    # Run evaluation
                    print("\nRunning evaluation for {0}\n".format(FLAGS.eval_dir))

                    num_examples, true_count = do_eval(
                        sess, eval_correct_k_1, images_placeholder, labels_placeholder, data_set)
                    # float() keeps the ratio from truncating to 0 under
                    # Python 2 integer division.
                    precision = float(true_count) / num_examples
                    print('Top-1-Accuracy | Num examples: %d Num correct: %d Precision @ 1: %0.04f' %
                          (num_examples, true_count, precision))

                    num_examples, true_count = do_eval(
                        sess, eval_correct_k_2, images_placeholder, labels_placeholder, data_set)
                    precision = float(true_count) / num_examples
                    print('Top-2-Accuracy | Num examples: %d Num correct: %d Precision @ 2: %0.04f\n' %
                          (num_examples, true_count, precision))

                    # Create confusion matrix
                    confusion_matrix = create_confusion_matrix(
                        sess, prediction, images_placeholder, labels_placeholder,
                        data_set, FLAGS.num_classes)
                    print("{0}".format(confusion_matrix))
                finally:
                    print("\nWaiting for all threads...")
                    coord.request_stop()
                    coord.join(threads)
    except Exception:
        # Top-level boundary: report the failure but let Ctrl-C / SystemExit
        # propagate.
        traceback.print_exc()
    finally:
        print("\nDone.\n")
def generate_adversarial_examples(input_folder, output_path, model_path, attack, attack_args, exp_config, add_gaussian=False):
    """Attack one test mini-batch, then write metrics and comparison figures.

    The first 20 test images are iterated with batch size 1, but only the 9th
    mini-batch is attacked; all other batches are skipped. For every
    adversarial example produced, the baseline and attacked loss/DICE are
    evaluated, a per-example metrics file and five PDF figures are written to
    ``eval_results/``, and finally the averaged metrics are written to
    ``eval_results/<attack>-results.json``.

    Args:
        input_folder: Not used by this function (data is read from
            ``sys_config.data_root``).
        output_path: Not used by this function (output goes to
            ``eval_results/``).
        model_path: Directory searched for the ``model_best_dice.ckpt``
            checkpoint.
        attack: One of ``'fgsm'``, ``'pgd'`` or ``'spgd'``.
        attack_args: Attack parameters; must contain ``'eps'``,
            ``'step_alpha'`` and ``'epochs'`` (plus ``'sizes'`` and
            ``'weights'`` when ``add_gaussian`` is true).
        exp_config: Experiment configuration module/object.
        add_gaussian: If true, post-process the adversarial example with
            ``adv_attack.add_gaussian_noise``.

    Raises:
        NotImplementedError: if ``attack`` is not a supported attack name.
        KeyError: if a required key is missing from ``attack_args``.
    """
    import os  # local import: only needed to create the output directory

    out_dir = "eval_results"
    if not os.path.isdir(out_dir):
        # Fail early/cheaply instead of after the attack has been computed.
        os.makedirs(out_dir)

    batch_size = 1
    image_tensor_shape = [batch_size] + list(exp_config.image_size) + [1]
    mask_tensor_shape = [batch_size] + list(exp_config.image_size)
    images_pl = tf.placeholder(tf.float32, shape=image_tensor_shape, name='images')
    labels_pl = tf.placeholder(tf.uint8, shape=mask_tensor_shape, name='labels')
    logits_pl = model.inference(images_pl,
                                exp_config=exp_config,
                                training=tf.constant(False, dtype=tf.bool))
    eval_loss = model.evaluation(logits_pl,
                                 labels_pl,
                                 images_pl,
                                 nlabels=exp_config.nlabels,
                                 loss_type=exp_config.loss_type)
    # Built once: creating this op per evaluation would keep adding nodes to
    # the graph.
    predicted_mask = tf.arg_max(tf.nn.softmax(logits_pl), dimension=-1)

    data = acdc_data.load_and_maybe_process_data(
        input_folder=sys_config.data_root,
        preprocessing_folder=sys_config.preproc_folder,
        mode=exp_config.data_mode,
        size=exp_config.image_size,
        target_resolution=exp_config.target_resolution,
        force_overwrite=False,
        split_test_train=True)
    images = data['images_test'][:20]
    labels = data['masks_test'][:20]
    print("Num images train {} test {}".format(len(data['images_train']), len(images)))

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    reported_args = {k: attack_args[k] for k in ['eps', 'step_alpha', 'epochs']}

    baseline_closs = 0.0
    baseline_cdice = 0.0
    attack_closs = 0.0
    attack_cdice = 0.0
    l2_diff_sum = 0.0
    ln_diff_sum = 0.0
    ln_diff = 0.0
    l2_diff = 0.0
    batches = 0
    # Averages are taken over what was actually evaluated, not over every
    # batch the iterator produced (most of which are skipped below).
    evaluated_batches = 0
    evaluated_examples = 0

    def _save_figure(path, image, mask=None):
        """Save ``image`` (optionally overlaid with ``mask``) as an axis-free PDF."""
        plt.imshow(np.squeeze(image), cmap='gray')
        if mask is not None:
            plt.imshow(np.squeeze(mask), cmap='viridis', alpha=0.7)
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(path, format='pdf')
        plt.clf()

    with tf.Session() as sess:
        results = []
        sess.run(init)
        checkpoint_path = utils.get_latest_model_checkpoint_path(
            model_path, 'model_best_dice.ckpt')
        saver.restore(sess, checkpoint_path)

        for batch in BackgroundGenerator(
                train.iterate_minibatches(images, labels, batch_size)):
            x, y = batch
            batches += 1
            # Only the 9th mini-batch is attacked.
            if batches != 9:
                continue
            evaluated_batches += 1

            non_adv_mask_out = sess.run([predicted_mask], feed_dict={images_pl: x})

            if attack == 'fgsm':
                adv_x = adv_attack.fgsm_run(x, y, images_pl, labels_pl, logits_pl,
                                            exp_config, sess, attack_args)
            elif attack == 'pgd':
                adv_x = adv_attack.pgd(x, y, images_pl, labels_pl, logits_pl,
                                       exp_config, sess, attack_args)
            elif attack == 'spgd':
                adv_x = adv_attack.pgd_conv(x, y, images_pl, labels_pl, logits_pl,
                                            exp_config, sess, **attack_args)
            else:
                raise NotImplementedError
            adv_x = [adv_x]

            if add_gaussian:
                print('adding gaussian noise')
                adv_x = adv_attack.add_gaussian_noise(
                    x, adv_x[0], sess,
                    eps=attack_args['eps'],
                    sizes=attack_args['sizes'],
                    weights=attack_args['weights'])

            for i in range(len(adv_x)):
                evaluated_examples += 1
                perturbation = adv_x[i] - x
                l2_diff = np.average(
                    np.squeeze(np.linalg.norm(perturbation, axis=(1, 2))))
                ln_diff = np.average(
                    np.squeeze(np.linalg.norm(perturbation, axis=(1, 2), ord=np.inf)))
                l2_diff_sum += l2_diff
                ln_diff_sum += ln_diff
                # Previously printed l2_diff twice.
                print(l2_diff, ln_diff)

                adv_mask_out = sess.run([predicted_mask],
                                        feed_dict={images_pl: adv_x[i]})

                closs, cdice = sess.run(eval_loss,
                                        feed_dict={images_pl: x, labels_pl: y})
                baseline_closs = closs + baseline_closs
                baseline_cdice = cdice + baseline_cdice

                adv_closs, adv_cdice = sess.run(eval_loss,
                                                feed_dict={images_pl: adv_x[i],
                                                           labels_pl: y})
                attack_closs = adv_closs + attack_closs
                attack_cdice = adv_cdice + attack_cdice

                partial_result = {
                    'attack': attack,
                    'attack_args': dict(reported_args),
                    'baseline_closs': closs,
                    'baseline_cdice': cdice,
                    'attack_closs': adv_closs,
                    'attack_cdice': adv_cdice,
                    'attack_l2_diff': l2_diff,
                    'attack_ln_diff': ln_diff,
                }
                # The per-example file holds the dict's string form wrapped
                # in a JSON string (kept as-is for existing consumers).
                jsonString = json.dumps(str(partial_result))
                with open(
                        "eval_results/{}-{}-{}-{}-metrics.json".format(
                            attack, add_gaussian, batches, i), "w") as jsonFile:
                    jsonFile.write(jsonString)

                tag = "{}-{}-{}-{}".format(attack, add_gaussian, batches, i)
                _save_figure("eval_results/ground-truth-" + tag + ".pdf", x, y)
                _save_figure("eval_results/benign-" + tag + ".pdf", x, non_adv_mask_out)
                _save_figure("eval_results/adversarial-" + tag + ".pdf", adv_x[i], adv_mask_out)
                _save_figure("eval_results/adv-input-" + tag + ".pdf", adv_x[i])
                _save_figure("eval_results/benign-input-" + tag + ".pdf", x)

            print(attack_closs, attack_cdice, l2_diff, ln_diff)

        # max(..., 1) keeps the report well-defined when nothing was evaluated.
        example_div = max(evaluated_examples, 1)
        batch_div = max(evaluated_batches, 1)

        print("Evaluation results")
        print("{} Attack Params {}".format(attack, attack_args))
        print("Baseline metrics: Avg loss {}, Avg DICE Score {} ".format(
            baseline_closs / example_div, baseline_cdice / example_div))
        print("{} Attack effectiveness: Avg loss {}, Avg DICE Score {} ".format(
            attack, attack_closs / example_div, attack_cdice / example_div))
        print("{} Attack visibility: Avg l2-norm diff {} Avg l-inf-norm diff {}".format(
            attack, l2_diff_sum / example_div, ln_diff_sum / example_div))

        # float() so json.dumps never sees a NumPy scalar type it cannot
        # serialise.
        result_dict = {
            'attack': attack,
            'attack_args': dict(reported_args),
            'baseline_closs_avg': float(baseline_closs / batch_div),
            'baseline_cdice_avg': float(baseline_cdice / batch_div),
            'attack_closs_avg': float(attack_closs / batch_div),
            'attack_cdice_avg': float(attack_cdice / batch_div),
            'attack_l2_diff': float(l2_diff_sum / batch_div),
            'attack_ln_diff': float(ln_diff_sum / batch_div),
        }
        results.append(result_dict)
        print(results)

        jsonString = json.dumps(results)
        with open("eval_results/{}-results.json".format(attack), "w") as jsonFile:
            jsonFile.write(jsonString)
# Script fragment: finish loading the test labels, then build the model graph
# (placeholders, inference, loss, training op, accuracy, saver) and open a
# session. NOTE(review): `f` and `test_label` are defined before this excerpt.
test_label = np.asarray(test_label)
f.close()

# Specify the scope that is exported to the TensorBoard graph
with tf.Graph().as_default():
    # Tensor that holds the images (any number (None) of images with
    # 28*28*3 (IMAGE_PIXELS) dimensions)
    images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
    # Tensor that holds the labels (any number (None) of labels with
    # 3 (NUM_CLASSES) dimensions)
    labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
    # Placeholder tensor for the dropout rate
    keep_prob = tf.placeholder("float")

    # Call inference() to build the model
    logits = model.inference(images_placeholder, keep_prob)
    # Call loss() to compute the loss
    loss_value = model.loss(logits, labels_placeholder)
    # Call training() to train and adjust the model parameters
    train_op = model.training(loss_value, FLAGS.learning_rate)
    # Compute the accuracy
    acc = model.accuracy(logits, labels_placeholder)

    # Prepare for saving
    saver = tf.train.Saver()
    # Create the Session (TensorFlow computations must always run inside a Session)
    sess = tf.Session()
def fit():
    """Train the seven-output model for ``count`` steps, logging and checkpointing.

    Each step fetches one batch with ``get_batch()``, runs all seven training
    ops together with their losses, the accuracy and the merged summary, and
    writes the summary. Progress is printed every 10 steps; a checkpoint is
    saved every 10000 steps and on the last step. The total wall-clock time
    is printed at the end.
    """
    (train_logits1, train_logits2, train_logits3, train_logits4,
     train_logits5, train_logits6, train_logits7) = model.inference(image_holder, keep_prob)

    (train_loss1, train_loss2, train_loss3, train_loss4,
     train_loss5, train_loss6, train_loss7) = model.losses(
        train_logits1, train_logits2, train_logits3, train_logits4,
        train_logits5, train_logits6, train_logits7, label_holder)

    (train_op1, train_op2, train_op3, train_op4,
     train_op5, train_op6, train_op7) = model.trainning(
        train_loss1, train_loss2, train_loss3, train_loss4,
        train_loss5, train_loss6, train_loss7, learning_rate)

    train_acc = model.evaluation(
        train_logits1, train_logits2, train_logits3, train_logits4,
        train_logits5, train_logits6, train_logits7, label_holder)

    # Registered for its side effect: it adds the input images to the
    # SUMMARIES collection merged below.
    tf.summary.image('input', image_holder)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))

    fetches = [
        train_op1, train_op2, train_op3, train_op4, train_op5, train_op6, train_op7,
        train_loss1, train_loss2, train_loss3, train_loss4, train_loss5, train_loss6,
        train_loss7, train_acc, summary_op
    ]

    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    try:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        start_time1 = time.time()
        for step in range(count):
            x_batch, y_batch = get_batch()
            start_time2 = time.time()
            time_str = datetime.datetime.now().isoformat()
            feed_dict = {
                image_holder: x_batch,
                label_holder: y_batch,
                keep_prob: 0.5
            }
            (_, _, _, _, _, _, _,
             tra_loss1, tra_loss2, tra_loss3, tra_loss4, tra_loss5, tra_loss6,
             tra_loss7, acc, summary_str) = sess.run(fetches, feed_dict)
            train_writer.add_summary(summary_str, step)
            duration = time.time() - start_time2
            tra_all_loss = (tra_loss1 + tra_loss2 + tra_loss3 + tra_loss4 +
                            tra_loss5 + tra_loss6 + tra_loss7)

            # print(y_batch)  # debug only: check that training samples match their labels
            if step % 10 == 0:
                sec_per_batch = float(duration)
                print('%s : Step %d,train_loss = %.2f,acc= %.2f,sec/batch=%.3f' %
                      (time_str, step, tra_all_loss, acc, sec_per_batch))

            if step % 10000 == 0 or (step + 1) == count:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    finally:
        # Release the writer and the session even when a step fails, and
        # flush buffered summaries.
        train_writer.close()
        sess.close()
    print(time.time() - start_time1)
def main(argv=None):
    """Train the model, periodically logging, evaluating and checkpointing.

    The log directory is wiped and recreated, the graph is built on
    placeholders fed from the training data set, and the loop runs for
    ``FLAGS.max_steps`` steps: loss is printed every 10 steps, summaries are
    written every 100 steps, accuracy on the evaluation and training sets is
    measured every 1000 steps, and a checkpoint is saved every 2000 steps and
    on the last step.

    Args:
        argv: Unused; accepted so the function can serve as an app entry point.

    Any exception raised while training is printed as a traceback instead of
    being propagated.
    """
    try:
        # Start from an empty log dir: remove an existing one, then create it.
        if tf.gfile.Exists(FLAGS.log_dir):
            tf.gfile.DeleteRecursively(FLAGS.log_dir)
        tf.gfile.MakeDirs(FLAGS.log_dir)

        # Tell TensorFlow that the model will be built into the default Graph.
        with tf.Graph().as_default():
            data_sets = data_input.read_evaluation_and_train_image_batches(FLAGS)
            train_data_set = data_sets.train
            evaluation_data_set = data_sets.evaluation

            images_placeholder, labels_placeholder = utils.create_placeholder_inputs(
                FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width)

            # Build a Graph that computes predictions from the inference model.
            # We use the same weight's etc. for the training and evaluation
            logits = model.inference(images_placeholder, train_data_set.batch_size,
                                     train_data_set.num_classes)

            # Accuracy
            correct = tf.nn.in_top_k(logits, labels_placeholder, 1)
            eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))

            # Add to the Graph the Ops for loss calculation.
            train_loss = _create_train_loss(logits, labels_placeholder)

            # Add to the Graph the Ops that calculate and apply gradients.
            global_step = tf.Variable(0, trainable=False)
            if FLAGS.optimizer == 0:
                print("Using gradient descent optimizer.")
                train_op = _create_gradient_descent_train_op(
                    train_loss, global_step, train_data_set.size)
            else:
                print("Using adam optimizer.")
                train_op = _create_adam_train_op(train_loss, global_step)

            # Create a saver for writing training checkpoints.
            saver = tf.train.Saver(tf.all_variables())

            # Add tensorboard summaries
            tf.image_summary('image_train', train_data_set.images, max_images=5)
            tf.image_summary('image_evaluation', evaluation_data_set.images, max_images=5)

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.merge_all_summaries()

            # Add the variable initializer Op.
            init = tf.initialize_all_variables()

            # Create a session for running Ops on the Graph.
            sess = tf.Session()

            # Run the Op to initialize the variables.
            sess.run(init)

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph)

            def report_accuracy(label, tag, data_set, step):
                """Evaluate ``data_set``, print its precision and log it under ``tag``."""
                num_examples, true_count = evaluation.do_eval(
                    sess, eval_correct, images_placeholder, labels_placeholder, data_set)
                # float() keeps the ratio from truncating to 0 under
                # Python 2 integer division.
                precision = float(true_count) / num_examples
                print(' %s | Num examples: %d Num correct: %d Precision @ 1: %0.04f' %
                      (label, num_examples, true_count, precision))
                summary = tf.Summary(
                    value=[tf.Summary.Value(tag=tag, simple_value=precision)])
                summary_writer.add_summary(summary, step)

            #
            # Training loop
            #
            try:
                for step in xrange(FLAGS.max_steps):
                    if coord.should_stop():
                        break

                    start_time = time.time()
                    train_feed = utils.create_feed_data(
                        sess, images_placeholder, labels_placeholder, train_data_set)
                    _, loss_value = sess.run([train_op, train_loss], feed_dict=train_feed)
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    duration = time.time() - start_time

                    # Print step loss etc. to console
                    if step % 10 == 0:
                        steps_per_epoch = train_data_set.size // train_data_set.batch_size
                        epoch = int(step / steps_per_epoch)
                        num_examples_per_step = train_data_set.batch_size
                        examples_per_sec = num_examples_per_step / duration
                        sec_per_batch = float(duration)
                        print('%s: step %d, epoch %d | loss = %.6f | %.1f examples/sec; %.3f '
                              'sec/batch' % (datetime.now(), step, epoch, loss_value,
                                             examples_per_sec, sec_per_batch))

                    # Write summary to tensorboard
                    if step % 100 == 0:
                        summary_str = sess.run([summary_op], feed_dict=train_feed)
                        summary_writer.add_summary(summary_str[0], step)

                    # Evaluation
                    if step % 1000 == 0:
                        if step == 0 and not FLAGS.initial_accuracy:
                            # NOTE: this also skips the step-0 checkpoint below.
                            continue
                        report_accuracy('Evaluation data', "accuracy_evaluation",
                                        evaluation_data_set, step)
                        report_accuracy('Training data', "accuracy_train",
                                        train_data_set, step)

                    # Save model checkpoint
                    if step % 2000 == 0 or (step + 1) == FLAGS.max_steps:
                        checkpoint_path = os.path.join(FLAGS.log_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=global_step)
            except tf.errors.OutOfRangeError:
                checkpoint_path = os.path.join(FLAGS.log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=global_step)
                print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
            finally:
                # Finished
                print("\nWaiting for all threads...")
                coord.request_stop()
                coord.join(threads)
                # Flush buffered summaries before the session is closed.
                summary_writer.close()
                print("Closing session...\n")
                sess.close()
    except Exception:
        # Top-level boundary: report the failure but let Ctrl-C / SystemExit
        # propagate.
        traceback.print_exc()
    finally:
        print("\nDone.\n")
# Script fragment: import a frozen graph from ./output_graph.pb, accept one
# camera connection on port 8000 and display each JPEG frame found in the
# byte stream.
# NOTE(review): the handler for the `try:` below is not part of this excerpt.
with tf.gfile.FastGFile(os.path.join("./", 'output_graph.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
    _ = tf.import_graph_def(graph_def, name='')
connected = False
jaruchinho_socket = socket.socket()
camera_socket = socket.socket()
camera_socket.bind(('0.0.0.0', 8000))
camera_socket.listen(0)
# Accept a single connection and make a file-like object out of it
camera_connection = camera_socket.accept()[0].makefile('rb')
inf = model.inference()
i = 0
f = 'r'
try:
    # NOTE(review): this name shadows the builtin `bytes` for the rest of the script.
    bytes=''
    while True:
        # Accumulate the stream 32 bytes at a time until the buffer holds
        # both a start marker (\xff\xd8) and an end marker (\xff\xd9).
        bytes+=camera_connection.read(32)
        a = bytes.find('\xff\xd8')
        b = bytes.find('\xff\xd9')
        if a!=-1 and b!=-1:
            # Cut the frame out (end marker included) and keep the remainder
            # of the buffer for the next frame.
            jpg = bytes[a:b+2]
            bytes= bytes[b+2:]
            image = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8),cv2.CV_LOAD_IMAGE_COLOR)
            cv2.imshow('image',image)
            # image = cv2.resize(image,None,fx=0.25, fy=0.25, interpolation = cv2.INTER_AREA)
def train(traning_list):
    """Train MVSNet on the given sample list using one tower per GPU.

    Builds a generator-backed dataset, creates one inference/refinement/loss
    tower for each of ``FLAGS.num_gpus`` devices, averages the tower
    gradients and applies them with RMSProp under an exponentially decayed
    learning rate. Optionally restores a pre-trained checkpoint, then trains
    for ``FLAGS.epoch`` epochs, printing progress, writing summaries and
    saving checkpoints according to the display/snapshot flags.

    Args:
        traning_list: Training samples handed to ``MVSGenerator``; its length
            determines the number of iterations per epoch.
    """
    training_sample_size = len(traning_list)
    print('sample number: ', training_sample_size)

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        ########## data iterator #########
        # the generator is created once and shared by every dataset pass
        sample_source = iter(MVSGenerator(traning_list, FLAGS.view_num))
        sample_dtypes = (tf.float32, tf.float32, tf.float32)
        training_set = tf.data.Dataset.from_generator(
            lambda: sample_source, sample_dtypes)
        training_set = training_set.batch(FLAGS.batch_size)
        training_iterator = training_set.make_initializable_iterator()

        ########## optimization options ##########
        global_step = tf.Variable(0, trainable=False, name='global_step')
        lr_op = tf.train.exponential_decay(FLAGS.base_lr,
                                           global_step=global_step,
                                           decay_steps=FLAGS.stepvalue,
                                           decay_rate=FLAGS.gamma,
                                           name='lr')
        opt = tf.train.RMSPropOptimizer(learning_rate=lr_op)

        tower_grads = []
        for gpu_idx in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                with tf.name_scope('Model_tower%d' % gpu_idx) as scope:
                    # fetch one batch for this tower and pin the static shapes
                    images, cams, depth_image = training_iterator.get_next()
                    images.set_shape(
                        tf.TensorShape([None, FLAGS.view_num, None, None, 3]))
                    cams.set_shape(
                        tf.TensorShape([None, FLAGS.view_num, 2, 4, 4]))
                    depth_image.set_shape(tf.TensorShape([None, None, None, 1]))

                    # one scalar per batch element, sliced out of ``cams``
                    scalar_slice = [FLAGS.batch_size, 1, 1, 1, 1]
                    depth_start = tf.reshape(
                        tf.slice(cams, [0, 0, 1, 3, 0], scalar_slice),
                        [FLAGS.batch_size])
                    depth_interval = tf.reshape(
                        tf.slice(cams, [0, 0, 1, 3, 1], scalar_slice),
                        [FLAGS.batch_size])

                    is_master_gpu = (gpu_idx == 0)

                    # inference
                    depth_map, prob_map = inference(images, cams, FLAGS.max_d,
                                                    depth_start, depth_interval,
                                                    is_master_gpu)

                    # refinement on the first view of every sample
                    ref_image = tf.squeeze(
                        tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]),
                        axis=1)
                    refined_depth_map = depth_refine(depth_map, ref_image,
                                                     FLAGS.max_d, depth_start,
                                                     depth_interval, is_master_gpu)

                    # loss: mean of the initial and the refined depth losses
                    loss0, _, _ = mvsnet_loss(
                        depth_map, depth_image, depth_interval)
                    loss1, less_one_accuracy, less_three_accuracy = mvsnet_loss(
                        refined_depth_map, depth_image, depth_interval)
                    loss = (loss0 + loss1) / 2

                    # retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                    # gradients of this tower, collected for averaging
                    tower_grads.append(opt.compute_gradients(loss))

        # average gradient and training op
        grads = average_gradients(tower_grads)
        train_opt = opt.apply_gradients(grads, global_step=global_step)

        # summary
        summaries.append(tf.summary.scalar('loss', loss))
        summaries.append(
            tf.summary.scalar('less_one_accuracy', less_one_accuracy))
        summaries.append(
            tf.summary.scalar('less_three_accuracy', less_three_accuracy))
        summaries.append(tf.summary.scalar('lr', lr_op))
        for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            summaries.append(tf.summary.histogram(var.op.name, var))
        for grad, var in grads:
            if grad is None:
                continue
            summaries.append(
                tf.summary.histogram(var.op.name + '/gradients', grad))

        # saver
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
        summary_op = tf.summary.merge(summaries)

        # initialization option
        init_op = tf.global_variables_initializer()
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:

            # initialization
            total_step = 0
            sess.run(init_op)
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

            # load pre-trained model
            if FLAGS.pretrained_model_ckpt_path is not None:
                restorer = tf.train.Saver(tf.global_variables())
                pretrained_path = '-'.join(
                    [FLAGS.pretrained_model_ckpt_path, str(FLAGS.ckpt_step)])
                restorer.restore(sess, pretrained_path)
                print(Notify.INFO,
                      'Pre-trained model restored from %s' % pretrained_path,
                      Notify.ENDC)
                total_step = FLAGS.ckpt_step

            # training several epochs
            for epoch in range(FLAGS.epoch):

                # training of one epoch
                step = 0
                sess.run(training_iterator.initializer)
                iterations = int(training_sample_size / FLAGS.num_gpus)
                for _ in range(iterations):

                    # run one batch
                    tic = time.time()
                    try:
                        out_summary_op, out_opt, out_loss, out_less_one, out_less_three = sess.run(
                            [summary_op, train_opt, loss,
                             less_one_accuracy, less_three_accuracy])
                    except tf.errors.OutOfRangeError:
                        print("End of dataset")  # ==> "End of dataset"
                        break
                    duration = time.time() - tic

                    # print info
                    if step % FLAGS.display == 0:
                        print(
                            Notify.INFO,
                            'epoch, %d, step %d, total_step %d, loss = %.4f, (< 1px) = %.4f, (< 3px) = %.4f (%.3f sec/step)'
                            % (epoch, step, total_step, out_loss, out_less_one,
                               out_less_three, duration), Notify.ENDC)

                    # write summary
                    if step % (FLAGS.display * 10) == 0 and FLAGS.is_training:
                        summary_writer.add_summary(out_summary_op, total_step)

                    # save the model checkpoint periodically
                    snapshot_due = (total_step % FLAGS.snapshot == 0
                                    or step == (training_sample_size - 1))
                    if snapshot_due and FLAGS.is_training:
                        checkpoint_path = os.path.join(FLAGS.save_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=total_step)

                    samples_consumed = FLAGS.batch_size * FLAGS.num_gpus
                    step += samples_consumed
                    total_step += samples_consumed
def evaluate_one_image():
    """Classify a single uploaded image with the most recent checkpoint.

    Builds a fresh graph that standardizes one 208x208x3 image, runs it
    through ``model.inference`` with 11 output classes, applies softmax and
    reports the highest-scoring class.

    Returns:
        str: ``'This is a <name> with possibility <p> '`` where ``<p>`` is the
        winning softmax score printed with six decimals (the trailing space
        is part of the returned string).
    """
    # Class index -> name used in the returned sentence (index 10 is the
    # catch-all last entry, exactly as in the former if/else chain).
    class_names = (
        'Black_grass or Loose Silky-bent',
        'Charlock',
        'Cleavers',
        'Chickweed',
        'Commonwheat',
        'Fathen',
        'Maize',
        'Scentless',
        'Shepherds',
        'Cranesbill',
        'Suger',
    )

    maintain_one_image()
    # you need to change the directories to yours.
    train_dir = '/Users/y/web/uploads/'
    train = input_data.get_files(train_dir)
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 11

        # The image enters the graph through this placeholder. Previously the
        # placeholder was created but never connected, so the fed value was
        # ignored and the pixels were embedded in the graph as a constant.
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(x)
        image = tf.reshape(image, [1, 208, 208, 3])
        logit = model.inference(image, BATCH_SIZE, N_CLASSES)
        logit = tf.nn.softmax(logit)

        # you need to change the directories to yours.
        logs_train_dir = '/Users/y/web/train/logs/train/'
        saver = tf.train.Saver()

        with tf.Session() as sess:
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Checkpoint files are named '<prefix>-<global_step>'.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                # NOTE(review): nothing initializes the variables on this
                # path, so the run below is expected to fail -- confirm
                # whether an explicit error would be preferable.
                print('No checkpoint file found')

            prediction = sess.run(logit, feed_dict={x: image_array})

    # prediction has a single row, so the flat argmax is the class index.
    winner = min(int(np.argmax(prediction)), len(class_names) - 1)
    # Index a scalar: formatting a 1-element array with %f relies on an
    # implicit array-to-scalar conversion that newer NumPy deprecates.
    score = "%.6f " % prediction[0, winner]
    return 'This is a ' + class_names[winner] + ' with possibility ' + score
def _shard_feed(placeholders, batch, chunk_size):
    """Pair every tower placeholder with its contiguous slice of ``batch``.

    All towers but the last receive ``chunk_size // len(placeholders)``
    examples; the last tower takes everything that remains up to
    ``chunk_size`` so no example is dropped.
    """
    n_towers = len(placeholders)
    per_tower = chunk_size // n_towers
    pairs = []
    for i, placeholder in enumerate(placeholders):
        stop = chunk_size if i == n_towers - 1 else (i + 1) * per_tower
        pairs.append((placeholder, batch[i * per_tower:stop]))
    return pairs


def train(conf, ckpt=False):
    """
    Train model for a number of steps.

    Builds one model tower per GPU (``FLAGS.num_gpus``), averages the tower
    gradients, and applies them with Adam. Every 40 steps a PSNR line is
    printed, every 50 steps summaries are written, and every 150 steps (plus
    at the end of each epoch and once after the last epoch) a checkpoint is
    saved under ``conf["path_tmp"]``.

    Args:
      conf: configuration dictionary; reads the keys "cw", "mb_size",
        "path_tmp", "n_epochs", "iw", "grad_norm_thresh" and "l2_reg"
        (and is forwarded whole to the ``tools`` / ``model`` helpers)
      ckpt: restore from ckpt
    """
    cw = conf["cw"]
    mb_size = conf["mb_size"]
    path_tmp = conf["path_tmp"]
    n_epochs = conf["n_epochs"]
    iw = conf["iw"]
    grad_norm_thresh = conf["grad_norm_thresh"]

    tools.reset_tmp(path_tmp, ckpt)

    # Prepare data
    tr_stream, te_stream = tools.prepare_data(conf)
    try:
        n_tr = tr_stream.dataset.num_examples

        with tf.Graph().as_default(), tf.device("/cpu:0" if FLAGS.dev_assign else None):
            # Exponential decay learning rate
            global_step = tf.get_variable(
                "global_step", [], initializer=tf.constant_initializer(0), dtype=tf.int32, trainable=False
            )
            lr = tools.exp_decay_lr(global_step, n_tr, conf)

            # Create an optimizer that performs gradient descent
            opt = tf.train.AdamOptimizer(lr)

            # Placeholders: one input/target pair per tower. Targets are
            # smaller than inputs by `cw` pixels on every border.
            Xs = [tf.placeholder(tf.float32, [None, iw, iw, 1], name="X_%02d" % i) for i in range(FLAGS.num_gpus)]
            Ys = [
                tf.placeholder(tf.float32, [None, iw - 2 * cw, iw - 2 * cw, 1], name="Y_%02d" % i)
                for i in range(FLAGS.num_gpus)
            ]

            # Calculate the gradients for each model tower
            tower_grads = []
            y_splits = []
            for i in range(FLAGS.num_gpus):
                with tf.device(("/gpu:%d" % i) if FLAGS.dev_assign else None):
                    with tf.name_scope("%s_%02d" % (FLAGS.tower_name, i)) as scope:
                        # Calculate the loss for one tower. This function constructs
                        # the entire model but shares the variables across all towers.
                        y_split, model_vars = model.inference(Xs[i], conf)
                        y_splits.append(y_split)
                        total_loss = model.loss(y_split, model_vars, Ys[i], conf["l2_reg"], scope)

                        # Calculate the gradients for the batch of data on this tower.
                        gvs = opt.compute_gradients(total_loss)

                        # Optionally clip gradients.
                        if grad_norm_thresh > 0:
                            gvs = tools.clip_by_norm(gvs, grad_norm_thresh)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(gvs)

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Retain the summaries from the final tower.
                        summs = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

            y = tf.concat(0, y_splits, name="y")

            # We must calculate the mean of each gradient. Note that this is the
            # synchronization point across all towers.
            gvs = tools.average_gradients(tower_grads)

            # Apply the gradients to adjust the shared variables.
            apply_grad_op = opt.apply_gradients(gvs, global_step=global_step)

            # Add a summary to track the learning rate.
            summs.append(tf.scalar_summary("learning_rate", lr))

            # Add histograms for gradients. Compare against None explicitly:
            # a gradient is a tensor and must not be used as a Python bool.
            for g, v in gvs:
                if g is not None:
                    v_name = re.sub("%s_[0-9]*/" % FLAGS.tower_name, "", v.op.name)
                    summs.append(tf.histogram_summary(v_name + "/gradients", g))

            # Tensorflow boilerplate
            sess, saver, summ_writer, summ_op = tools.tf_boilerplate(summs, conf, ckpt)

            # Train
            format_str = (
                "%s| %04d PSNR=%.3f (%.3f) (F+B: %.1fex/s; %.1fs/batch)"
                "(F: %.1fex/s; %.1fs/batch)"
            )
            # Start from the stored global step so a restored run keeps counting.
            step = sess.run(global_step)
            for epoch in range(n_epochs):
                print("--- Epoch %d ---" % epoch)

                # Training
                for X_c, y_c in tr_stream.get_epoch_iterator():
                    # Skip batches too small to give every tower an example.
                    if X_c.shape[0] < FLAGS.num_gpus:
                        continue
                    # Crop the target border so it matches the model output.
                    y_c = y_c[:, cw:-cw, cw:-cw]
                    chunk_size = X_c.shape[0]
                    dict_input1 = _shard_feed(Xs, X_c, chunk_size)
                    dict_input2 = _shard_feed(Ys, y_c, chunk_size)
                    feed = dict(dict_input1 + dict_input2)

                    start_time = time.time()
                    sess.run(apply_grad_op, feed_dict=feed)
                    duration_tr = time.time() - start_time

                    if step % 40 == 0:
                        # Forward pass only: inputs are enough to evaluate `y`.
                        feed2 = dict(dict_input1)
                        start_time = time.time()
                        y_eval = sess.run(y, feed_dict=feed2)
                        duration_eval = time.time() - start_time

                        psnr = tools.eval_psnr(y_c, y_eval)
                        # Baseline: PSNR of the (cropped) input against the target.
                        bl_psnr = tools.eval_psnr(y_c, X_c[:, cw:-cw, cw:-cw])
                        ex_per_step_tr = mb_size * FLAGS.num_gpus / duration_tr
                        ex_per_step_eval = mb_size * FLAGS.num_gpus / duration_eval
                        print(
                            format_str
                            % (
                                datetime.now().time(),
                                step,
                                psnr,
                                bl_psnr,
                                ex_per_step_tr,
                                float(duration_tr / FLAGS.num_gpus),
                                ex_per_step_eval,
                                float(duration_eval / FLAGS.num_gpus),
                            )
                        )

                    if step % 50 == 0:
                        summ_str = sess.run(summ_op, feed_dict=feed)
                        summ_writer.add_summary(summ_str, step)

                    if step % 150 == 0:
                        saver.save(sess, os.path.join(path_tmp, "ckpt"), global_step=step)

                    step += 1

                # Checkpoint at the end of every epoch.
                saver.save(sess, os.path.join(path_tmp, "ckpt"), global_step=step)

            # Final checkpoint (also covers the n_epochs == 0 case).
            saver.save(sess, os.path.join(path_tmp, "ckpt"), global_step=step)
    finally:
        # Release the data streams even when training aborts with an error.
        tr_stream.close()
        te_stream.close()
# Script: connects an MNIST TFRecord input pipeline to the model graph and
# re-initializes the training iterator at the start of every epoch.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import dataset
import model

# Batch size and class count handed to the dataset helper.
BATCHSIZE = 128
NCLASS = 10

# NOTE(review): presumably `iterator` is a feedable iterator selected through
# `handle`, with separate train/val iterators -- confirm in dataset.py.
handle, train_iterator, val_iterator, iterator = dataset.get_batches_v3(
    'train_mnist.tfrecords', 'val_mnist.tfrecords', BATCHSIZE, NCLASS)
images, labels = iterator.get_next()  # plug the input pipeline directly into the computation graph

# Model outputs, loss, accuracy and the training op, all built on the
# iterator's tensors.
train_out = model.inference(images)
loss = model.losses(train_out, labels)
train_acc = model.accuray(train_out, labels)
train_step = model.train(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Evaluate the string handles of both iterators once, up front.
train_handle = sess.run(train_iterator.string_handle())
val_handle = sess.run(val_iterator.string_handle())

num_epoches = 200
for epoch in range(num_epoches):
    # Only the iterator reset is visible here; no training step is run in
    # this part of the script.
    sess.run(train_iterator.initializer)
def run_training():
    """Train MNIST for a number of steps.

    Builds the model graph on placeholders, then runs ``FLAGS.max_steps``
    training steps. Every 100 steps the loss is printed and summaries are
    written; every 1000 steps and at the last step a checkpoint is saved and
    the model is evaluated on the training, validation and test sets.
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.
    data_sets = tf_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = model.inference(images_placeholder, FLAGS.hidden1,
                                 FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = model.loss(logits, labels_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = model.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = model.evaluation(logits, labels_placeholder)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Op that initializes the variables (non-deprecated spelling of
        # initialize_all_variables, available alongside tf.summary.*).
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph; the context manager
        # closes it when training finishes or fails.
        with tf.Session() as sess:
            sess.run(init)

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                                   graph=sess.graph)
            try:
                # And then after everything is built, start the training loop.
                # `range` keeps this runnable on Python 3 as well as Python 2.
                for step in range(FLAGS.max_steps):
                    start_time = time.time()

                    # Fill a feed dictionary with the actual set of images and
                    # labels for this particular training step.
                    feed_dict = fill_feed_dict(data_sets.train,
                                               images_placeholder,
                                               labels_placeholder)

                    # Run one step of the model. The return values are the
                    # activations from the `train_op` (which is discarded) and
                    # the `loss` Op. To inspect the values of your Ops or
                    # variables, you may include them in the list passed to
                    # sess.run() and the value tensors will be returned in the
                    # tuple from the call.
                    _, loss_value = sess.run([train_op, loss],
                                             feed_dict=feed_dict)

                    duration = time.time() - start_time

                    # Write the summaries and print an overview fairly often.
                    if step % 100 == 0:
                        # Print status to stdout.
                        print('Step %d: loss = %.2f (%.3f sec)' %
                              (step, loss_value, duration))
                        # Update the events file and push it to disk so the
                        # record is not lost if the process stops early.
                        summary_str = sess.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                    # Save a checkpoint and evaluate the model periodically.
                    if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                        # NOTE(review): FLAGS.train_dir itself is used as the
                        # checkpoint prefix, so files are named
                        # '<train_dir>-<step>' -- confirm this is intended.
                        saver.save(sess, FLAGS.train_dir, global_step=step)
                        # Evaluate against the training set.
                        print('Training Data Eval:')
                        do_eval(sess, eval_correct, images_placeholder,
                                labels_placeholder, data_sets.train)
                        # Evaluate against the validation set.
                        print('Validation Data Eval:')
                        do_eval(sess, eval_correct, images_placeholder,
                                labels_placeholder, data_sets.validation)
                        # Evaluate against the test set.
                        print('Test Data Eval:')
                        do_eval(sess, eval_correct, images_placeholder,
                                labels_placeholder, data_sets.test)
            finally:
                # Flush pending events and release the events file.
                summary_writer.close()
def training():
    """Resume training from a saved checkpoint and validate periodically.

    Reads the file lists with ``input_data.get_files(train_dir, RATIO)``,
    builds queue-based training and validation batches, and trains the model
    from step 10001 up to ``MAX_STEP``. Every 50 steps the training loss and
    accuracy are printed and summarized, every 200 steps (and at the last
    step) the model is evaluated on one validation batch, and every 1000
    steps (and at the last step) a checkpoint is written to
    ``train_logs_dir``.

    The model is built on the ``x`` / ``y_`` placeholders so that the batch
    fed to ``sess.run`` is the batch the metrics are computed on. Previously
    the model read ``train_batch`` directly and the placeholders were
    unused, so the reported validation loss/accuracy came from training data.
    """
    train, train_label, val, val_label = input_data.get_files(train_dir, RATIO)
    train_batch, train_label_batch = input_data.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    val_batch, val_label_batch = input_data.get_batch(
        val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    # Placeholders take the dtypes of the queue batches so the model sees
    # exactly the tensor types it was previously built with.
    x = tf.placeholder(train_batch.dtype, shape=[BATCH_SIZE, IMG_W, IMG_H, 3])
    y_ = tf.placeholder(train_label_batch.dtype, shape=[BATCH_SIZE])

    logits = model.inference(x, BATCH_SIZE, N_CLASSES)
    loss = model.losses(logits, y_)
    train_op = model.trainning(loss, learning_rate)
    acc = model.evaluation(logits, y_)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.get_checkpoint_state(train_logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            # Checkpoint files are named '<prefix>-<global_step>'.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
            # NOTE(review): the restore path and the start step below are
            # hard-coded to checkpoint 10000 rather than taken from
            # ckpt.model_checkpoint_path -- confirm this is intended.
            saver.restore(sess, './logs/train/model.ckpt-10000')
            print('Loading success, global_step is %s' % global_step)
        else:
            print('No checkpoint file found')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(train_logs_dir, sess.graph)
        val_writer = tf.summary.FileWriter(val_logs_dir, sess.graph)

        try:
            for step in np.arange(10001, MAX_STEP):
                if coord.should_stop():
                    break

                # Pull one training batch from the queue and feed it in.
                tra_images, tra_labels = sess.run(
                    [train_batch, train_label_batch])
                train_feed = {x: tra_images, y_: tra_labels}
                _, tra_loss, tra_acc = sess.run([train_op, loss, acc],
                                                feed_dict=train_feed)

                if step % 50 == 0:
                    print(
                        'Step %d, train loss = %.2f, train accuracy = %.2f%%'
                        % (step, tra_loss, tra_acc * 100.0))
                    # Summaries depend on the placeholders, so feed them too.
                    summary_str = sess.run(summary_op, feed_dict=train_feed)
                    train_writer.add_summary(summary_str, step)

                if step % 200 == 0 or (step + 1) == MAX_STEP:
                    val_images, val_labels = sess.run(
                        [val_batch, val_label_batch])
                    val_feed = {x: val_images, y_: val_labels}
                    val_loss, val_acc = sess.run([loss, acc],
                                                 feed_dict=val_feed)
                    print(
                        '** Step %d, val loss = %.2f, val accuracy = %.2f%% **'
                        % (step, val_loss, val_acc * 100.0))
                    summary_str = sess.run(summary_op, feed_dict=val_feed)
                    val_writer.add_summary(summary_str, step)

                if (step != 0 and step % 1000 == 0) or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(train_logs_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
            # Flush buffered summaries and release the event files.
            train_writer.close()
            val_writer.close()

        # Write the graph once more into the training log directory.
        writer = tf.summary.FileWriter(train_logs_dir)
        writer.add_graph(sess.graph)
        writer.close()