def evaluation():
    with tf.Graph().as_default():
        n_test = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
        eval_images, eval_labels = cifar10_input.inputs(DATA_DIR, BATCH_SIZE)
        eval_logits = cifar10_model.inference(eval_images)  # logits for the test images

        # tf.nn.in_top_k(predictions, targets, k, name=None) checks, per sample,
        # whether the target label is among the top-k predictions. With k=1 this
        # compares the index of the highest predicted probability with the label.
        top_k_op = tf.nn.in_top_k(eval_logits, eval_labels, 1)

        saver = tf.train.Saver()
        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('./signal_GPU/saver')
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=session, coord=coord)

            num_iter = int(n_test / BATCH_SIZE)
            true_count = 0
            for step in range(num_iter):
                predictions = session.run(top_k_op)
                true_count = true_count + np.sum(predictions)

            precision = true_count / (num_iter * BATCH_SIZE)
            print('precision=', precision)

            coord.request_stop()
            coord.join(threads)
def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = cifar10_model.distorted_inputs()
        logits = cifar10_model.inference(images)
        loss = cifar10_model.loss(logits, labels)
        train_op = cifar10_model.train(loss, global_step)

        init = tf.global_variables_initializer()
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        tf.train.start_queue_runners(sess=sess)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            if step % 100 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
def train():
    my_global_step = tf.Variable(0, name='global_step', trainable=False)

    data_dir = './data/cifar-10-batches-bin/'
    log_dir = './logs/train/'

    images, labels = cifar10_input.read_cifar10(data_dir=data_dir,
                                                is_train=True,
                                                batch_size=BATCH_SIZE,
                                                shuffle=True)
    logits = cifar10_model.inference(images, BATCH_SIZE, n_classes=N_CLASSES)
    loss = cifar10_model.losses(logits, labels)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)  # define the optimizer
    train_op = optimizer.minimize(loss, global_step=my_global_step)  # run the optimization

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    sess = tf.Session()
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, loss_value = sess.run([train_op, loss])

            if step % 50 == 0:
                print('Step: %d, loss: %.4f' % (step, loss_value))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    sess.close()

#%%
def train():
    # Read the images and feed them through the network
    images, labels = cifar10_input.distorted_inputs(DATA_DIR, BATCH_SIZE)
    t_logits = cifar10_model.inference(images)

    # Loss
    t_loss = cifar10_model.loss(t_logits, labels)
    tf.summary.scalar('loss_value', t_loss)

    # Optimizer
    global_step = tf.Variable(0, trainable=False)
    t_optimizer = cifar10_model.train_step(t_loss, global_step)

    # Accuracy on the training batches
    t_accuracy = cifar10_model.accuracy(t_logits, labels)
    tf.summary.scalar('accuracy_value', t_accuracy)

    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    Accuracy_value = []
    Loss_value = []

    # Cap the fraction of GPU memory this process may use
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5

    with tf.Session(config=config) as session:
        session.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)
        train_writer = tf.summary.FileWriter('./signal_GPU/logs', session.graph)

        for index in range(EPOCHES):
            _, loss_value, accuracy_value, summary = session.run(
                [t_optimizer, t_loss, t_accuracy, merged])
            Accuracy_value.append(accuracy_value)
            Loss_value.append(loss_value)
            if index % 1000 == 0:
                print('index:', index, ' loss_value:', loss_value,
                      ' accuracy_value:', accuracy_value)
                train_writer.add_summary(summary, index)

        saver.save(session, os.path.join('./signal_GPU/saver/', 'model.ckpt'))

        # accuracy value
        plt.figure(figsize=(20, 10))
        plt.plot(range(EPOCHES), Accuracy_value)
        plt.xlabel('training step')
        plt.ylabel('accuracy value')
        plt.title('the accuracy value of training data')
        plt.savefig('./signal_GPU/accuracy.png')

        # loss value
        plt.figure()
        plt.plot(range(EPOCHES), Loss_value)
        plt.xlabel('training step')
        plt.ylabel('loss value')
        plt.title('the value of the loss function of the training data')
        plt.savefig('./signal_GPU/loss.png')

        # train_writer.close()
        coord.request_stop()
        coord.join(threads)
def evaluate():
    with tf.Graph().as_default():
        log_dir = './logs/train/'
        test_dir = './data/cifar-10-batches-bin/'
        n_test = 10000

        # read the test data
        images, labels = cifar10_input.read_cifar10(data_dir=test_dir,
                                                    is_train=False,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False)
        logits = cifar10_model.inference(images, BATCH_SIZE, N_CLASSES)

        # compare the true labels with the predictions
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:
            # restore the model from the checkpoint files
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(log_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')
                return

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                num_iter = int(math.ceil(n_test / BATCH_SIZE))  # ceil(10000 / 64) batches
                true_count = 0
                total_sample_count = num_iter * BATCH_SIZE  # number of samples evaluated
                step = 0

                while step < num_iter and not coord.should_stop():
                    predictions = sess.run([top_k_op])
                    true_count += np.sum(predictions)
                    step += 1

                precision = true_count / total_sample_count
                print('precision = %.3f' % precision)
            except Exception as e:
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)

#%%
def model_fn(features, labels, mode, params):
    logits = cifar10_model.inference(image_batch=features,
                                     batch_size=params.get('batch_size'))
    loss = cifar10_model.loss(logits, labels)
    train_op = cifar10_model.train(loss, batch_size=params.get('batch_size'))

    if mode == tf.estimator.ModeKeys.TRAIN:
        logging_hook = tf.train.LoggingTensorHook({'loss': loss}, every_n_iter=1000)
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[logging_hook])
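# A minimal usage sketch (not part of the original snippet): it shows how a
# model_fn like the one above could be handed to tf.estimator.Estimator. The
# model_dir, batch size, step count, and the input function
# `cifar10_input.train_input_fn` are assumptions made for illustration; only the
# TRAIN branch is implemented above, so only train() is exercised here.
def run_estimator_training():
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir='./logs/estimator',   # assumed checkpoint/summary directory
        params={'batch_size': 128})     # assumed batch size
    # The input_fn must return a (features, labels) batch pair on each call.
    estimator.train(input_fn=lambda: cifar10_input.train_input_fn(128),
                    steps=10000)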
def run_predicting(class_names, images, real_labels=None):
    """
    Run image predicting. Use matplotlib to draw the predicting results.
    :param class_names: names of labels.
    :param images: A numpy array of shape [NUM, HEIGHT, WIDTH, DEPTH], float32,
        represents the images to predict.
    :param real_labels: A numpy array of shape [NUM], int32, each element
        represents the class id of the image to predict.
    :return: if `real_labels` is not None, returns the accuracy, else returns None.
    """
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # images = sess.run(central_crop_images(images))
        # logits, _ = cifar10_model.inference(tf.convert_to_tensor(images),
        #                                     iteration=None,
        #                                     is_test=tf.convert_to_tensor(True),
        #                                     keep_prob=1.0)
        logits, _ = cifar10_model.inference(central_crop_images(images),
                                            iteration=None,
                                            is_test=tf.convert_to_tensor(True),
                                            keep_prob=1.0)
        pred_op = prediction(logits)
        ckpt = tf.train.get_checkpoint_state(cifar10_train.LOG_DIR)
        saver = tf.train.Saver()
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            pred_labels = sess.run(pred_op)
            pred_labels = [class_names[i] for i in pred_labels]
            if real_labels is not None:
                eval_op = cifar10_eval.evaluation(logits, real_labels)
                accuracy = sess.run(eval_op)
                print("accuracy: %f" % accuracy)
                rlbl = [class_names[i] for i in real_labels]
                datavis.data_vis(images, pred_labels, rlbl)
        else:
            print('You must train before use!')
def run_training():
    cifar10_data = Cifar10Data('./input_data')

    images_pl = tf.placeholder(tf.float32, [
        None, cifar10_model.IMAGE_BATCH_HEIGHT, cifar10_model.IMAGE_BATCH_WIDTH,
        cifar10_model.IMAGE_BATCH_DEPTH
    ])
    labels_pl = tf.placeholder(tf.int32)
    keep_prob_pl = tf.placeholder(tf.float32)
    learning_rate_pl = tf.placeholder(tf.float32)
    is_test_pl = tf.placeholder(tf.bool)
    iter_pl = tf.placeholder(tf.int32)

    with tf.Session() as sess:
        logits, update_ema = cifar10_model.inference(images_pl, iter_pl, is_test_pl,
                                                     keep_prob_pl)
        total_loss = cifar10_model.loss(logits, labels_pl)
        train_op = cifar10_model.train(total_loss, learning_rate_pl)
        eval_op = cifar10_eval.evaluation(logits, labels_pl)

        saver = tf.train.Saver()
        summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        sess.run(tf.global_variables_initializer())

        # learning rate decay
        max_learning_rate = 0.02  # 0.003
        min_learning_rate = 0.0001
        decay_speed = 1600.0  # 2000.0

        for step in range(MAX_STEPS):
            print('step %d/%d' % (step, MAX_STEPS))
            start_time = time.time()

            images_feed, labels_feed = cifar10_data.random_training_batch(
                cifar10_model.BATCH_SIZE)
            learning_rate = min_learning_rate + (
                max_learning_rate - min_learning_rate) * math.exp(-step / decay_speed)
            images_feed = sess.run(cifar10_model.random_distort_images(images_feed))

            feed_dict = {
                images_pl: images_feed,
                labels_pl: labels_feed,
                keep_prob_pl: 0.75,
                learning_rate_pl: learning_rate,
                is_test_pl: False,
                iter_pl: step
            }
            sess.run(train_op, feed_dict=feed_dict)

            feed_dict = {
                images_pl: images_feed,
                labels_pl: labels_feed,
                keep_prob_pl: 1.0,
                learning_rate_pl: learning_rate,
                is_test_pl: False,
                iter_pl: step
            }
            sess.run(update_ema, feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if (step + 1) % 100 == 0 or (step + 1) == MAX_STEPS:
                train_eval_val, loss_value = sess.run([eval_op, total_loss],
                                                      feed_dict=feed_dict)
                print('Step %d: loss = %.2f, lr = %f (%.3f sec)' %
                      (step + 1, loss_value, learning_rate, duration))
                print('Training Data Eval: %.4f' % train_eval_val)
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

                # Evaluate the model periodically.
                # feed_dict = {images_pl: data_sets.testing_image,
                #              labels_pl: data_sets.testing_label}
                # test_eval_val = sess.run(eval_op, feed_dict=feed_dict)
                test_eval_val, test_loss_val = cifar10_eval.mass_evaluation(
                    cifar10_data, sess, eval_op, total_loss, images_pl, labels_pl,
                    keep_prob_pl, is_test_pl)
                print('Testing Data Eval: ' + str(test_eval_val) + ' loss: ' +
                      str(test_loss_val))

            # Save a checkpoint periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == MAX_STEPS:
                checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

        summary_writer.close()
def main(argv=None):
    global_step = tf.Variable(0, trainable=False)

    train_placeholder = tf.placeholder(tf.float32, shape=[32, 32, 3],
                                       name='input_image')
    label_placeholder = tf.placeholder(tf.int32, shape=[1], name='label')

    # (width, height, depth) -> (batch, width, height, depth)
    image_node = tf.expand_dims(train_placeholder, 0)

    logits = model.inference(image_node)
    total_loss = _loss(logits, label_placeholder)
    train_op = _train(total_loss, global_step)

    summary = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        total_duration = 0
        writer = tf.summary.FileWriter('./tensorflow_log', sess.graph)
        summary_i = 0

        for epoch in range(1, FLAGS.epoch + 1):
            start_time = time.time()

            for file_index in range(5):
                print('Epoch %d: %s' % (epoch, filenames[file_index]))
                reader = Cifar10Reader(filenames[file_index])

                for index in range(10000):
                    if index % 100 == 0:
                        accuracy = 0.0
                        accurate_count = 0
                        accurate_tried_count = 0

                    image = reader.read(index)

                    _, loss_value, logits_value = sess.run(
                        [train_op, total_loss, logits],
                        feed_dict={
                            train_placeholder: image.byte_array,
                            label_placeholder: image.label
                        })
                    accurate_tried_count += 1

                    result = np.argmax(logits_value, 1)
                    if ("%d" % image.label) == ("%d" % result):
                        accurate_count += 1

                    assert not np.isnan(loss_value), \
                        'Model diverged with loss = NaN'

                    if index % 100 == 99:
                        print('[%d]: %r' % (image.label, logits_value))
                        print('Inference: %r' % result)
                        accuracy = accurate_count / accurate_tried_count
                        print('Accuracy: %f' % accuracy)

                        summary_i += 1
                        summary_str = sess.run(summary,
                                               feed_dict={
                                                   train_placeholder: image.byte_array,
                                                   label_placeholder: image.label
                                               })
                        writer.add_summary(summary_str, summary_i)
                        writer.flush()

                reader.close()

            duration = time.time() - start_time
            total_duration += duration
            print('Total duration = %d sec' % total_duration)
    num_parallel_calls=4, num_epoch=1)  # continuation of a dataset-construction call whose beginning is not shown

train_iterator = train_dataset.make_initializable_iterator()
test_iterator = test_dataset.make_initializable_iterator()
train_handle = train_iterator.string_handle()
test_handle = test_iterator.string_handle()

# build public data entrance
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle, train_iterator.output_types)
labels, images = iterator.get_next()

# set global step counter
global_step = tf.Variable(initial_value=0, trainable=False, name='global_step')

# inference
logits_before_softmax = inference(images)

with tf.name_scope('train_loss'):
    # compute the loss function
    batch_loss, total_loss = loss_func(labels, logits_before_softmax)
    # summarize the training loss
    tf.summary.scalar(name='train_loss', tensor=batch_loss)

with tf.name_scope('optimization'):
    # define a placeholder to control the learning rate
    lr = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')
    # optimize the model
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3,
                                      beta1=0.9,
                                      beta2=0.999,
                                      epsilon=1e-08).minimize(batch_loss,
                                                              global_step=global_step)
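# A minimal usage sketch (not part of the original snippet) of the feedable-iterator
# pattern built above: each iterator is initialized once, its string handle is
# evaluated, and the handle value is fed through `handle` to pick which dataset the
# graph reads from. The session setup and loop structure are assumptions.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_handle_val = sess.run(train_handle)
    sess.run(train_iterator.initializer)
    try:
        while True:
            _, loss_val, step_val = sess.run([train_op, batch_loss, global_step],
                                             feed_dict={handle: train_handle_val})
    except tf.errors.OutOfRangeError:
        pass  # with num_epoch=1 the training dataset is exhausted after one pass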
def train(): """ train cifar10 for a number of steps """ with tf.Graph().as_default(): global_step = tf.train.get_checkpoint_state() # get images and labels for cifar-10 # force input pipeline to CPU:0 to avoid operations sometimes ending up # on GPU and resulting in a show down. with tf.device('/cpu:0'): images, labels = cifar10_model.distorted_inputs() # build a graph that computes the logits predictions from # the inference model. logits = cifar10_model.inference(images) # calculate loss. loss = cifar10_model.loss(logits, labels) # build a graph that trains the model with one batch of examples # and updates the model parameters train_op = cifar10_model.train(loss, global_step) class _LoggerHook(tf.train.SessionRunHook): """ Logs loss and runtime. """ def begin(self): self._step = -1 self._start_time = time.time() def before_run(self, run_context): self._step += 1 return tf.train.SessionRunArgs(loss) # asks for loss value. def after_run(self, run_context, run_values): if self._step % FLAGS.log_frequency == 0: current_time = time.time() duration = current_time - self._start_time self._start_time = current_time loss_value = run_values.results examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration sec_per_batch = float(duration / FLAGS.log_frequency) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), self._step, loss_value, examples_per_sec, sec_per_batch)) with tf.train.MonitoredTrainingSession( checkpoint_dir=FLAGS.train_dir, hooks=[ tf.train.StopAtStepHook(last_step=FLAGS.max_steps), tf.train.NanTensorHook(loss), _LoggerHook() ], config=tf.ConfigProto(log_device_placement=FLAGS. log_device_placement)) as mon_sess: while not mon_sess.should_stop(): mon_sess.run(train_op)