def tower_loss(scope, images, labels):
    """Calculate the total loss on a single tower running the AlexNet model.

    Args:
        scope: unique prefix string identifying the AlexNet tower, e.g. 'tower_0'
        images: Images. 4D tensor of shape [batch_size, height, width, 3].
        labels: Labels. 1D tensor of shape [batch_size].

    Returns:
        Tensor of shape [] containing the total loss for a batch of data
    """
    # Build inference Graph.
    logits = alexnet.inference(images)

    # Build the portion of the Graph calculating the losses. Note that we will
    # assemble the total_loss using a custom function below.
    _ = alexnet.loss(logits, labels)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)

    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
        # session. This helps the clarity of presentation on tensorboard.
        loss_name = re.sub('%s_[0-9]*/' % alexnet.TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)

    return total_loss
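# A minimal sketch (not part of the original code) of how tower_loss is
# typically driven in a multi-GPU setup, following the classic CIFAR-10
# multi-tower pattern. `num_gpus`, `opt`, `image_batch` and `label_batch`
# are illustration-only names assumed to exist.
def build_towers(num_gpus, opt, image_batch, label_batch):
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (alexnet.TOWER_NAME, i)) as scope:
                    # Each tower computes its own loss on its share of the batch.
                    loss = tower_loss(scope, image_batch, label_batch)
                    # Reuse variables so all towers share the same weights.
                    tf.get_variable_scope().reuse_variables()
                    # Gradients for this tower; typically averaged across towers later.
                    tower_grads.append(opt.compute_gradients(loss))
    return tower_grads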
def train():
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        with tf.device('/cpu:0'):
            dataset = input_fn(FLAGS.batch_size)
            iterator = dataset.make_one_shot_iterator()
            next_examples, next_labels = iterator.get_next()

        # Build a Graph computing logits prediction from the inference model.
        logits = alexnet.inference(next_examples['image_data'])

        # Calculate loss.
        loss = alexnet.loss(logits, next_labels)

        # Build a Graph training the model with one batch of examples and
        # updating the model parameters.
        train_op = alexnet.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for the loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    print(
                        f'{datetime.now()}: step {self._step}, loss = {loss_value:.2f} '
                        f'({examples_per_sec:.1f} examples/sec; {sec_per_batch:.3f} sec/batch)'
                    )

        # MonitoredTrainingSession automatically initializes and/or restores
        # variables before returning; MonitoredSession.run() automatically
        # recovers from PS failures, and additional code can run in hooks.
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_step),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=False,
                    gpu_options=tf.GPUOptions(allow_growth=True))) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
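# A minimal sketch (illustration only) of the command-line flags that train()
# relies on; the default values below are assumptions, not taken from the
# original code.
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/alexnet_train',
                           'Directory where checkpoints and summaries are written.')
tf.app.flags.DEFINE_integer('max_step', 100000, 'Number of batches to run.')
tf.app.flags.DEFINE_integer('batch_size', 128, 'Number of images to process in a batch.')
tf.app.flags.DEFINE_integer('log_frequency', 10, 'How often to log results to the console.')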
def evaluate(images, checkpoint_dir):
    weights = weight_variable()
    biases = bias_variable()

    x = tf.placeholder(tf.float32, [None, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    keep_prob = tf.placeholder(tf.float32)

    pred = inference(x, weights, biases, keep_prob)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('load model %s' % ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('No checkpoint file found at %s' % checkpoint_dir)
            return None

        # Run a forward pass with dropout disabled (keep_prob = 1).
        output = sess.run(pred, feed_dict={x: images, keep_prob: 1.})
        return output
def evaluate_by_class(data_dir, checkpoint_dir=None, model_checkpoint_path=None):
    weights = weight_variable()
    biases = bias_variable()

    x = tf.placeholder(tf.float32, [None, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    keep_prob = tf.placeholder(tf.float32)

    pred = inference(x, weights, biases, keep_prob)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        if checkpoint_dir:
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                print('load latest model %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print('No checkpoint file found at %s' % checkpoint_dir)
                return
        elif model_checkpoint_path:
            print('load model %s' % model_checkpoint_path)
            saver.restore(sess, model_checkpoint_path)
        else:
            return

        total_true = 0
        total_examples = 0
        for name in class_names:
            dataset = LazyDataSet(data_dir=data_dir, label=name, batch_size=50)
            true_count = 0
            for i in range(dataset.num_epochs):
                images, labels = dataset.next_batch()
                output = sess.run(pred, feed_dict={x: images, keep_prob: 1.})
                batch_true_count = eval_once(output, labels)
                true_count += batch_true_count
                print('step:%d, %d/%d, accuracy: %g' %
                      (i, batch_true_count, dataset.batch_size,
                       batch_true_count * 1.0 / dataset.batch_size))
            num_examples = dataset.num_epochs * dataset.batch_size
            print('class:%s, %d/%d, accuracy: %g' %
                  (name, true_count, num_examples, true_count * 1.0 / num_examples))
            total_true += true_count
            total_examples += num_examples
        print('%d/%d, total accuracy: %g' %
              (total_true, total_examples, total_true * 1.0 / total_examples))
def main(argv=None):
    # test data input
    test_file = tf.train.match_filenames_once("../cifar-10-data/eval.tfrecords")
    test_dataset = tf.data.TFRecordDataset(test_file)
    test_dataset = test_dataset.map(alexnet.test_parser)
    test_dataset = test_dataset.batch(1000)
    test_dataset = test_dataset.repeat(None)
    test_iterator = test_dataset.make_initializable_iterator()
    test_image, test_label = test_iterator.get_next()

    # test
    test_logits = alexnet.inference(test_image, False)
    test_cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=test_logits, labels=test_label)
    test_loss = tf.reduce_mean(test_cross_entropy)
    correct_prediction = tf.equal(
        tf.argmax(test_logits, -1, output_type=tf.int32), test_label)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        sess.run(test_iterator.initializer)

        ckpt = tf.train.get_checkpoint_state(alexnet_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            # for i in range(10):
            saver.restore(sess, ckpt.model_checkpoint_path)
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            test_loss_value, accuracy_score = sess.run([test_loss, accuracy])
            print("steps: %s, test_loss: %g, accuracy: %g" %
                  (global_step, test_loss_value, accuracy_score))
        else:
            print('No checkpoint file found')
            return
def main(argv=None):
    # train data input
    train_file = tf.train.match_filenames_once("../cifar-10-data/train.tfrecords")
    train_dataset = tf.data.TFRecordDataset(train_file)
    train_dataset = train_dataset.map(alexnet.train_parser)
    train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)
    train_dataset = train_dataset.repeat(None)
    train_iterator = train_dataset.make_initializable_iterator()
    train_image, train_label = train_iterator.get_next()

    # train
    y = alexnet.inference(train_image, True)
    global_step = tf.Variable(0, trainable=False)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=train_label)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    tf.add_to_collection('losses', cross_entropy_mean)
    loss = tf.add_n(tf.get_collection('losses'))
    correct_prediction = tf.equal(
        tf.argmax(y, -1, output_type=tf.int32), train_label)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_step, DECAY_STEPS, LEARNING_RATE_DECAY,
        staircase=True)
    train_step = tf.train.MomentumOptimizer(learning_rate, MOMENTUM).minimize(
        loss, global_step=global_step)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()])
        sess.run(train_iterator.initializer)

        f = open('result/result2.txt', 'a')
        for i in range(TRAINING_STEPS):
            _, loss_value, step, accuracy_score = sess.run(
                [train_step, loss, global_step, accuracy])
            if i % 100 == 0:
                print("steps: %d, loss: %g, accuracy: %g" %
                      (step, loss_value, accuracy_score))
                f.write("%d\t%f\t%f\n" % (step, loss_value, accuracy_score))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                           global_step=global_step)
        f.close()
data = load_datasets()

learning_rate = 0.05
dropout = 0.8
training_iters = 20000
batch_size = 64
display_step = 10

x = tf.placeholder(tf.float32, [None, 227, 227, 3])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

weights, biases = parameters()
pred = inference(x, weights, biases, keep_prob)

# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
# optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
softmax_linear = tf.nn.softmax(pred)
cost = tf.reduce_mean(
    -tf.reduce_sum(y * tf.log(softmax_linear + 1e-10), axis=[1]))
# softmax_linear = tf.nn.log_softmax(pred)
# cost = -tf.reduce_sum(y * softmax_linear)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
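# A minimal sketch of the training loop that would drive the graph above.
# It assumes `data` exposes a next_batch(batch_size) method returning
# (images, labels); that interface is an assumption, not shown above.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    step = 1
    while step * batch_size < training_iters:
        batch_x, batch_y = data.next_batch(batch_size)
        # One optimization step with dropout enabled.
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Evaluate loss and accuracy on the current batch without dropout.
            loss_val, acc = sess.run([cost, accuracy],
                                     feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
            print('step %d, loss %.4f, accuracy %.4f' % (step * batch_size, loss_val, acc))
        step += 1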
num_classes = 2

# Layers that should be retrained (fine-tuned).
train_layers = ['fc8', 'fc7', 'fc6']

# Read local images to build the training set; returns image_batch and label_batch.
train, train_label = input_data.get_files(train_dir)
x, y = input_data.get_batch(train, train_label, image_size, image_size, batch_size, 2000)

# TF placeholders for the graph input and output; each step reads a small
# batch of data as the current training data for backpropagation.
# x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3], name='x-input')
# y = tf.placeholder(tf.float32, [batch_size, num_classes])
keep_prob = tf.placeholder(tf.float32)

# Define the network structure and initialize the model.
# model = AlexNet(x, keep_prob, num_classes, train_layers)
score = inference(x)
# Output of the network's forward pass.
# score = model.fc8

# List of trainable variables belonging to the layers we want to retrain.
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]

# Define the loss function.
with tf.name_scope("cross_ent"):
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=score, labels=y))

# Define the backpropagation (optimization) step.
with tf.name_scope("train"):
    # Gradients of the loss w.r.t. the selected trainable variables.
    gradients = tf.gradients(loss, var_list)
    gradients = list(zip(gradients, var_list))
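# A minimal continuation sketch (not part of the original snippet): apply the
# collected gradients to the selected variables. The optimizer choice and the
# 0.01 learning rate are assumptions for illustration only.
with tf.name_scope("apply_grads"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    train_op = optimizer.apply_gradients(grads_and_vars=gradients)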
'''
MNIST digit recognition task.
1. Read the MNIST data.
2. Design AlexNet.
3. Do the training.
4. Get training info:
   - Validation accuracy every 5 steps (batch normalization).
   - Final accuracy.
   - Names and groups for TensorBoard.
'''
import tensorflow as tf
import alexnet
from tensorflow.examples.tutorials.mnist import input_data


class MNISTRecognition:
    def __init__(self):
        # Placeholder: model construction not implemented yet.
        pass

    def inference(self, input):
        # Placeholder: forward pass not implemented yet.
        pass


if __name__ == '__main__':
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # Placeholder call; alexnet.inference() still needs the input images
    # (and any other required arguments) wired in.
    alexnet.inference()