def run():
    """Evaluate the restored Inception V3 model on the validation split.

    Builds a fresh graph, restores ``TassMovidiusEval.checkpoint_file``,
    tracks streaming accuracy for ``test_num_epochs`` passes over the
    validation split, writes summaries to the ``log_eval`` directory and
    saves annotated figures for up to ten images of the last fetched batch
    under ``model/eval/``.
    """
    settings = TassMovidiusEval._confs["ClassifierSettings"]

    # Create log_dir for evaluation information (including missing parents).
    if not os.path.exists(settings["log_eval"]):
        os.makedirs(settings["log_eval"])

    # The annotated figures are written here; make sure the folder exists,
    # otherwise the image writes below cannot succeed.
    figure_dir = 'model/eval/'
    if not os.path.exists(figure_dir):
        os.makedirs(figure_dir)

    # Just construct the graph from scratch again
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        # Get the dataset first and load one batch of validation images and
        # labels tensors. is_training=False selects the evaluation
        # preprocessing.
        dataset = TassMovidiusEval.getSplit('validation')
        images, raw_images, labels = TassMovidiusEval.loadBatch(
            dataset, is_training=False)

        # Whole batches per epoch. Integer division keeps the
        # "start of epoch" modulo check below exact.
        num_batches_per_epoch = (
            dataset.num_samples // settings["test_batch_size"])
        total_steps = int(num_batches_per_epoch * settings["test_num_epochs"])

        # Now create the inference model but set is_training=False
        with slim.arg_scope(inception_v3_arg_scope()):
            _, end_points = inception_v3(
                images, num_classes=dataset.num_classes, is_training=False)

        # Get all the variables to restore from the checkpoint file and
        # create the saver function to restore them.
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            return saver.restore(sess, TassMovidiusEval.checkpoint_file)

        # Just define the metrics to track, without any loss.
        probabilities = end_points['Predictions']
        predictions = tf.argmax(probabilities, 1)
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update)

        # Create the global step and an increment op for monitoring. There
        # is no apply_gradients call here, so the step is increased manually.
        global_step = get_or_create_global_step()
        global_step_op = tf.assign(global_step, global_step + 1)

        def eval_step(sess, metrics_op, global_step):
            """Run one metrics update, log it and return the accuracy.

            ``global_step`` is accepted for call compatibility only; the
            step counter is advanced through ``global_step_op``.
            """
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            # Log some information
            logging.info(
                'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        # Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()

        # Get your supervisor; summaries are computed manually below.
        sv = tf.train.Supervisor(
            logdir=settings["log_eval"],
            summary_op=None,
            init_fn=restore_fn)

        # Now we are ready to run in one session
        with sv.managed_session() as sess:
            for step in range(total_steps):
                # Print vital information at the start of every epoch.
                if step % num_batches_per_epoch == 0:
                    logging.info(
                        'Epoch: %s/%s',
                        step // num_batches_per_epoch + 1,
                        settings["test_num_epochs"])
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                eval_step(sess, metrics_op=metrics_op,
                          global_step=sv.global_step)

                # Compute summaries every 10 steps.
                if step % 10 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            # Visualize one more batch to see what the model has predicted.
            # Fetched values get their own names so the tensors above are
            # not shadowed.
            (raw_image_values, label_values, prediction_values,
             probability_values) = sess.run(
                 [raw_images, labels, predictions, probabilities])

            # The batch may hold fewer than 10 images.
            for i in range(min(10, len(raw_image_values))):
                image = raw_image_values[i]
                label = label_values[i]
                prediction = prediction_values[i]
                probability = probability_values[i]
                prediction_name = dataset.labels_to_name[prediction]
                label_name = dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
                    prediction_name, label_name, probability[prediction])

                # i is an int, so it must be converted before concatenation.
                image_path = figure_dir + str(i) + '.png'
                if not cv2.imwrite(image_path, image):
                    logging.warning('Could not write %s', image_path)
                    continue

                # NOTE(review): assumes `pl` is matplotlib's pyplot/pylab
                # interface (provides imshow/title/savefig/clf) - confirm
                # against the file's imports.
                img_plot = pl.imshow(cv2.imread(image_path))
                # Hide the axis ticks on the rendered image.
                img_plot.axes.get_xaxis().set_ticks([])
                img_plot.axes.get_yaxis().set_ticks([])
                pl.title(text)
                pl.savefig(figure_dir + str(i) + "fig.png")
                # Start the next image from an empty figure.
                pl.clf()

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.'
            )
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
def run():
    """Train the Inception V3 classifier and export a frozen graph.

    Steps, in order:
      1. Read the labels file into ``Trainer.labelsToName``.
      2. Build the training graph (model, loss, Adam optimizer with an
         exponentially decaying learning rate, streaming accuracy,
         summaries) and train for ``dev_cloud_epochs`` epochs.
      3. Save a final checkpoint, rebuild the model for inference on a
         single-image placeholder, restore the latest checkpoint and write
         the frozen graph to ``model/DevCloudIDC.pb``.

    Start and end times are printed to stdout.
    """
    humanStart = datetime.now()
    clockStart = time.time()

    print("-- Training Starting ")
    print("-- STARTED: ", humanStart)
    print("")

    settings = Trainer._confs["ClassifierSettings"]

    # Open the labels file and build a dictionary mapping each label id to
    # its string name. The file is closed when the block exits;
    # Trainer.labels keeps referring to the (then closed) handle.
    with open(settings["labels_file"], 'r') as labels_file:
        Trainer.labels = labels_file
        for line in labels_file:
            line = line.rstrip('\r\n')  # Drop the newline only if present
            if not line.strip():
                continue  # Tolerate blank lines
            # Split on the first ':' only so names may contain colons.
            label, string_name = line.split(':', 1)
            Trainer.labelsToName[int(label)] = string_name

    # Create a dictionary that will help people understand your dataset
    # better. This is required by the Dataset class later.
    Trainer.items_to_descriptions = {
        'image': 'A 3-channel RGB coloured image that is ex: office, people',
        'label': 'A label that ,start from zero'
    }

    # Create the log directory here. Must be done here otherwise import
    # will activate this unneededly.
    if not os.path.exists(settings["log_dir"]):
        os.makedirs(settings["log_dir"])

    # ======================= TRAINING PROCESS =========================
    # Now we start to construct the graph and build our model
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        # First create the dataset and load one batch
        dataset = Trainer.getSplit('train')
        images, _, labels = Trainer.loadBatch(dataset)

        # One step is one batch processed, so steps per epoch equals
        # batches per epoch.
        num_batches_per_epoch = dataset.num_samples // settings["batch_size"]
        num_steps_per_epoch = num_batches_per_epoch
        decay_steps = int(
            settings["num_epochs_before_decay"] * num_steps_per_epoch)

        # Create the model inference
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(
                images, num_classes=dataset.num_classes, is_training=True)

        # Perform one-hot-encoding of the labels
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Register the cross-entropy loss, then collect the total loss
        # (which also includes the regularization losses).
        tf.losses.softmax_cross_entropy(
            onehot_labels=one_hot_labels, logits=logits)
        total_loss = tf.losses.get_total_loss()

        # Create the global step for monitoring the learning_rate and
        # training.
        global_step = get_or_create_global_step()

        # Define your exponentially decaying learning rate
        lr = tf.train.exponential_decay(
            learning_rate=settings["initial_learning_rate"],
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=settings["learning_rate_decay_factor"],
            staircase=True)

        # Now we can define the optimizer that takes on the learning rate
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        # Create the train_op.
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # State the metrics to track. Predictions are class indices, not
        # one-hot encoded.
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        # Create all the summaries to monitor and group them into one op.
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        def train_step(sess, train_op, global_step, epochCount):
            """Run one training step plus the metrics update and log it.

            Returns the loss value and the global step count fetched in
            the same ``sess.run`` call.
            """
            # Check the time for each sess run
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run(
                [train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            # Run the logging to print some results
            logging.info(
                ' Epch %.2f Glb Stp %s: Loss: %.4f (%.2f sec/step)',
                epochCount, global_step_count, total_loss, time_elapsed)

            return total_loss, global_step_count

        # Define your supervisor for running a managed session. Do not run
        # the summary_op automatically or else it will consume too much
        # memory.
        sv = tf.train.Supervisor(logdir=settings["log_dir"], summary_op=None)

        # Run the managed session
        with sv.managed_session() as sess:
            # Stays None when the loop below runs zero steps, so the final
            # log line cannot hit an unbound name.
            loss = None

            for step in range(
                    num_steps_per_epoch * settings["dev_cloud_epochs"]):
                # At the start of every epoch, show the vital information:
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch %s/%s',
                                 step // num_batches_per_epoch + 1,
                                 settings["dev_cloud_epochs"])
                    learning_rate_value, accuracy_value = sess.run(
                        [lr, accuracy])
                    logging.info('Current Learning Rate: %s',
                                 learning_rate_value)
                    logging.info('Current Streaming Accuracy: %s',
                                 accuracy_value)

                    # Print logits and predictions as a sanity check that
                    # things are going fine.
                    (logits_value, probabilities_value, predictions_value,
                     labels_value) = sess.run(
                         [logits, probabilities, predictions, labels])
                    print('logits: \n', logits_value[:5])
                    print('Probabilities: \n', probabilities_value[:5])
                    print('predictions: \n', predictions_value[:100])
                    print('Labels:\n:', labels_value[:100])

                # The fractional epoch is passed on purpose: train_step
                # formats it with two decimals.
                loss, _ = train_step(
                    sess, train_op, sv.global_step,
                    step / num_batches_per_epoch + 1)

                # Log the summaries every 10 steps.
                if step % 10 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # We log the final training loss and accuracy
            logging.info('Final Loss: %s', loss)
            logging.info('Final Accuracy: %s', sess.run(accuracy))

            # Once all the training has been done, save the checkpoint.
            # This is written explicitly so the export below restores the
            # final weights rather than an older periodic checkpoint.
            logging.info('Finished training! Saving model to disk now.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

    checkpoint_file = tf.train.latest_checkpoint(settings["log_dir"])

    # ========================= GRAPH EXPORT ===========================
    with tf.Graph().as_default() as graph:
        # Fixed single-image input placeholder for the exported graph.
        images = tf.placeholder(
            "float",
            [1, settings["image_size"], settings["image_size"], 3],
            name="input")

        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(
                images, num_classes=settings["num_classes"],
                is_training=False)

        # NOTE(review): this extra softmax is not the exported output node
        # named below - confirm whether it is still needed.
        probabilities = tf.nn.softmax(logits)
        saver = tf.train.Saver(slim.get_variables_to_restore())

        # Setup graph def
        input_graph_def = graph.as_graph_def()
        output_node_names = "InceptionV3/Predictions/Softmax"
        output_graph_name = "model/DevCloudIDC.pb"

        # NOTE(review): `config` is not defined inside this function; it is
        # presumably a module-level session config - confirm.
        with tf.Session(config=config) as sess:
            saver.restore(sess, checkpoint_file)

            # Exporting the graph
            print("Exporting graph...")
            output_graph_def = graph_util.convert_variables_to_constants(
                sess, input_graph_def, output_node_names.split(","))

            with tf.gfile.GFile(output_graph_name, "wb") as f:
                f.write(output_graph_def.SerializeToString())

    humanEnd = datetime.now()
    clockEnd = time.time()

    print("")
    print("-- Training Ending ")
    print("-- ENDED: ", humanEnd)
    print("-- TIME: {0}".format(clockEnd - clockStart))
    print("")