Example #1
def run():

    #Create log_dir for evaluation information
    if not os.path.exists(
            TassMovidiusEval._confs["ClassifierSettings"]["log_eval"]):

        os.mkdir(TassMovidiusEval._confs["ClassifierSettings"]["log_eval"])

    #Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:

        tf.logging.set_verbosity(tf.logging.INFO)

        #Get the dataset first and load one batch of validation image and label tensors. Set is_training to False to use the evaluation preprocessing
        dataset = TassMovidiusEval.getSplit('validation')
        images, raw_images, labels = TassMovidiusEval.loadBatch(
            dataset, is_training=False)

        #Create some information about the evaluation steps
        num_batches_per_epoch = dataset.num_samples // TassMovidiusEval._confs[
            "ClassifierSettings"]["test_batch_size"]
        num_steps_per_epoch = num_batches_per_epoch

        #Now create the inference model but set is_training=False
        with slim.arg_scope(inception_v3_arg_scope()):

            logits, end_points = inception_v3(images,
                                              num_classes=dataset.num_classes,
                                              is_training=False)

        #Get all the variables to restore from the checkpoint file and create the saver to restore them
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):

            return saver.restore(sess, TassMovidiusEval.checkpoint_file)

        #Define only the metrics to track; the loss is not needed for evaluation
        probabilities = end_points['Predictions']
        predictions = tf.argmax(probabilities, 1)

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update)

        #Create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        global_step_op = tf.assign(
            global_step, global_step + 1
        )  #no apply_gradient method so manually increasing the global_step

        #Create an evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Simply takes in a session, runs the metrics op and some logging information.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            #Log some information
            logging.info(
                'Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        #Define some scalar quantities to monitor
        tf.summary.scalar('Validation_Accuracy', accuracy)
        my_summary_op = tf.summary.merge_all()

        #Get your supervisor
        sv = tf.train.Supervisor(
            logdir=TassMovidiusEval._confs["ClassifierSettings"]["log_eval"],
            summary_op=None,
            init_fn=restore_fn)

        #Now we are ready to run in one session
        with sv.managed_session() as sess:
            for step in range(
                    int(num_batches_per_epoch *
                        TassMovidiusEval._confs["ClassifierSettings"]
                        ["test_num_epochs"])):
                #Print vital information at the start of every epoch
                if step % num_batches_per_epoch == 0:
                    logging.info(
                        'Epoch: %s/%s', step // num_batches_per_epoch + 1,
                        TassMovidiusEval._confs["ClassifierSettings"]
                        ["test_num_epochs"])
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                #Compute summaries every 10 steps and continue evaluating
                if step % 10 == 0:
                    eval_step(sess,
                              metrics_op=metrics_op,
                              global_step=sv.global_step)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                #Otherwise just run as per normal
                else:
                    eval_step(sess,
                              metrics_op=metrics_op,
                              global_step=sv.global_step)

            #At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            #Now we want to visualize the last batch's images just to see what our model has predicted
            raw_images, labels, predictions, probabilities = sess.run(
                [raw_images, labels, predictions, probabilities])
            for i in range(10):
                image, label, prediction, probability = raw_images[i], labels[
                    i], predictions[i], probabilities[i]
                prediction_name, label_name = dataset.labels_to_name[
                    prediction], dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
                    prediction_name, label_name, probability[prediction])
                cv2.imwrite('model/eval/' + str(i) + '.png', image)
                img_plot = cv2.imread('model/eval/' + str(i) + '.png')
                pl.clf()
                pl.imshow(img_plot)
                pl.axis('off')
                pl.title(text)
                pl.savefig('model/eval/' + str(i) + 'fig.png')

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.'
            )
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
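
The example above assumes a number of imports and a TassMovidiusEval helper (configuration, getSplit, loadBatch, checkpoint_file) that are defined elsewhere in the project. A minimal, hedged sketch of the imports that would sit above run() in the module is shown below; the exact import path for the Inception v3 definition depends on how the project vendors the TF-Slim model library, so treat these lines as assumptions rather than the project's actual header.

#Sketch of the imports this evaluation example relies on (assumed: TF 1.x with tf.contrib.slim)
import os
import time

import cv2
import matplotlib.pylab as pl
import tensorflow as tf
from tensorflow.contrib import slim
from tensorflow.contrib.framework import get_or_create_global_step
from tensorflow.python.platform import tf_logging as logging

#inception_v3 and inception_v3_arg_scope come from the TF-Slim model definitions
#(tensorflow/models, research/slim); this import assumes that package is on PYTHONPATH
from nets.inception_v3 import inception_v3, inception_v3_arg_scope

if __name__ == '__main__':
    run()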
Example #2
def run():

    humanStart = datetime.now()
    clockStart = time.time()

    print("-- Training Starting ")
    print("-- STARTED: ", humanStart)
    print("")

    #Open the labels file
    Trainer.labels = open(Trainer._confs["ClassifierSettings"]["labels_file"], 'r')

    #Create a dictionary to refer each label to their string name
    for line in Trainer.labels:

        label, string_name = line.split(':')
        string_name = string_name[:-1] #Remove newline
        Trainer.labelsToName[int(label)] = string_name

    #Create a dictionary that will help people understand your dataset better. This is required by the Dataset class later.
    Trainer.items_to_descriptions = {
        'image': 'A 3-channel RGB colour image, e.g. office, people',
        'label': 'A label that starts from zero'
    }

    #Create the log directory here. It must be done here, otherwise importing the module would create it unnecessarily.
    if not os.path.exists(Trainer._confs["ClassifierSettings"]["log_dir"]):
        os.mkdir(Trainer._confs["ClassifierSettings"]["log_dir"])

    #======================= TRAINING PROCESS =========================
    #Now we start to construct the graph and build our model
    with tf.Graph().as_default() as graph:

        tf.logging.set_verbosity(tf.logging.INFO) #Set the verbosity to INFO level

        #First create the dataset and load one batch
        dataset = Trainer.getSplit('train')

        images, _, labels = Trainer.loadBatch(dataset)

        #Compute the number of batches per epoch and the number of steps to take before decaying the learning rate
        num_batches_per_epoch = dataset.num_samples // Trainer._confs["ClassifierSettings"]["batch_size"]
        num_steps_per_epoch = num_batches_per_epoch #Because one step is one batch processed
        decay_steps = int(Trainer._confs["ClassifierSettings"]["num_epochs_before_decay"] * num_steps_per_epoch)

        #Create the model inference
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(images, num_classes = dataset.num_classes, is_training = True)

        #Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        #Performs the equivalent of tf.nn.sparse_softmax_cross_entropy_with_logits but enhanced with checks
        loss = tf.losses.softmax_cross_entropy(onehot_labels = one_hot_labels, logits = logits)
        total_loss = tf.losses.get_total_loss()    #obtain the regularization losses as well

        #Create the global step for monitoring the learning_rate and training.
        global_step = get_or_create_global_step()

        #Define your exponentially decaying learning rate
        lr = tf.train.exponential_decay(
            learning_rate = Trainer._confs["ClassifierSettings"]["initial_learning_rate"],
            global_step = global_step,
            decay_steps = decay_steps,
            decay_rate = Trainer._confs["ClassifierSettings"]["learning_rate_decay_factor"],
            staircase = True)

        #Now we can define the optimizer that takes on the learning rate
        optimizer = tf.train.AdamOptimizer(learning_rate = lr)
        #optimizer = tf.train.RMSPropOptimizer(learning_rate = lr, momentum=0.9)

        #Create the train_op.
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        #State the metrics that you want to track. The predictions are not one-hot encoded.
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        #Now finally create all the summaries you need to monitor and group them into one summary op.
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        #Now we need to create a training step function that runs the train_op and metrics_op and updates the global_step concurrently.
        def train_step(sess, train_op, global_step, epochCount):
            '''
            Simply runs a session for the arguments provided and logs the time elapsed for each global step
            '''
            #Check the time for each sess run
            start_time = time.time()
            total_loss, global_step_count, _ = sess.run([train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            #Run the logging to print some results
            logging.info('Epoch %.2f Global Step %s: Loss: %.4f (%.2f sec/step)', epochCount, global_step_count, total_loss, time_elapsed)

            return total_loss, global_step_count

        #Define your supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory
        sv = tf.train.Supervisor(logdir = Trainer._confs["ClassifierSettings"]["log_dir"], summary_op = None)

        #Run the managed session
        with sv.managed_session() as sess:
            for step in range(num_steps_per_epoch * Trainer._confs["ClassifierSettings"]["dev_cloud_epochs"]):
                #At the start of every epoch, show the vital information:
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch %s/%s', step // num_batches_per_epoch + 1, Trainer._confs["ClassifierSettings"]["dev_cloud_epochs"])
                    learning_rate_value, accuracy_value = sess.run([lr, accuracy])
                    logging.info('Current Learning Rate: %s', learning_rate_value)
                    logging.info('Current Streaming Accuracy: %s', accuracy_value)

                    # optionally, print your logits and predictions for a sanity check that things are going fine.
                    logits_value, probabilities_value, predictions_value, labels_value = sess.run([logits, probabilities, predictions, labels])
                    print('logits: \n', logits_value[:5])
                    print('Probabilities: \n', probabilities_value[:5])
                    print('predictions: \n', predictions_value[:100])
                    print('Labels: \n', labels_value[:100])

                #Log the summaries every 10 steps.
                if step % 10 == 0:
                    loss, _ = train_step(sess, train_op, sv.global_step, step/num_batches_per_epoch + 1)
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

                #If not, simply run the training step
                else:
                    loss, _ = train_step(sess, train_op, sv.global_step, step/num_batches_per_epoch + 1)

            #We log the final training loss and accuracy
            logging.info('Final Loss: %s', loss)
            logging.info('Final Accuracy: %s', sess.run(accuracy))

            #Once all the training has been done, save the log files and checkpoint model
            logging.info('Finished training! Saving model to disk now.')

    checkpoint_file = tf.train.latest_checkpoint(Trainer._confs["ClassifierSettings"]["log_dir"])

    with tf.Graph().as_default() as graph:

        #images = tf.placeholder(shape=[None, image_size, image_size, 3], dtype=tf.float32, name = 'Placeholder_only')
        images = tf.placeholder("float", [1, Trainer._confs["ClassifierSettings"]["image_size"], Trainer._confs["ClassifierSettings"]["image_size"], 3], name="input")

        with slim.arg_scope(inception_v3_arg_scope()):

            logits, end_points = inception_v3(images, num_classes = Trainer._confs["ClassifierSettings"]["num_classes"], is_training = False)

        probabilities = tf.nn.softmax(logits)

        saver = tf.train.Saver(slim.get_variables_to_restore())

        #Set up the graph def
        input_graph_def = graph.as_graph_def()
        output_node_names = "InceptionV3/Predictions/Softmax"
        output_graph_name = "model/DevCloudIDC.pb"

        #config is not defined elsewhere in this snippet, so create a default session configuration
        config = tf.ConfigProto()

        with tf.Session(config=config) as sess:

            saver.restore(sess, checkpoint_file)

            #Exporting the graph
            print ("Exporting graph...")
            output_graph_def = graph_util.convert_variables_to_constants(
                sess,
                input_graph_def,
                output_node_names.split(","))

            with tf.gfile.GFile(output_graph_name, "wb") as f:

                f.write(output_graph_def.SerializeToString())

        humanEnd = datetime.now()
        clockEnd = time.time()

        print("")
        print("-- Training Ending ")
        print("-- ENDED: ", humanEnd)
        print("-- TIME: {0}".format(clockEnd - clockStart))
        print("")