Exemplo n.º 1
0
def train(config, args):
    """Build the model graph, optionally restore weights, and run training.

    For each of ``config.num_epochs`` epochs the training iterator is
    re-initialised, diagnostics are run once, and ``cnn.train_op`` together
    with ``cnn.update_accuracy`` is executed repeatedly until the iterator
    raises ``tf.errors.OutOfRangeError``. After the last epoch a final
    checkpoint is written and a summary line is printed.

    Args:
        config: Configuration object. ``config.num_epochs`` is read here; the
            object is also handed to ``Model`` and to
            ``Diagnostics.run_diagnostics``.
        args: Run options. The attributes read here are ``restore_last``,
            ``restore_path`` and ``name``.

    Side effects:
        Writes checkpoints below ``directories.checkpoints`` (a name resolved
        from the enclosing module) and prints progress messages. A
        ``KeyboardInterrupt`` raised during a training step saves a
        ``*_last.ckpt`` checkpoint and terminates via ``sys.exit()``.
    """
    start_time = time.time()
    v_s1_best = 0.
    # get_checkpoint_state returns None when no checkpoint is found.
    ckpt = tf.train.get_checkpoint_state(directories.checkpoints)

    # Build graph
    cnn = Model(config, directories, args=args)
    saver = tf.train.Saver()

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=False)
    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        train_handle = sess.run(cnn.train_iterator.string_handle())
        test_handle = sess.run(cnn.test_iterator.string_handle())

        # Guard against ckpt being None so a fresh run with restore_last set
        # falls through to the restore_path branch instead of raising
        # AttributeError.
        if args.restore_last and ckpt is not None and ckpt.model_checkpoint_path:
            # Continue training saved model
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('{} restored.'.format(ckpt.model_checkpoint_path))
        elif args.restore_path:
            new_saver = tf.train.import_meta_graph('{}.meta'.format(args.restore_path))
            new_saver.restore(sess, args.restore_path)
            print('{} restored.'.format(args.restore_path))

        sess.run(cnn.test_iterator.initializer)

        # Pre-bind epoch so the final save below still has a step number when
        # config.num_epochs is 0 and the loop body never runs.
        epoch = 0
        for epoch in range(config.num_epochs):
            sess.run(cnn.train_iterator.initializer)

            # Run diagnostics
            # Pass the `config` argument so diagnostics use the same
            # configuration object as the rest of this run.
            v_s1_best = Diagnostics.run_diagnostics(
                cnn, config, directories, sess, saver, train_handle,
                test_handle, start_time, v_s1_best, epoch, args.name)

            while True:
                try:
                    # Update weights
                    sess.run([cnn.train_op, cnn.update_accuracy],
                             feed_dict={cnn.training_phase: True,
                                        cnn.handle: train_handle})

                except tf.errors.OutOfRangeError:
                    # Training iterator exhausted: this epoch is finished.
                    print('End of epoch!')
                    break

                except KeyboardInterrupt:
                    # Save progress before exiting on a manual interrupt.
                    save_path = saver.save(sess, os.path.join(directories.checkpoints,
                        '{0}/bcmp_{0}_last.ckpt'.format(args.name)), global_step=epoch)
                    print('Interrupted, model saved to: ', save_path)
                    sys.exit()

        save_path = saver.save(sess, os.path.join(directories.checkpoints,
                               '{0}/bcmp_{0}_end.ckpt'.format(args.name)),
                               global_step=epoch)

    print("Training Complete. Model saved to file: {} Time elapsed: {:.3f} s".format(save_path, time.time()-start_time))