Пример #1
0
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continues
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        logging_file = os.path.join(logdir, 'output.log')
        utils.create_filewrite_handler(logging_file, mode='a')

        logging.info("Continue training.")

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess, cur_step)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #2
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # set to allocate memory on GPU as needed
    # For more details, look at
    # https://stackoverflow.com/questions/36927607/how-can-i-solve-ran-out-of-gpu-memory-in-tensorflow
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session(config=config) as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        train.run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #3
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        logging.info("..Creating queues")
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        logging.info("..Building training graph")
        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        logging.info("..Enqueuing files")
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        logging.info("..Initializing the training")
        run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #4
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #5
0
def continue_training(logdir):
    """
    Continues training of a model.

    This will load model files and weights found in logdir and continues
    an aborted training.

    Parameters
    ----------
    logdir : string
        Directory with logs.
    """
    hypes = utils.load_hypes_from_logdir(logdir)
    modules = utils.load_modules_from_logdir(logdir)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        logging_file = os.path.join(logdir, 'output.log')
        utils.create_filewrite_handler(logging_file, mode='a')

        logging.info("Continue training.")

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules,
                                                 image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess, cur_step)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #6
0
def main(_):
    utils.set_gpus_to_use()

    try:
        import tensorvision.train
        import tensorflow_fcn.utils
    except ImportError:
        logging.error("Could not import the submodules.")
        logging.error("Please execute:"
                      "'git submodule update --init --recursive'")
        exit(1)

    with open(tf.app.flags.FLAGS.hypes, 'r') as f:
        logging.info("f: %s", f)
        hypes = json.load(f)
    utils.load_plugins()

    if 'TV_DIR_RUNS' in os.environ:
        runs_dir = os.path.join(os.environ['TV_DIR_RUNS'], 'KittiSeg')
    else:
        runs_dir = 'RUNS'

    utils.set_dirs(hypes, tf.app.flags.FLAGS.hypes)
    utils._add_paths_to_sys(hypes)
    train.maybe_download_and_extract(hypes)
    maybe_download_and_extract(runs_dir)

    logging.info("Trimming weights.")
    logdir = os.path.join(runs_dir, FLAGS.RUN)
    modules = utils.load_modules_from_hypes(hypes)

    with tf.Graph().as_default():

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepare the tv session
        with tf.Session().as_default():
            tv_sess = core.start_tv_session(hypes)
        sess = tv_sess['sess']
        saver = tv_sess['saver']

        cur_step = core.load_weights(logdir, sess, saver)
        if cur_step is None:
            logging.warning("Loaded global_step is None.")
            logging.warning("This could mean,"
                            " that no weights have been loaded.")
            logging.warning("Starting Training with step 0.")
            cur_step = 0

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # prepaire the tv session
        image_pl = tf.placeholder(tf.float32)
        image = tf.expand_dims(image_pl, 0)
        image.set_shape([1, None, None, 3])
        inf_out = core.build_inference_graph(hypes, modules, image=image)

        # Create a session for running Ops on the Graph.
        trim_dir = 'RUNS/trimmed'
        shutil.copytree(logdir, trim_dir)
        shutil.copy(tf.app.flags.FLAGS.hypes,
                    os.path.join(trim_dir, 'model_files', 'hypes.json'))
        sess = tf.Session()
        saver = tf.train.Saver()
        core.load_weights(trim_dir, sess, saver)

        for weight in tf.contrib.model_pruning.get_masks():
            if any([
                    layer in weight.name
                    for layer in hypes['layer_pruning']['layers']
            ]):
                weight_value = tv_sess['sess'].run(weight)
                kernel_count = int(weight_value.shape[3] *
                                   hypes['layer_pruning']['layer_sparsity'])

                l1_values = np.sum(np.abs(weight_value), axis=(0, 1, 2))
                toss_kernels = l1_values.argsort()[:kernel_count]
                weight_value[:, :, :, toss_kernels] = 0
                assign_op = tf.assign(weight, tf.constant(weight_value))
                tv_sess['sess'].run(assign_op)

        checkpoint_path = os.path.join(trim_dir, 'model.ckpt')
        tv_sess['saver'].save(sess, checkpoint_path, global_step=cur_step)

    train.continue_training(trim_dir)
Пример #7
0
def do_training(hypes):
    """
    Train model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')
        
        regression_weights = tf.placeholder(dtype=tf.float32, shape=(3,))
        hypes['solver']['regression_weights'] = regression_weights

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            calib = tf.placeholder(tf.float32, shape=[1, hypes['grid_height'], hypes['grid_width'], 3, 4])
            xy_scale = tf.placeholder(tf.float32, shape=[1, hypes['grid_height'], hypes['grid_width'], 2])
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, 384, 1248, 3])
            inf_out, encoder_out = core.build_inference_graph(hypes, modules,
                                                 image, calib, xy_scale)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out
            tv_graph['calib_pl'] = calib
            tv_graph['xy_scale_pl'] = xy_scale
            tv_graph['encoder_out'] = encoder_out


        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))
        var_list = [var for var in all_variables if "beta" not in var.name and 'Adam' not in var.name]    
  
        saver = tf.train.Saver(var_list=var_list)
        saver.restore(sess, hypes['pretrained'])
     
        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #8
0
def do_evaling(hypes):
    """
    eval model for a number of steps.

    This trains the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:
        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # prepaire the tv session
        tv_sess = core.start_tv_session(hypes)

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            calib = tf.placeholder(
                tf.float32,
                shape=[1, hypes['grid_height'], hypes['grid_width'], 3, 4])
            xy_scale = tf.placeholder(
                tf.float32,
                shape=[1, hypes['grid_height'], hypes['grid_width'], 2])
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, 384, 1248, 3])
            inf_out = core.build_inference_graph(hypes, modules, image, calib,
                                                 xy_scale)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out
            tv_graph['calib_pl'] = calib
            tv_graph['xy_scale_pl'] = xy_scale

        all_variables = tf.get_collection_ref(tf.GraphKeys.GLOBAL_VARIABLES)
        sess.run(tf.variables_initializer(all_variables))

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)
        saver = tf.train.Saver()
        print("Reading checkpoints...")
        ckpt = tf.train.get_checkpoint_state(hypes["dirs"]["ckpt_dir"])
        if ckpt and ckpt.model_checkpoint_path:
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

        # And then after everything is built, start the training loop.
        run_evaling(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])
Пример #9
0
def do_finetuning(hypes):
    """
    Finetune model for a number of steps.

    This finetunes the model for at most hypes['solver']['max_steps'].
    It shows an update every utils.cfg.step_show steps and writes
    the model to hypes['dirs']['output_dir'] every utils.cfg.step_eval
    steps.

    Paramters
    ---------
    hypes : dict
        Hyperparameters
    """
    # Get the sets of images and labels for training, validation, and
    # test on MNIST.

    try:
        import tensorvision.core as core
    except ImportError:
        logging.error("Could not import the submodules.")
        logging.error("Please execute:"
                      "'git submodule update --init --recursive'")
        exit(1)

    modules = utils.load_modules_from_hypes(hypes)

    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Session() as sess:

        # build the graph based on the loaded modules
        with tf.name_scope("Queues"):
            queue = modules['input'].create_queues(hypes, 'train')

        tv_graph = core.build_training_graph(hypes, queue, modules)

        # restoring vars
        vars_to_restore = restoring_vars(hypes, sess)

        restorer = tf.train.Saver(vars_to_restore)

        # load pre-trained model of hand segmentation
        logging.info("Loading pretrained model's weights")
        model_dir = hypes['transfer']['model_folder']
        model_file = hypes['transfer']['model_name']
        # DEBUG: check the model file
        # check_model(os.path.join(model_dir, model_file))
        """
        # Get a list of vars to restore
        vars_to_restore = restoring_vars(sess)
        print("vars to restore:", vars_to_restore)
        # Create another Saver for restoring pre-trained vars
        saver = tf.train.Saver(vars_to_restore)
        """
        core.load_weights(model_dir, sess, restorer)
        # load_trained_model(sess, hypes)

        saver = tf.train.Saver(max_to_keep=int(utils.cfg.max_to_keep))

        # prepaire the tv session
        tv_sess = prepare_tv_session(hypes, sess, saver)

        # DEBUG: print weights
        # check_weights(tv_sess['sess'])
        # check_graph(tv_sess['sess'])

        with tf.name_scope('Validation'):
            tf.get_variable_scope().reuse_variables()
            image_pl = tf.placeholder(tf.float32)
            image = tf.expand_dims(image_pl, 0)
            image.set_shape([1, None, None, 3])
            inf_out = core.build_inference_graph(hypes, modules, image=image)
            tv_graph['image_pl'] = image_pl
            tv_graph['inf_out'] = inf_out

        # Start the data load
        modules['input'].start_enqueuing_threads(hypes, queue, 'train', sess)

        # And then after everything is built, start the training loop.
        train.run_training(hypes, modules, tv_graph, tv_sess)

        # stopping input Threads
        tv_sess['coord'].request_stop()
        tv_sess['coord'].join(tv_sess['threads'])