Example #1
    def eval_places(self,
                    dataset,
                    data_mode,
                    gpu_frac,
                    workers,
                    mem_dequeing,
                    steps=20):
        """ Runs and tracks a single evaluation step on the given data subset
        Args:
            Other arguments can be seen in _train_network
            data_mode: Data subset to use for evaluation
            steps: Number of examples the model is evaluated on.
        """
        # Create reader to get TFRecords
        reader = DataReader(dataset)

        with tf.Graph().as_default() as g:

            # Read data and define operations
            data, labels = reader.read_batch(1,
                                             data_mode,
                                             mem_dequeing,
                                             workers,
                                             shuffle=True,
                                             train_mode=False)

            main_run = self._build_run_settings(data, labels, [])

            # Load weights ops for warm start on training
            assign_ops = self._load_pretrained()

            # Supervisor to manage the session; it also takes care of
            # initializing variables
            supervisor = tf.train.Supervisor(graph=g)

            # Initialize session
            session_conf = mu.get_session_config(gpu_frac, log_placement=False)
            with supervisor.managed_session(config=session_conf) as sess:

                # Load model, if existing. Otherwise start from scratch
                step_value, _ = self._initialize_model(sess,
                                                       saver=mu.get_saver(),
                                                       is_training=True)

                if len(assign_ops) > 0:
                    logger.info('Assigning pretrained values to models ...')
                    sess.run(assign_ops)
                else:
                    raise RuntimeError(
                        "Unexpected error: no assign operations" +
                        " to load weights")

                try:
                    # Let queues start dequeuing examples
                    coord, threads = mu.initialize_queues(sess)
                    counter = 0
                    while counter < steps:
                        self.test_places_output(sess, data, main_run)
                        counter += 1

                    try:
                        mu.finalize_queues(coord, threads)
                    except RuntimeError as e:
                        logger.warning('Error stopping coordinator: %s', e)

                except tf.errors.OutOfRangeError:
                    logger.info('Queue ran out of evaluation instances')
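The helpers mu.initialize_queues and mu.finalize_queues are not included in these snippets. Judging from Example #4, which starts the queue-runner threads by hand, they most likely wrap the standard TF1 Coordinator pattern; the sketch below is an assumption of what they could look like, not their actual implementation.

import tensorflow as tf


def initialize_queues(sess):
    # Start the input-pipeline threads that fill the example queues
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    return coord, threads


def finalize_queues(coord, threads):
    # Ask the reader threads to stop and wait until they have joined
    coord.request_stop()
    coord.join(threads)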
Example #2
    def _train_network(self,
                       dataset,
                       logs_path,
                       batch_size,
                       metrics,
                       track_summaries,
                       gpu_frac,
                       workers,
                       mem_dequeing,
                       track_models=None,
                       steps=None,
                       max_steps=None,
                       log_steps=10):
        """ Runs training on the network defined for the given number of steps
        and stores a checkpoint at the end.
        Args:
            See train_network for other arguments.
            logs_path: Path where to store the network stats
        Returns:
            step: Step at which training has stopped
            loss: Mean loss in the process
            metrics: Mean metrics values in the process
        """
        # Create reader to get TFRecords
        reader = DataReader(dataset)

        with tf.Graph().as_default() as g:

            # Read data and define operations
            data, labels = reader.read_batch(batch_size, DataMode.TRAINING,
                                             mem_dequeing, workers)
            main_run = self._build_run_settings(data, labels, metrics)

            # Load weights ops for warm start on training
            assign_ops = self._load_pretrained()

            # Prepare logging for Tensorboard
            saver, summary_ops, writer = mu.prepare_logging(logs_path, g)

            # Supervisor for training. We only want it to deal with the
            # session. Initializes variables
            supervisor = tf.train.Supervisor(graph=g)

            # Initialize session
            session_conf = mu.get_session_config(gpu_frac, log_placement=False)
            with supervisor.managed_session(config=session_conf) as sess:

                # Load model, if existing. Otherwise start from scratch
                step_value, start = self._initialize_model(sess,
                                                           saver=saver,
                                                           is_training=True)

                # Get stopping condition according to mode
                if steps is not None:
                    step_limit = step_value + steps
                else:
                    step_limit = max_steps
                if max_steps is not None:
                    step_limit = min(step_limit, max_steps)
                stop = step_value >= step_limit

                # Assign weights only if model started from scratch
                if start is True and len(assign_ops) > 0:
                    logger.info('Assigning pretrained values to models ...')
                    sess.run(assign_ops)

                try:
                    # Let queues start dequeuing examples
                    coord, threads = mu.initialize_queues(sess)

                    while not stop:

                        # Run network
                        log_run = step_value % log_steps == 0
                        main_res = main_run.training_run(sess,
                                                         summary_ops,
                                                         log=log_run)

                        # Track summaries if needed
                        if track_summaries is not None and \
                                step_value % track_summaries == 0:
                            mu.store_summaries(writer, step_value,
                                               main_res.summary_str)

                        # Track models if needed
                        if track_models is not None \
                                and step_value % track_models == 0 \
                                and step_value != 0:
                            mu.store_checkpoint(sess, saver, step_value,
                                                logs_path)

                        # Update current step and stop condition
                        step_value = main_res.step
                        stop = step_value >= step_limit

                    # Store model at exit
                    mu.store_checkpoint(sess, saver, step_value, logs_path)

                    try:
                        mu.finalize_queues(coord, threads)
                    except RuntimeError as e:
                        logger.warning('Error stopping coordinator: %s', e)

                except tf.errors.OutOfRangeError as e:
                    logger.warning(
                        'Input queue exhausted due to unexpected '
                        'reason: %s.', e)

                return (step_value,
                        main_run.loss_average(),
                        main_run.metrics_average())
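mu.prepare_logging, mu.store_summaries and mu.store_checkpoint are also project-specific helpers whose code is not shown here. Based on how they are called above, they presumably wrap the usual TF1 Saver/FileWriter machinery; the following is only a sketch under that assumption (the checkpoint file name is made up).

import os

import tensorflow as tf


def prepare_logging(logs_path, graph):
    # Saver for checkpoints, merged summary op and a TensorBoard writer
    saver = tf.train.Saver()
    summary_ops = tf.summary.merge_all()
    writer = tf.summary.FileWriter(logs_path, graph=graph)
    return saver, summary_ops, writer


def store_summaries(writer, step, summary_str):
    # Append the serialized summary for this step to the event file
    writer.add_summary(summary_str, global_step=step)
    writer.flush()


def store_checkpoint(sess, saver, step, logs_path):
    # Persist the current variable values under logs_path
    saver.save(sess, os.path.join(logs_path, 'model.ckpt'), global_step=step)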
Example #3
    def _eval_network(self,
                      dataset,
                      data_mode,
                      logs_path,
                      batch_size,
                      metrics,
                      gpu_frac,
                      workers,
                      mem_dequeing,
                      track_summaries=50,
                      steps=None):
        """ Runs and tracks a single evaluation step on the given data subset
        Args:
            Other arguments can be seen in train_network
            data_mode: Data subset to use for evaluation
            track_summaries: Steps between Tensorboard summaries.
                Only used if steps is None
            steps: Number of batches the model is evaluated on.
                If not None, a single summary is created at the end.
                If None, the model is evaluated on the whole dataset and
                summaries are tracked periodically.
        Returns:
            loss: Mean loss over the evaluated batches
            metrics: Mean metric values over the evaluated batches
        """
        # Create reader to get TFRecords
        reader = DataReader(dataset)

        with tf.Graph().as_default() as g:

            # Read data and define operations
            data, labels = reader.read_batch(batch_size,
                                             data_mode,
                                             mem_dequeing,
                                             workers,
                                             shuffle=True,
                                             train_mode=False)

            main_run = self._build_run_settings(data, labels, metrics)

            # Prepare logging for Tensorboard
            saver, summary_ops, writer = mu.prepare_logging(logs_path, g)

            # Supervisor to manage the session; it also takes care of
            # initializing variables
            supervisor = tf.train.Supervisor(graph=g)

            # Initialize session
            session_conf = mu.get_session_config(gpu_frac, log_placement=False)
            with supervisor.managed_session(config=session_conf) as sess:

                # Load model, if existing. Otherwise start from scratch
                step_value, _ = self._initialize_model(sess,
                                                       saver=saver,
                                                       is_training=False)

                try:
                    # Let queues start dequeuing examples
                    coord, threads = mu.initialize_queues(sess)

                    # Initialize loop conditions
                    step_counter = 0
                    stop = False if steps is None else step_counter >= steps

                    while not stop:

                        # Run evaluation
                        res = main_run.test_run(sess,
                                                summary_ops,
                                                step_value,
                                                data_mode=data_mode,
                                                log=True)

                        if steps is not None and step_counter + 1 >= steps:
                            # Reached max steps, store summary and stop
                            main_run.manual_log(writer, step_value)
                            stop = True
                        elif steps is None \
                                and step_counter % track_summaries == 0:
                            # Periodic storage of summaries
                            mu.store_summaries(writer, step_counter,
                                               res.summary_str)

                        step_counter += 1

                    try:
                        mu.finalize_queues(coord, threads)
                    except RuntimeError as e:
                        logger.warning('Error stopping coordinator: %s', e)

                except tf.errors.OutOfRangeError:
                    logger.info('Queue ran out of evaluation instances')

                return main_run.loss_average(), main_run.metrics_average()
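For reference, a hypothetical call to _eval_network could look as follows; the object name network, the concrete argument values and the metrics list are illustrative assumptions, and DataMode.VALIDATION is assumed to exist alongside the DataMode.TRAINING member used elsewhere.

# Hypothetical usage (all concrete values are illustrative assumptions)
loss, metric_values = network._eval_network(
    dataset=dataset,
    data_mode=DataMode.VALIDATION,   # assumed member, by analogy to TRAINING
    logs_path='/tmp/eval_logs',
    batch_size=32,
    metrics=['accuracy'],            # metrics format is an assumption
    gpu_frac=0.5,
    workers=4,
    mem_dequeing=3,
    steps=None)                      # evaluate on the whole subset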
Example #4
FLAGS = tf.app.flags.FLAGS

if __name__ == '__main__':

    with tf.Session() as sess:

        # Airbnb settings
        dataset = AirbnbSettings(dataset_location=FLAGS.data_location,
                                 image_specs=get_alexnet_specs(
                                     FLAGS.batch_size, random_crop=True))

        # Read batches from dataset
        reader = DataReader(dataset)
        features, label = reader.read_batch(
            batch_size=FLAGS.batch_size,
            data_mode=DataMode.TRAINING,  # Use whatever here, e.g. training
            memory_factor=FLAGS.memory_factor,
            reader_threads=FLAGS.reader_threads,
            train_mode=False)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # Define coordinator to handle all threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        example, l = sess.run([features, label])

        # Print first instance in batch
        idx = 0
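Once the batch has been fetched and inspected, the usual TF1 pattern is to stop the queue-runner threads started above. That cleanup is not part of the snippet, but inside the session block it would typically look like:

        # Standard cleanup for the queue runners (not shown in the snippet)
        coord.request_stop()
        coord.join(threads)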