Example no. 1
 def test_group_by_batches_truncated(self):
     self.assertEqual([], list(util.group_by_batches([], 2, truncate=True)))
     self.assertEqual([[1], [2], [3]],
                      list(
                          util.group_by_batches([1, 2, 3], 1,
                                                truncate=True)))
     self.assertEqual([[1, 2]],
                      list(
                          util.group_by_batches([1, 2, 3], 2,
                                                truncate=True)))
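These tests pin down the contract of util.group_by_batches: it yields lists of at most batch_size items, and with truncate=True it drops a final batch shorter than batch_size. A minimal stand-alone sketch that satisfies them (an illustration of the contract, not Fold's actual implementation) could look like:

 import itertools

 def group_by_batches(iterable, batch_size, truncate=False):
   # Yield lists of up to batch_size items; with truncate=True a
   # final batch shorter than batch_size is dropped.
   iterator = iter(iterable)
   while True:
     batch = list(itertools.islice(iterator, batch_size))
     if not batch or (truncate and len(batch) < batch_size):
       return
     yield batch

The same sketch also satisfies the non-truncated tests further down, where the short final batch [3] is kept.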
Example no. 2
 def _run(self, supervisor, session):
   batches = (  # generates (size, feed_dict) pairs
       (len(batch), self.compiler.build_feed_dict(batch))
       for batch in util.group_by_batches(self.examples, self.batch_size))
   if self.eval_interval_secs:
     gen_batches = util.epochs(batches, shuffle=False)  # memoize batches
     max_reported_step = 0
     # Should eval for the final measurement even if _should_stop is true.
     while not (self._should_stop(supervisor) and max_reported_step > 0):
       start_time = time.time()
       if self._restore(supervisor, session):
         step = tf.train.global_step(session, self.global_step)
         if step > max_reported_step:
           max_reported_step = step
           results = self._eval_batches(
               supervisor, session, next(gen_batches), step)
           self._report_loss_and_save_best(supervisor, session, step, *results)
           if self._should_stop(supervisor): break
         else:
           self.log_and_print('not running eval because step=%s' % step)
       sleep_time = self.eval_interval_secs - (time.time() - start_time)
       if sleep_time > 0: time.sleep(sleep_time)
   elif self._restore(supervisor, session):
     step = tf.train.global_step(session, self.global_step)
     results = self._eval_batches(supervisor, session, batches, step)
     if results[0] is not None:
       self._report_loss_and_save_best(supervisor, session, step, *results)
   self.report_done()
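util.epochs(batches, shuffle=False) above memoizes the batch generator so that each call to next(gen_batches) replays one full epoch of (size, feed_dict) pairs. A rough sketch of a helper with that behavior, assuming only what the comments here describe (this is not Fold's actual util.epochs):

 import random

 def epochs(items, shuffle=True):
   # First epoch streams `items` while caching them; every later
   # epoch replays the cache, reshuffled when shuffle is true.
   cache = []
   def first_pass():
     for item in items:
       cache.append(item)
       yield item
   yield first_pass()
   while True:
     if shuffle:
       random.shuffle(cache)
     yield iter(cache)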
Example no. 3
 def prepare_batches(shuffled):
     for batch in util.group_by_batches(shuffled,
                                        self.batch_size,
                                        truncate=False):
         feed_dict[self.compiler.loom_input_tensor] = batch
         feed_dict[self._batch_size_ph] = len(batch)
         yield
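Generators in this prepare_batches style mutate a closed-over feed_dict in place and then yield, so the training loop only has to advance the generator once per step before calling session.run. A runnable toy version of the same pattern (plain lists standing in for loom inputs, print standing in for session.run):

 feed_dict = {}

 def prepare_batches(items, batch_size):
   # Toy stand-in for the generators above: install each batch into
   # the shared feed_dict, then yield control back to the caller.
   for start in range(0, len(items), batch_size):
     feed_dict['batch'] = items[start:start + batch_size]
     yield

 for _ in prepare_batches([1, 2, 3], 2):
   # a real training loop would call session.run(fetches, feed_dict) here
   print(feed_dict['batch'])  # -> [1, 2], then [3]

This matches the 'for _ in batches: ... session.run(train_fetches, train_feed_dict)' consumption pattern in the training loops further down.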
Example no. 4
 def _run(self, supervisor, session):
     batches = (  # generates (size, feed_dict) pairs
         (len(batch), self.compiler.build_feed_dict(batch))
         for batch in util.group_by_batches(self.examples, self.batch_size))
     if self.eval_interval_secs:
         gen_batches = util.epochs(batches,
                                   shuffle=False)  # memoize batches
         max_reported_step = 0
         while not (self._should_stop(supervisor)
                    and max_reported_step > 0):
             start_time = time.time()
             if self._restore(supervisor, session):
                 step = tf.train.global_step(session, self.global_step)
                 if step > max_reported_step:
                     max_reported_step = step
                     results = self._eval_batches(supervisor, session,
                                                  next(gen_batches), step)
                     if results[0] is None:
                         break  # should_stop returned true
                     self._report_loss_and_save_best(
                         supervisor, session, step, *results)
                 else:
                     self.log_and_print('not running eval because step=%s' %
                                        step)
             sleep_time = self.eval_interval_secs - (time.time() -
                                                     start_time)
             if sleep_time > 0: time.sleep(sleep_time)
     elif self._restore(supervisor, session):
         step = tf.train.global_step(session, self.global_step)
         results = self._eval_batches(supervisor, session, batches, step)
         if results[0] is not None:
             self._report_loss_and_save_best(supervisor, session, step,
                                             *results)
     self.report_done()
Example no. 5
File: plan.py Project: hfchong/fold
 def prepare_batches(shuffled):
   for batch in util.group_by_batches(shuffled, self.batch_size,
                                      truncate=self.exact_batch_sizes):
     feed_dict[self.compiler.loom_input_tensor] = batch
     if self.compute_summaries:
       feed_dict[self.batch_size_placeholder] = len(batch)
     yield
Example no. 6
 def _run(self, supervisor, session):
     batches = (  # generates (size, feed_dict) pairs
         (len(batch), self.compiler.build_feed_dict(batch))
         for batch in util.group_by_batches(self.examples, self.batch_size))
     if self._restore(supervisor, session):
         step = tf.train.global_step(session, self._global_step)
         results = self._eval_batches(supervisor, session, batches, step)
         if results[0] is not None:
             self._report_loss_and_save_best(supervisor, session, step,
                                             *results)
Example no. 7
    def build_loom_input_batched(self,
                                 examples,
                                 batch_size=None,
                                 metric_labels=False,
                                 ordered=False):
        """Turns examples into a feed value for `self.loom_input_tensor`.

    The result is an iterator; work doesn't happen until you call
    e.g. `next()` or `list()` on it.

    Args:
      examples: A non-empty iterable of examples to be built into tensors.
      batch_size: The maximum number of examples to compile into each loom
        input. Defaults to 100. If multiprocessing then this will also be
        the chunk size for each unit of work.
      metric_labels: Whether or not to return metric labels.
      ordered: Whether or not to preserve ordering when multiprocessing;
        otherwise it has no effect (and order is always preserved).

    Returns:
      Feed value(s) corresponding to `examples` grouped into batches. The result
      itself can be fed directly to `self.loom_input_tensor`, or be iterated
      over to feed values batch-by-batch. If `metric_labels` is True, an
      iterable of `(batch_feed_value, metric_labels)` tuples.

    Raises:
      TypeError: If `examples` is not an iterable.
      RuntimeError: If [`init_loom()`](#td.Compiler.init_loom) has not been
        called.

    """
        self._check_build('build_loom_input_batched', examples)
        if batch_size is None: batch_size = 100
        batches = util.group_by_batches(examples, batch_size)
        results = _map_maybe_parallel(self.pool,
                                      _subprocess_build_batch,
                                      self._build_batch,
                                      batches,
                                      ordered,
                                      chunk_size=1,
                                      metric_labels=metric_labels)
        # If metric_labels is false, use an edible iterator so that
        # `results` can be fed.
        if not metric_labels: results = util.EdibleIterator(results)
        return results
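Per the docstring, the result can be iterated batch-by-batch or fed whole (when metric_labels is false it is wrapped in util.EdibleIterator precisely so it can be fed). A usage sketch, where compiler, examples, fetches, and session are all assumed to come from the surrounding program:

 batched_input = compiler.build_loom_input_batched(examples, batch_size=64)
 for batch_feed_value in batched_input:
   # feed one compiled batch of examples at a time
   session.run(fetches, {compiler.loom_input_tensor: batch_feed_value})

 # Alternatively, feed the whole edible iterator in a single run call:
 # session.run(fetches, {compiler.loom_input_tensor: batched_input})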
Example no. 8
  def _run(self, supervisor, session):
    train_feed_dict = self.train_feeds.copy()
    train_fetches = {'train_op': self.train_op, 'loss': self.loss_total,
                     'step': self.global_step}
    if self.compute_summaries: train_fetches['summaries'] = self.summaries
    # The training loop is essentially the same regardless of whether
    # we are passing batches by feed dict or by loom input
    # tensor. There are a few minor differences:
    #
    # 1. By feed dict, we compute the size of the training set lazily,
    #    as we iterate over it in the first epoch. By input tensor, we
    #    calculate train_size as batch_size * batches_per_epoch.
    #
    # 2. By feed dict, we get the size of each batch by calling len()
    #    on it (since the last batch in the epoch may have less than
    #    batch_size elements). By input tensor, we require that every
    #    batch have exactly batch_size elements.
    #
    # 3. By feed dict we need to create batches of inputs, and feed
    #    them every time we run the train op (obviously).
    if self.examples:
      epochs, train_size = self._by_feed_dict(train_feed_dict)
    else:
      epochs, train_size = self._by_input_tensor(train_feed_dict)
    if self.dev_examples:
      # Memoize a generator of batches of (size, feed_dict) pairs.
      gen_dev_batches = util.epochs(
          ((len(batch), self.compiler.build_feed_dict(batch))
           for batch in util.group_by_batches(
               self.dev_examples, self.batch_size)), shuffle=False)
      # If there is an existing checkpoint in logdir, and we are
      # saving the best model, calculate best_loss before doing any
      # training, so we don't potentially replace a better-performing
      # checkpoint with a worse one.
      ckpt = tf.train.get_checkpoint_state(self.logdir)
      if ckpt and ckpt.model_checkpoint_path:
        _, self._best_loss, _ = self._eval_batches(
            supervisor, session, next(gen_dev_batches), None, is_dev=True)
        if self._best_loss is None: return  # should_stop returned true

    for epoch, batches in enumerate(epochs, 1):
      train_loss = 0.0
      for _ in batches:
        if self._should_stop(supervisor): return
        results = session.run(train_fetches, train_feed_dict)
        train_loss += results['loss']
        if self.compute_summaries:
          supervisor.summary_computed(
              session, results['summaries'], results['step'])
      if train_size == 0:
        raise ValueError('examples must be non-empty')
      if self.exact_batch_sizes and epoch == 1:
        if train_size < self.batch_size:
          raise ValueError('when exact_batch_sizes is true, examples must have '
                           'at least batch_size items; %s vs. %s' % (
                               train_size, self.batch_size))
        train_size -= train_size % self.batch_size
      train_loss /= train_size
      self.report_loss(results['step'], train_loss)
      log_str = 'epoch:%5d train[loss: %.3e]' % (epoch, train_loss)
      if self.dev_examples:
        dev_size, dev_loss, dev_metrics = self._eval_batches(
            supervisor, session, next(gen_dev_batches), results['step'],
            is_dev=True)
        if dev_size is None: return  # should_stop returned true
        if epoch == 1: self.log_and_print('train_size: %d dev_size: %d' %
                                          (train_size, dev_size))
        log_str += ' dev[%s]' % _eval_str(dev_size, dev_loss, dev_metrics)
        self.log_and_print(log_str)
        self._save_best(session, supervisor.saver, dev_loss, results['step'])
      else:
        if epoch == 1: self.log_and_print('train_size: %d' % train_size)
        self.log_and_print(log_str)
    if not self.dev_examples and self.is_chief_trainer:
      save_path = os.path.join(self.logdir, 'model.ckpt')
      save_fname = supervisor.saver.save(
          session, save_path, global_step=results['step'])
      self.log_and_print('final model saved in file: %s' % save_fname)
Example no. 9
 def _run(self, supervisor, session):
   return self.results_fn(itertools.chain.from_iterable(
       self._run_batch(examples, session)
       for examples in util.group_by_batches(self.examples, self.batch_size)))
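Here itertools.chain.from_iterable flattens the per-batch result iterables back into one stream of per-example results before handing it to results_fn. A self-contained toy of the same pattern, with a hypothetical run_batch standing in for self._run_batch:

 import itertools

 def run_batch(batch):
   return [x * 2 for x in batch]  # one result per example in the batch

 batches = [[1, 2], [3]]  # what group_by_batches([1, 2, 3], 2) yields
 flat = list(itertools.chain.from_iterable(run_batch(b) for b in batches))
 assert flat == [2, 4, 6]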
Example no. 10
 def _run(self, supervisor, session):
     return self.results_fn(
         itertools.chain.from_iterable(
             self._run_batch(examples, session) for examples in
             util.group_by_batches(self.examples, self.batch_size)))
Example no. 11
    def _run(self, supervisor, session):
        train_feed_dict = self.train_feeds.copy()
        train_fetches = {
            'train_op': self.train_op,
            'loss': self._loss_total,
            'step': self._global_step
        }
        train_fetches['summaries'] = self._summaries
        epochs, train_size = self._by_feed_dict(train_feed_dict)
        if self.dev_examples:
            # Memoize a generator of batches of (size, feed_dict) pairs.
            gen_dev_batches = util.epochs(
                ((len(batch), self.compiler.build_feed_dict(batch)) for batch
                 in util.group_by_batches(self.dev_examples, self.batch_size)),
                shuffle=False)
            # If there is an existing checkpoint in logdir, and we are
            # saving the best model, calculate best_loss before doing any
            # training, so we don't potentially replace a better-performing
            # checkpoint with a worse one.
            ckpt = tf.train.get_checkpoint_state(self.logdir)
            if ckpt and ckpt.model_checkpoint_path:
                _, self._best_loss, _ = self._eval_batches(
                    supervisor,
                    session,
                    next(gen_dev_batches),
                    None,
                    is_dev=True)
                if self._best_loss is None: return  # should_stop returned true

        for epoch, batches in enumerate(epochs, 1):
            self.log_and_print('Starting epoch %d.' % epoch)
            train_loss = 0.0
            for (k, _) in enumerate(batches):
                results = session.run(train_fetches, train_feed_dict)
                train_loss += results['loss']
                self.log_and_print('Batch %d: loss %f' % (k, results['loss']))
                supervisor.summary_computed(session, results['summaries'],
                                            results['step'])
            if train_size == 0:
                raise ValueError('examples must be non-empty')
            train_loss /= train_size
            log_str = 'epoch:%5d train[loss: %.3e]' % (epoch, train_loss)

            if self.dev_examples:
                dev_size, dev_loss, dev_metrics = self._eval_batches(
                    supervisor,
                    session,
                    next(gen_dev_batches),
                    results['step'],
                    is_dev=True)
                if epoch == 1:
                    self.log_and_print('train_size: %d dev_size: %d' %
                                       (train_size, dev_size))
                log_str += ' dev[%s]' % _eval_str(dev_size, dev_loss,
                                                  dev_metrics)
                self._save_best(session, supervisor.saver, dev_loss,
                                results['step'])
            else:
                if epoch == 1:
                    self.log_and_print('train_size: %d' % train_size)
            self.log_and_print(log_str)

        if not self.dev_examples:
            save_path = os.path.join(self.logdir, 'model.ckpt')
            save_fname = supervisor.saver.save(session,
                                               save_path,
                                               global_step=results['step'])
            self.log_and_print('final model saved in file: %s' % save_fname)
Example no. 12
 def test_group_by_batches_truncated(self):
   self.assertEqual([], list(util.group_by_batches([], 2, truncate=True)))
   self.assertEqual([[1], [2], [3]],
                    list(util.group_by_batches([1, 2, 3], 1, truncate=True)))
   self.assertEqual([[1, 2]],
                    list(util.group_by_batches([1, 2, 3], 2, truncate=True)))
Example no. 13
 def test_group_by_batches(self):
   self.assertEqual([], list(util.group_by_batches([], 2)))
   self.assertEqual([[1], [2], [3]], list(util.group_by_batches([1, 2, 3], 1)))
   self.assertEqual([[1, 2], [3]], list(util.group_by_batches([1, 2, 3], 2)))
Example no. 14
    def _run(self, supervisor, session):
        train_feed_dict = self.train_feeds.copy()
        train_fetches = {
            'train_op': self.train_op,
            'loss': self.loss_total,
            'step': self.global_step
        }
        if self.compute_summaries: train_fetches['summaries'] = self.summaries
        # The training loop is essentially the same regardless of whether
        # we are passing batches by feed dict or by loom input
        # tensor. There are a few minor differences:
        #
        # 1. By feed dict, we compute the size of the training set lazily,
        #    as we iterate over it in the first epoch. By input tensor, we
        #    calculate train_size as batch_size * batches_per_epoch.
        #
        # 2. By feed dict, we get the size of each batch by calling len()
        #    on it (since the last batch in the epoch may have less than
        #    batch_size elements). By input tensor, we require that every
        #    batch have exactly batch_size elements.
        #
        # 3. By feed dict we need to create batches of inputs, and feed
        #    them every time we run the train op (obviously).
        if self.examples:
            epochs, train_size = self._by_feed_dict(train_feed_dict)
        else:
            epochs, train_size = self._by_input_tensor(train_feed_dict)
        if self.dev_examples:
            # Memoize a generator of batches of (size, feed_dict) pairs.
            gen_dev_batches = util.epochs(
                ((len(batch), self.compiler.build_feed_dict(batch)) for batch
                 in util.group_by_batches(self.dev_examples, self.batch_size)),
                shuffle=False)

        for epoch, batches in enumerate(epochs, 1):
            train_loss = 0.0
            for _ in batches:
                if self._should_stop(supervisor): return
                results = session.run(train_fetches, train_feed_dict)
                train_loss += results['loss']
                if self.compute_summaries:
                    supervisor.summary_computed(session, results['summaries'],
                                                results['step'])
            if train_size == 0:
                raise ValueError('examples must be non-empty')
            if self.exact_batch_sizes and epoch == 1:
                if train_size < self.batch_size:
                    raise ValueError(
                        'when exact_batch_sizes is true, examples must have '
                        'at least batch_size items; %s vs. %s' %
                        (train_size, self.batch_size))
                train_size -= train_size % self.batch_size
            train_loss /= train_size
            self.report_loss(results['step'], train_loss)
            log_str = 'epoch:%5d train[loss: %.3e]' % (epoch, train_loss)
            if self.dev_examples:
                dev_size, dev_loss, dev_metrics = self._eval_batches(
                    supervisor,
                    session,
                    next(gen_dev_batches),
                    results['step'],
                    is_dev=True)
                if dev_size is None: return  # should_stop returned true
                if epoch == 1:
                    self.log_and_print('train_size: %d dev_size: %d' %
                                       (train_size, dev_size))
                log_str += ' dev[%s]' % _eval_str(dev_size, dev_loss,
                                                  dev_metrics)
                self.log_and_print(log_str)
                self._save_best(session, supervisor.saver, dev_loss,
                                results['step'])
            else:
                if epoch == 1:
                    self.log_and_print('train_size: %d' % train_size)
                self.log_and_print(log_str)
        if not self.dev_examples and self.is_chief_trainer:
            save_path = os.path.join(self.logdir, 'model.ckpt')
            save_fname = supervisor.saver.save(session,
                                               save_path,
                                               global_step=results['step'])
            self.log_and_print('final model saved in file: %s' % save_fname)
Example no. 15
 def test_group_by_batches(self):
     self.assertEqual([], list(util.group_by_batches([], 2)))
     self.assertEqual([[1], [2], [3]],
                      list(util.group_by_batches([1, 2, 3], 1)))
     self.assertEqual([[1, 2], [3]],
                      list(util.group_by_batches([1, 2, 3], 2)))