Example #1
0
 def testGlobalStep(self):
     """Verifies for_new_graph registers an int global step and rejects floats."""
     step_var = tf.Variable(0)
     books = bookkeeper.for_new_graph(global_step=step_var)
     with books.g.as_default():
         self.assertEqual(step_var, bookkeeper.global_step())
     # A float-typed variable is not a valid global step.
     with self.assertRaises(ValueError):
         bookkeeper.for_new_graph(global_step=tf.Variable(1.0))
Example #2
0
 def testGlobalStep(self):
   """Checks that the bookkeeper tracks the supplied integer global step."""
   counter = tf.Variable(0)
   graph_books = bookkeeper.for_new_graph(global_step=counter)
   with graph_books.g.as_default():
     self.assertEqual(counter, bookkeeper.global_step())
   # Non-integer global-step variables must be rejected.
   with self.assertRaises(ValueError):
     bookkeeper.for_new_graph(global_step=tf.Variable(1.0))
Example #3
0
def slice_constant(data, batch_size=32, name='constant_data', global_step=None):
  """Provide a slice based on the global_step.

  This is useful when the entire data array can be stored in memory because it
  allows you to feed the data very efficiently.

  Args:
    data: A numpy array or tensor.
    batch_size: The batch size for the produced data.
    name: An optional name for this data.
    global_step: A global step variable that is used to read the data. If None
      then the default prettytensor global_step is used.
  Returns:
    A tensor that produces the given data.
  """
  with tf.name_scope(name):
    all_data = tf.convert_to_tensor(data)
    global_step = global_step or bookkeeper.global_step()

    # Integer division: number of full batches plus the leftover tail size.
    # (True division `/` would yield a float under Python 3 and break the
    # tf.mod arithmetic below.)
    count = len(data) // batch_size
    extra = len(data) - count * batch_size

    if extra:
      # There is a partial tail batch: cycle through count + 1 slices and
      # shrink the final slice to `extra` elements.
      offset = tf.mod(global_step, count + 1)
      return tf.slice(all_data, offset * batch_size,
                      tf.where(tf.equal(offset, count), extra, batch_size))
    else:
      # Data divides evenly: every slice is a full batch.
      offset = tf.mod(global_step, count)
      return tf.slice(all_data, offset * batch_size, batch_size)
Example #4
0
def slice_constant(data, batch_size=32, name='constant_data', global_step=None):
  """Provide a slice based on the global_step.

  This is useful when the entire data array can be stored in memory because it
  allows you to feed the data very efficiently.

  Args:
    data: A numpy array or tensor.
    batch_size: The batch size for the produced data.
    name: An optional name for this data.
    global_step: A global step variable that is used to read the data. If None
      then the default prettytensor global_step is used.
  Returns:
    A tensor that produces the given data.
  """
  with tf.name_scope(name):
    all_data = tf.convert_to_tensor(data)
    global_step = global_step or bookkeeper.global_step()

    # Use floor division so count stays an int on Python 3.
    count = len(data) // batch_size
    extra = len(data) - count * batch_size

    if extra:
      # Partial tail batch: cycle over count + 1 positions; the final slice
      # only contains the `extra` leftover elements.  tf.select was removed
      # in TensorFlow 1.0 and renamed tf.where.
      offset = tf.mod(global_step, count + 1)
      return tf.slice(all_data, offset * batch_size,
                      tf.where(tf.equal(offset, count), extra, batch_size))
    else:
      # Evenly divisible: every slice is exactly one full batch.
      offset = tf.mod(global_step, count)
      return tf.slice(all_data, offset * batch_size, batch_size)
Example #5
0
    def evaluate_repeatedly(
        self, accuracy, num_steps, feed_vars=(), feed_data=None, summary_tag=None, evaluation_times=-1
    ):
        """Runs the evaluation in a loop for `evaluation_times`.

        On each iteration, `evaluate_model` is called with the supplied
        arguments.  This manages the queue threads itself.

        Args:
          accuracy: The metric that is being evaluated.
          num_steps: The number of steps to run in the evaluator.
          feed_vars: A list or tuple of the variables that will be fed.
          feed_data: A generator that produces tuples of the same length as
            feed_vars.
          summary_tag: If provided, the final result of each evaluation will be
            published to this tag.
          evaluation_times: Run this loop for this many times or forever if it
            is `-1`.
        """
        i = 0
        sess = tf.get_default_session()

        # Block until a first checkpoint exists -- the trainer may not have
        # written one yet.
        current_checkpoint = self.load_from_checkpoint(sess)
        while not current_checkpoint:
            print("Model not yet available, sleeping for 10 seconds %s." % os.path.dirname(self._save_path))
            sys.stdout.flush()
            time.sleep(10)
            current_checkpoint = self.load_from_checkpoint(sess)

        # Create relevant ops before starting queue runners.
        self._run_init_test_vars_op()

        try:
            while True:
                i += 1
                accuracy_result = self.evaluate_model(
                    accuracy,
                    num_steps,
                    summary_tag=summary_tag,
                    print_every=0,
                    feed_vars=feed_vars,
                    feed_data=feed_data,
                )
                if not summary_tag:
                    # summary_tag is known to be falsy in this branch, so do
                    # not interpolate it (the original printed "None" here).
                    print("[%d] %g" % (sess.run(bookkeeper.global_step()), accuracy_result))
                if i == evaluation_times:
                    break
                # Poll every 10 seconds until the trainer writes a checkpoint
                # newer than the one we just evaluated.
                while True:
                    next_checkpoint = self.load_from_checkpoint(sess)
                    if next_checkpoint == current_checkpoint:
                        time.sleep(10)
                    else:
                        break

                current_checkpoint = next_checkpoint
        finally:
            print("Shutting down")
            sys.stdout.flush()
            self.stop_queues()
Example #6
0
    def evaluate_repeatedly(self,
                            accuracy,
                            num_steps,
                            feed_vars=(),
                            feed_data=None,
                            summary_tag=None,
                            evaluation_times=-1):
        """Evaluates the model once per fresh checkpoint, `evaluation_times` times.

        Each round waits for a checkpoint newer than the last one evaluated
        and then calls `evaluate_model` with the supplied arguments.  Queue
        threads are managed internally.

        Args:
          accuracy: The metric that is being evaluated.
          num_steps: The number of steps to run in the evaluator.
          feed_vars: A list or tuple of the variables that will be fed.
          feed_data: A generator that produces tuples of the same length as
            feed_vars.
          summary_tag: If provided, the final result of each evaluation will
            be published to this tag.
          evaluation_times: Run this loop for this many times or forever if
            it is `-1`.

        Returns:
          The final evaluation result from `evaluate_model` if
          `evaluation_times` ever ends.
        """
        last_checkpoint = None
        try:
            # Count rounds from 1 so the counter equals rounds completed.
            for round_num in itertools.count(1):
                # A brand-new session each round resets the input queues.
                with self.session() as sess:
                    last_checkpoint = self.load_new_checkpoint_when_available(
                        sess, last_checkpoint)
                    # Ops must exist before the queue runners spin up.
                    self._run_init_test_vars_op()

                    result = self.evaluate_model(
                        accuracy,
                        num_steps,
                        summary_tag=summary_tag,
                        print_every=0,
                        feed_vars=feed_vars,
                        feed_data=feed_data)
                    if not summary_tag:
                        print('[%d] %s' % (sess.run(
                            bookkeeper.global_step()), result))
                    if round_num == evaluation_times:
                        return result
        finally:
            print('Shutting down')
            sys.stdout.flush()
            self.stop_queues()
Example #7
0
  def evaluate_repeatedly(self,
                          accuracy,
                          num_steps,
                          feed_vars=(),
                          feed_data=None,
                          summary_tag=None,
                          evaluation_times=-1):
    """Evaluates the model in a loop, once per newly available checkpoint.

    Each pass blocks until a checkpoint newer than the previously evaluated
    one appears, then calls `evaluate_model` with the supplied arguments.
    Queue threads are managed internally.

    Args:
      accuracy: The metric that is being evaluated.
      num_steps: The number of steps to run in the evaluator.
      feed_vars: A list or tuple of the variables that will be fed.
      feed_data: A generator that produces tuples of the same length as
        feed_vars.
      summary_tag: If provided, the final result of each evaluation will be
        published to this tag.
      evaluation_times: Run this loop for this many times or forever if it is
        `-1`.

    Returns:
      The final evaluation result from `evaluate_model` if `evaluation_times`
      ever ends.
    """
    checkpoint_path = None
    completed = 0
    try:
      while True:
        # A fresh session per evaluation resets the input queues.
        with self.session() as sess:
          checkpoint_path = self.load_new_checkpoint_when_available(
              sess, checkpoint_path)
          # Create relevant ops before starting queue runners.
          self._run_init_test_vars_op()

          outcome = self.evaluate_model(accuracy,
                                        num_steps,
                                        summary_tag=summary_tag,
                                        print_every=0,
                                        feed_vars=feed_vars,
                                        feed_data=feed_data)
          if not summary_tag:
            print('[%d] %s' % (sess.run(bookkeeper.global_step()), outcome))
          completed += 1
          if completed == evaluation_times:
            return outcome
    finally:
      print('Shutting down')
      sys.stdout.flush()
      self.stop_queues()
Example #8
0
  def run_model(self,
                op_list,
                num_steps,
                feed_vars=(),
                feed_data=None,
                print_every=100,
                allow_initialize=True):
    """Runs `op_list` for `num_steps`.

    Args:
      op_list: A list of ops to run.
      num_steps: Number of steps to run this for.  If feeds are used, this is a
        maximum.
      feed_vars: The variables to feed.
      feed_data: An iterator that feeds data tuples.
      print_every: Print a log line and checkpoint every so many steps.
      allow_initialize: If True, the model will be initialized if any variable
        is uninitialized, if False the model will not be initialized.
    Returns:
      The final run result as a list.
    Raises:
      ValueError: If feed_data doesn't match feed_vars.
    """
    feed_data = feed_data or itertools.repeat(())

    # Always fetch the global step first so results[0] is the step number.
    ops = [bookkeeper.global_step()]
    ops.extend(op_list)

    sess = tf.get_default_session()
    self._init_model(sess, allow_initialize)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Pre-initialize so the post-loop logging and the return statement are
    # safe even when the loop body never executes (num_steps == 0).
    results = []
    log_this_time = False
    try:
      # `range` (not the Python-2-only `xrange`) keeps this block runnable
      # under both Python 2 and Python 3.
      for i, data in zip(range(num_steps), feed_data):
        log_this_time = print_every and i % print_every == 0
        if len(data) != len(feed_vars):
          raise ValueError(
              'feed_data and feed_vars must be the same length: %d vs %d' % (
                  len(data), len(feed_vars)))
        if coord.should_stop():
          break
        if log_this_time and self._summary_writer:
          results = sess.run(ops + [self._summaries],
                             dict(zip(feed_vars, data)))
          self._summary_writer.add_summary(results[-1], results[0])
          results = results[:-1]
        else:
          results = sess.run(ops, dict(zip(feed_vars, data)))
        if log_this_time:
          self._log_and_save(sess, results)

      # Print the last line if it wasn't just printed
      if print_every and not log_this_time:
        self._log_and_save(sess, results)
    except tf.errors.OutOfRangeError:
      # print() function call -- the original used a Python 2 print statement
      # which is a syntax error under Python 3.
      print('Done training -- epoch limit reached')
    finally:
      # When done, ask the threads to stop.
      coord.request_stop()
    coord.join(threads)

    return results
Example #9
0
    def evaluate_repeatedly(self,
                            accuracy,
                            num_steps,
                            feed_vars=(),
                            feed_data=None,
                            summary_tag=None,
                            evaluation_times=-1):
        """Runs the evaluation in a loop for `evaluation_times`.

        On each iteration, `evaluate_model` is called with the supplied
        arguments.  This manages the queue threads itself.

        Args:
          accuracy: The metric that is being evaluated.
          num_steps: The number of steps to run in the evaluator.
          feed_vars: A list or tuple of the variables that will be fed.
          feed_data: A generator that produces tuples of the same length as
            feed_vars.
          summary_tag: If provided, the final result of each evaluation will
            be published to this tag.
          evaluation_times: Run this loop for this many times or forever if
            it is `-1`.
        """
        i = 0
        sess = tf.get_default_session()

        # Wait for the trainer to produce a first checkpoint.
        current_checkpoint = self.load_from_checkpoint(sess)
        while not current_checkpoint:
            print('Model not yet available, sleeping for 10 seconds %s.' %
                  os.path.dirname(self._save_path))
            sys.stdout.flush()
            time.sleep(10)
            current_checkpoint = self.load_from_checkpoint(sess)

        # Create relevant ops before starting queue runners.
        self._run_init_test_vars_op()

        try:
            while True:
                i += 1
                accuracy_result = self.evaluate_model(accuracy,
                                                      num_steps,
                                                      summary_tag=summary_tag,
                                                      print_every=0,
                                                      feed_vars=feed_vars,
                                                      feed_data=feed_data)
                if not summary_tag:
                    # summary_tag is falsy here; interpolating it (as the
                    # original did) always printed "None" or "".
                    print('[%d] %g' % (sess.run(bookkeeper.global_step()),
                                       accuracy_result))
                if i == evaluation_times:
                    break
                # Sleep-poll until a checkpoint newer than the one just
                # evaluated shows up.
                while True:
                    next_checkpoint = self.load_from_checkpoint(sess)
                    if next_checkpoint == current_checkpoint:
                        time.sleep(10)
                    else:
                        break

                current_checkpoint = next_checkpoint
        finally:
            print('Shutting down')
            sys.stdout.flush()
            self.stop_queues()
Example #10
0
    def run_model(self,
                  op_list,
                  num_steps,
                  feed_vars=(),
                  feed_data=None,
                  print_every=100,
                  allow_initialize=True):
        """Runs `op_list` for `num_steps`.

        Args:
          op_list: A list of ops to run.
          num_steps: Number of steps to run this for.  If feeds are used, this
            is a maximum.
          feed_vars: The variables to feed.
          feed_data: An iterator that feeds data tuples.
          print_every: Print a log line and checkpoint every so many steps.
          allow_initialize: If True, the model will be initialized if any
            variable is uninitialized, if False the model will not be
            initialized.
        Returns:
          The final run result as a list.
        Raises:
          ValueError: If feed_data doesn't match feed_vars.
        """
        feed_data = feed_data or itertools.repeat(())

        # Global step goes first so results[0] is always the step number.
        ops = [bookkeeper.global_step()]
        ops.extend(op_list)

        sess = tf.get_default_session()
        self.prepare_model(sess, allow_initialize=allow_initialize)
        results = []
        # Initialized up front so the post-loop check below cannot raise
        # NameError when the loop body never runs (num_steps == 0).
        log_this_time = False
        try:
            # `range` instead of the Python-2-only `xrange`: this method
            # already uses print(..., file=...), which needs Python 3 (or a
            # __future__ import), so xrange would be a NameError there.
            for i, data in zip(range(num_steps), feed_data):
                log_this_time = print_every and i % print_every == 0
                if len(data) != len(feed_vars):
                    raise ValueError(
                        'feed_data and feed_vars must be the same length: %d vs %d'
                        % (len(data), len(feed_vars)))
                if self._coord.should_stop():
                    print('Coordinator stopped')
                    sys.stdout.flush()
                    self.stop_queues()
                    break

                if log_this_time and self._summary_writer:
                    results = sess.run(ops + [self._summaries],
                                       dict(zip(feed_vars, data)))
                    self._summary_writer.add_summary(results[-1], results[0])
                    results = results[:-1]
                else:
                    results = sess.run(ops, dict(zip(feed_vars, data)))
                if log_this_time:
                    self._log_and_save(sess, results)

            # Print the last line if it wasn't just printed
            if print_every and not log_this_time:
                self._log_and_save(sess, results)
        except tf.errors.OutOfRangeError as ex:
            print('Done training -- epoch limit reached %s' % ex)
            sys.stdout.flush()
            self.stop_queues()
        except BaseException as ex:
            print('Exception -- stopping threads: %s' % ex, file=sys.stderr)
            sys.stdout.flush()
            self.stop_queues()
            raise
        return results
Example #11
0
    def run_model(self, op_list, num_steps, feed_vars=(), feed_data=None, print_every=100, allow_initialize=True):
        """Runs `op_list` for `num_steps`.

        Args:
          op_list: A list of ops to run.
          num_steps: Number of steps to run this for.  If feeds are used, this
            is a maximum.
          feed_vars: The variables to feed.
          feed_data: An iterator that feeds data tuples.
          print_every: Print a log line and checkpoint every so many steps.
          allow_initialize: If True, the model will be initialized if any
            variable is uninitialized, if False the model will not be
            initialized.
        Returns:
          The final run result as a list.
        Raises:
          ValueError: If feed_data doesn't match feed_vars.
        """
        feed_data = feed_data or itertools.repeat(())

        # results[0] is always the global step.
        ops = [bookkeeper.global_step()]
        ops.extend(op_list)

        sess = tf.get_default_session()
        self.prepare_model(sess, allow_initialize=allow_initialize)
        results = []
        # Pre-set so the post-loop logging check is safe when the loop body
        # never executes (num_steps == 0 or an empty feed_data iterator).
        log_this_time = False
        try:
            # `range` works on Python 2 and 3; the original `xrange` is a
            # NameError under Python 3, which the print(..., file=...) call
            # below otherwise requires.
            for i, data in zip(range(num_steps), feed_data):
                log_this_time = print_every and i % print_every == 0
                if len(data) != len(feed_vars):
                    raise ValueError(
                        "feed_data and feed_vars must be the same length: %d vs %d" % (len(data), len(feed_vars))
                    )
                if self._coord.should_stop():
                    print("Coordinator stopped")
                    sys.stdout.flush()
                    self.stop_queues()
                    break

                if log_this_time and self._summary_writer:
                    results = sess.run(ops + [self._summaries], dict(zip(feed_vars, data)))
                    self._summary_writer.add_summary(results[-1], results[0])
                    results = results[:-1]
                else:
                    results = sess.run(ops, dict(zip(feed_vars, data)))
                if log_this_time:
                    self._log_and_save(sess, results)

            # Print the last line if it wasn't just printed
            if print_every and not log_this_time:
                self._log_and_save(sess, results)
        except tf.errors.OutOfRangeError as ex:
            print("Done training -- epoch limit reached %s" % ex)
            sys.stdout.flush()
            self.stop_queues()
        except BaseException as ex:
            print("Exception -- stopping threads: %s" % ex, file=sys.stderr)
            sys.stdout.flush()
            self.stop_queues()
            raise
        return results