def testGlobalStep(self):
  v = tf.Variable(0)
  b1 = bookkeeper.for_new_graph(global_step=v)
  with b1.g.as_default():
    self.assertEqual(v, bookkeeper.global_step())
  with self.assertRaises(ValueError):
    bookkeeper.for_new_graph(global_step=tf.Variable(1.0))
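# Usage sketch (not part of the original source): binding a custom integer
# global step to a fresh graph with `bookkeeper.for_new_graph` and reading it
# back with `bookkeeper.global_step()`. The float variable in the test above
# presumably fails because the global step must be an integer variable.
def example_custom_global_step():
  step = tf.Variable(0, name='my_global_step')
  books = bookkeeper.for_new_graph(global_step=step)
  with books.g.as_default():
    # Inside the bound graph, the supplied variable is the global step.
    assert bookkeeper.global_step() is step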
def slice_constant(data, batch_size=32, name='constant_data', global_step=None): """Provide a slice based on the global_step. This is useful when the entire data array can be stored in memory because it allows you to feed the data very efficiently. Args: data: A numpy array or tensor. batch_size: The batch size for the produced data. name: An optional name for this data. global_step: A global step variable that is used to read the data. If None then the default prettytensor global_step is used. Returns: A tensor that produces the given data. """ with tf.name_scope(name): all_data = tf.convert_to_tensor(data) global_step = global_step or bookkeeper.global_step() count = len(data) / batch_size extra = len(data) - count * batch_size if extra: offset = tf.mod(global_step, count) return tf.slice(all_data, offset * batch_size, batch_size) else: offset = tf.mod(global_step, count + 1) return tf.slice(all_data, offset * batch_size, tf.where(tf.equal(offset, count), extra, batch_size))
def slice_constant(data, batch_size=32, name='constant_data', global_step=None): """Provide a slice based on the global_step. This is useful when the entire data array can be stored in memory because it allows you to feed the data very efficiently. Args: data: A numpy array or tensor. batch_size: The batch size for the produced data. name: An optional name for this data. global_step: A global step variable that is used to read the data. If None then the default prettytensor global_step is used. Returns: A tensor that produces the given data. """ with tf.name_scope(name): all_data = tf.convert_to_tensor(data) global_step = global_step or bookkeeper.global_step() count = len(data) / batch_size extra = len(data) - count * batch_size if extra: offset = tf.mod(global_step, count) return tf.slice(all_data, offset * batch_size, batch_size) else: offset = tf.mod(global_step, count + 1) return tf.slice(all_data, offset * batch_size, tf.select(tf.equal(offset, count), extra, batch_size))
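# Usage sketch (not from the original source; the array names and shapes are
# assumptions): feeding an in-memory numpy dataset with `slice_constant`.
# Because both calls default to the same global step, the image and label
# slices stay aligned from step to step.
import numpy as np

images = np.random.rand(1000, 28 * 28).astype(np.float32)  # fits in memory
labels = np.random.randint(0, 10, size=1000).astype(np.int32)

image_batch = slice_constant(images, batch_size=32, name='image_batch')
label_batch = slice_constant(labels, batch_size=32, name='label_batch')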
def evaluate_repeatedly(self,
                        accuracy,
                        num_steps,
                        feed_vars=(),
                        feed_data=None,
                        summary_tag=None,
                        evaluation_times=-1):
  """Runs the evaluation in a loop for `evaluation_times`.

  On each iteration, `evaluate_model` is called with the supplied arguments.
  This manages the queue threads itself.

  Args:
    accuracy: The metric that is being evaluated.
    num_steps: The number of steps to run in the evaluator.
    feed_vars: A list or tuple of the variables that will be fed.
    feed_data: A generator that produces tuples of the same length as
      feed_vars.
    summary_tag: If provided, the final result of each evaluation will be
      published to this tag.
    evaluation_times: Run this loop this many times, or forever if it is `-1`.
  """
  i = 0
  sess = tf.get_default_session()
  current_checkpoint = self.load_from_checkpoint(sess)
  while not current_checkpoint:
    print('Model not yet available in %s, sleeping for 10 seconds.' %
          os.path.dirname(self._save_path))
    sys.stdout.flush()
    time.sleep(10)
    current_checkpoint = self.load_from_checkpoint(sess)

  # Create relevant ops before starting queue runners.
  self._run_init_test_vars_op()

  try:
    while True:
      i += 1
      accuracy_result = self.evaluate_model(accuracy,
                                            num_steps,
                                            summary_tag=summary_tag,
                                            print_every=0,
                                            feed_vars=feed_vars,
                                            feed_data=feed_data)
      if not summary_tag:
        print('[%d] %s %g' % (sess.run(bookkeeper.global_step()),
                              summary_tag, accuracy_result))
      if i == evaluation_times:
        break
      # Wait until a new checkpoint is written before evaluating again.
      while True:
        next_checkpoint = self.load_from_checkpoint(sess)
        if next_checkpoint == current_checkpoint:
          time.sleep(10)
        else:
          break
      current_checkpoint = next_checkpoint
  finally:
    print('Shutting down')
    sys.stdout.flush()
    self.stop_queues()
def evaluate_repeatedly(self,
                        accuracy,
                        num_steps,
                        feed_vars=(),
                        feed_data=None,
                        summary_tag=None,
                        evaluation_times=-1):
  """Runs the evaluation in a loop for `evaluation_times`.

  On each iteration, `evaluate_model` is called with the supplied arguments.
  This manages the queue threads itself.

  Args:
    accuracy: The metric that is being evaluated.
    num_steps: The number of steps to run in the evaluator.
    feed_vars: A list or tuple of the variables that will be fed.
    feed_data: A generator that produces tuples of the same length as
      feed_vars.
    summary_tag: If provided, the final result of each evaluation will be
      published to this tag.
    evaluation_times: Run this loop this many times, or forever if it is `-1`.
  Returns:
    The final evaluation result from `evaluate_model` if `evaluation_times`
    ever ends.
  """
  current_checkpoint = None
  try:
    for i in itertools.count(0):
      # Use a new session each time to reset the queues.
      with self.session() as sess:
        current_checkpoint = self.load_new_checkpoint_when_available(
            sess, current_checkpoint)

        # Create relevant ops before starting queue runners.
        self._run_init_test_vars_op()

        accuracy_result = self.evaluate_model(accuracy,
                                              num_steps,
                                              summary_tag=summary_tag,
                                              print_every=0,
                                              feed_vars=feed_vars,
                                              feed_data=feed_data)
        if not summary_tag:
          print('[%d] %s' % (sess.run(bookkeeper.global_step()),
                             accuracy_result))
        if (i + 1) == evaluation_times:
          return accuracy_result
  finally:
    print('Shutting down')
    sys.stdout.flush()
    self.stop_queues()
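# Usage sketch (hypothetical; `runner` and `accuracy_op` are assumed names and
# the owning class is not shown in this excerpt): follow a training job's
# checkpoints and re-evaluate each time a new one appears.
final_accuracy = runner.evaluate_repeatedly(
    accuracy=accuracy_op,          # metric tensor averaged over the eval run
    num_steps=100,                 # batches per evaluation pass
    summary_tag='eval_accuracy',   # also publish each result to summaries
    evaluation_times=10)           # stop after 10 evaluations; -1 runs forever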
def run_model(self,
              op_list,
              num_steps,
              feed_vars=(),
              feed_data=None,
              print_every=100,
              allow_initialize=True):
  """Runs `op_list` for `num_steps`.

  Args:
    op_list: A list of ops to run.
    num_steps: Number of steps to run this for. If feeds are used, this is a
      maximum.
    feed_vars: The variables to feed.
    feed_data: An iterator that feeds data tuples.
    print_every: Print a log line and checkpoint every so many steps.
    allow_initialize: If True, the model will be initialized if any variable
      is uninitialized; if False, the model will not be initialized.
  Returns:
    The final run result as a list.
  Raises:
    ValueError: If feed_data doesn't match feed_vars.
  """
  feed_data = feed_data or itertools.repeat(())
  ops = [bookkeeper.global_step()]
  ops.extend(op_list)

  sess = tf.get_default_session()
  self._init_model(sess, allow_initialize)

  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  try:
    for i, data in zip(xrange(num_steps), feed_data):
      log_this_time = print_every and i % print_every == 0
      if len(data) != len(feed_vars):
        raise ValueError(
            'feed_data and feed_vars must be the same length: %d vs %d' %
            (len(data), len(feed_vars)))
      if coord.should_stop():
        break
      if len(feed_vars) != len(data):
        raise ValueError('Feed vars must be the same length as data.')

      if log_this_time and self._summary_writer:
        results = sess.run(ops + [self._summaries],
                           dict(zip(feed_vars, data)))
        self._summary_writer.add_summary(results[-1], results[0])
        results = results[:-1]
      else:
        results = sess.run(ops, dict(zip(feed_vars, data)))
      if log_this_time:
        self._log_and_save(sess, results)

    # Print the last line if it wasn't just printed.
    if print_every and not log_this_time:
      self._log_and_save(sess, results)
  except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
  finally:
    # When done, ask the threads to stop.
    coord.request_stop()
    coord.join(threads)
  return results
def run_model(self,
              op_list,
              num_steps,
              feed_vars=(),
              feed_data=None,
              print_every=100,
              allow_initialize=True):
  """Runs `op_list` for `num_steps`.

  Args:
    op_list: A list of ops to run.
    num_steps: Number of steps to run this for. If feeds are used, this is a
      maximum.
    feed_vars: The variables to feed.
    feed_data: An iterator that feeds data tuples.
    print_every: Print a log line and checkpoint every so many steps.
    allow_initialize: If True, the model will be initialized if any variable
      is uninitialized; if False, the model will not be initialized.
  Returns:
    The final run result as a list.
  Raises:
    ValueError: If feed_data doesn't match feed_vars.
  """
  feed_data = feed_data or itertools.repeat(())
  ops = [bookkeeper.global_step()]
  ops.extend(op_list)

  sess = tf.get_default_session()
  self.prepare_model(sess, allow_initialize=allow_initialize)
  results = []
  try:
    for i, data in zip(xrange(num_steps), feed_data):
      log_this_time = print_every and i % print_every == 0
      if len(data) != len(feed_vars):
        raise ValueError(
            'feed_data and feed_vars must be the same length: %d vs %d' %
            (len(data), len(feed_vars)))
      if self._coord.should_stop():
        print('Coordinator stopped')
        sys.stdout.flush()
        self.stop_queues()
        break
      if len(feed_vars) != len(data):
        raise ValueError('Feed vars must be the same length as data.')

      if log_this_time and self._summary_writer:
        results = sess.run(ops + [self._summaries],
                           dict(zip(feed_vars, data)))
        self._summary_writer.add_summary(results[-1], results[0])
        results = results[:-1]
      else:
        results = sess.run(ops, dict(zip(feed_vars, data)))
      if log_this_time:
        self._log_and_save(sess, results)

    # Print the last line if it wasn't just printed.
    if print_every and not log_this_time:
      self._log_and_save(sess, results)
  except tf.errors.OutOfRangeError as ex:
    print('Done training -- epoch limit reached %s' % ex)
    sys.stdout.flush()
    self.stop_queues()
  except BaseException as ex:
    print('Exception -- stopping threads: %s' % ex, file=sys.stderr)
    sys.stdout.flush()
    self.stop_queues()
    raise
  return results
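# Usage sketch (hypothetical; `runner`, `train_op`, and `loss` are assumed
# names): run a training op for a fixed number of steps, with input supplied
# by queue runners rather than feeds. The returned list is
# [global_step_value] + one result per op in `op_list`.
step_value, _, loss_value = runner.run_model(
    [train_op, loss],
    num_steps=10000,
    print_every=100)   # log and checkpoint every 100 steps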