def main(__):
    # label, inputs, lengths = prepareInputsBatch(FLAGS.batch_size)
    train_dir = os.path.join(FLAGS.save_dir, "train", "results")
    val_dir = os.path.join(FLAGS.save_dir, "val", "results")
    test_dir = os.path.join(FLAGS.save_dir, "test", "results")
    DP = DataLoader()
    model = VAE()
    print_paramater_count()
    init = tf.global_variables_initializer()
    with MonitoredTrainingSession(checkpoint_dir=FLAGS.save_dir,
                                  save_summaries_steps=20,
                                  hooks=[]) as sess:
        sess.run(init)
        for epoch in range(FLAGS.num_epochs):
            for batch_num, batch in enumerate(DP.get_batch()):
                _, loss, summary = sess.run(
                    [model.train_op, model.loss_op, model.summaries],
                    feed_dict={model.input: batch})
                if batch_num % 100 == 0:
                    # Periodically decode one input/reconstruction pair for
                    # eyeballing training progress.
                    preds = sess.run(model.preds_op,
                                     feed_dict={model.input: batch})
                    inp = batch[0]
                    pred = preds[0]
                    print(loss)
                    print(DP.num_to_str(inp))
                    print(DP.num_to_str(pred))
            run_and_save_generation(DP, batch, epoch, model, sess)
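# `print_paramater_count` is called above (and in the next snippet) but never
# defined in this listing. A minimal sketch is given below, assuming it mirrors
# the inline parameter-counting loop in the final snippet of this section; the
# zero-argument signature matches the call above, and the spelling is kept
# as-is to match the call sites.
def print_paramater_count():
    """Hypothetical helper: print the total number of trainable parameters."""
    total_parameters = 0
    for variable in tf.trainable_variables():
        # Multiply out each variable's dimensions, then sum across variables.
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print(total_parameters)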
def main(__):
    # label, inputs, lengths = prepareInputsBatch(FLAGS.batch_size)
    train_dir = os.path.join(FLAGS.save_dir, "train", "results")
    val_dir = os.path.join(FLAGS.save_dir, "val", "results")
    test_dir = os.path.join(FLAGS.save_dir, "test", "results")
    gs = tf.contrib.framework.get_or_create_global_step()
    model = BytenetQuora(gs)
    init = tf.global_variables_initializer()
    total_parameters = 0
    print_paramater_count(total_parameters)
    train_writer = tf.summary.FileWriter(train_dir)
    val_writer = tf.summary.FileWriter(val_dir)
    test_writer = tf.summary.FileWriter(test_dir)

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime."""

        def begin(self):
            self._step = -1
            self._start_time = time.time()

        def before_run(self, run_context):
            self._step += 1
            return tf.train.SessionRunArgs(model.loss_op)  # Asks for loss value.

        def after_run(self, run_context, run_values):
            if self._step % FLAGS.log_frequency == 0:
                current_time = time.time()
                duration = current_time - self._start_time
                self._start_time = current_time
                loss_value = run_values.results
                examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                sec_per_batch = float(duration / FLAGS.log_frequency)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))

    with MonitoredTrainingSession(checkpoint_dir=FLAGS.save_dir,
                                  save_summaries_steps=0,
                                  hooks=[]) as sess:
        sess.run(init)
        DP = DataProvider(mode=FLAGS.mode)
        for epoch in range(FLAGS.num_epochs):
            for batch_num, batch in enumerate(DP.train_batch(FLAGS.batch_size)):
                do_train_step(batch, batch_num, model, sess, train_writer)
            if FLAGS.mode != "test" or epoch > 20:
                do_val_dlow(DP, epoch, model, sess, val_writer)
        print("Starting test")
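# `do_train_step` and `do_val_dlow` are helpers assumed by the snippet above
# but not defined in this listing. Below is a minimal sketch of `do_train_step`
# only, under stated assumptions: the batch unpacks into the s1/s2/l1/l2/labels
# placeholders that BytenetQuora exposes in the final snippet of this section,
# and a `model.summaries` op (seen on the VAE model above) is assumed to exist
# here as well. Treat it as illustration, not the original implementation.
def do_train_step(batch, batch_num, model, sess, train_writer):
    """Hypothetical helper: one optimisation step plus summary logging."""
    s1, s2, l1, l2, labels = batch  # assumed batch layout
    _, loss, summary = sess.run(
        [model.train_op, model.loss_op, model.summaries],
        feed_dict={model.s1: s1, model.s2: s2, model.l1: l1,
                   model.l2: l2, model.labels: labels})
    train_writer.add_summary(summary, batch_num)
    if batch_num % 100 == 0:
        print(batch_num, loss)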
def test_print_tensor(self):
    hook = IPULoggingTensorHook(at_end=True)

    def model():
        t = constant_op.constant(42.0, name="foo")
        return hook.log(t)

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            mon_sess.run(compiled_model)

    self.assertRegex(str(self.logged_message), "foo:0 = 42.0")
def train(self):
    with self.__graph.as_default():
        with MonitoredTrainingSession(checkpoint_dir=LOG_DIR,
                                      save_checkpoint_secs=1) as sess:
            for step in range(1000):
                if sess.should_stop():
                    break
                batch_xs, batch_ys = self.mnist.train.next_batch(100)
                sess.run([self.train_step, self.test_data],
                         feed_dict={self.x: batch_xs, self.y_: batch_ys})
            print(sess.run(self.accuracy,
                           feed_dict={self.x: self.mnist.test.images,
                                      self.y_: self.mnist.test.labels}))
def test_print_all_at_end(self):
    hook = IPULoggingTensorHook(
        at_end=True, logging_mode=IPULoggingTensorHook.LoggingMode.ALL)

    def body(v):
        logging_op = hook.log({"foo": v})
        with ops.control_dependencies([logging_op]):
            return v + 1

    def model():
        return loops.repeat(2, body, inputs=[1.0])

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            for _ in range(2):
                mon_sess.run(compiled_model)

    self.assertRegex(str(self.logged_message), r"foo = \[1. 2. 1. 2.\]")
def test_print_formatter(self):
    def formatter(args):
        self.assertIsInstance(args, dict)
        return "foobar: {}".format(args)

    hook = IPULoggingTensorHook(at_end=True, formatter=formatter)

    def model():
        t1 = constant_op.constant(42.0, name="foo")
        t2 = constant_op.constant(43.0, name="bar")
        return hook.log([t1, t2])

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            mon_sess.run(compiled_model)

    self.assertRegex(str(self.logged_message),
                     r"foobar: \{'foo:0': 42.0, 'bar:0': 43.0\}")
def test_print_every_n_secs(self, mock_time):
    hook = IPULoggingTensorHook(every_n_secs=0.5)

    def model():
        return hook.log({"log": constant_op.constant(0)})

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            mock_time.return_value = 1.0
            mon_sess.run(compiled_model)
            self.assertRegex(str(self.logged_message), "log = 0")

            self.logged_message = ""
            mock_time.return_value = 1.49
            mon_sess.run(compiled_model)
            self.assertEqual(self.logged_message, "")

            mock_time.return_value = 1.5
            mon_sess.run(compiled_model)
            self.assertRegex(str(self.logged_message), "log = 0")
def test_print_every_n_iter(self):
    hook = IPULoggingTensorHook(every_n_iter=2)

    def model():
        step = variables.Variable(0)
        return hook.log({"step": step.assign_add(1).value()})

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        # Test re-using the hook.
        for _ in range(2):
            with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
                mon_sess.run(compiled_model)
                self.assertRegex(str(self.logged_message), "step = 1")

                self.logged_message = ""
                mon_sess.run(compiled_model)
                self.assertEqual(self.logged_message, "")

                mon_sess.run(compiled_model)
                self.assertRegex(str(self.logged_message), "step = 3")
def test_two_hooks(self):
    hook1 = IPULoggingTensorHook(every_n_iter=1, feed_name="feed1")
    hook2 = IPULoggingTensorHook(
        every_n_iter=2,
        feed_name="feed2",
        logging_mode=IPULoggingTensorHook.LoggingMode.ALL)

    def model():
        step = variables.Variable(0)
        updated = step.assign_add(1).value()
        return hook1.log({"hook1": updated}), hook2.log({"hook2": updated})

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    logged_messages = []

    def mock_log(*args, **kwargs):
        del kwargs
        logged_messages.append(str(args))

    with MonitoredTrainingSession(hooks=[hook1, hook2]) as mon_sess:
        with test.mock.patch.object(tf_logging, "info", mock_log):
            mon_sess.run(compiled_model)
            self.assertEqual(len(logged_messages), 2)
            self.assertRegex(logged_messages[0], "hook1 = 1")
            self.assertRegex(logged_messages[1], r"hook2 = \[1\]")

            mon_sess.run(compiled_model)
            self.assertEqual(len(logged_messages), 3)
            self.assertRegex(logged_messages[2], "hook1 = 2")

            mon_sess.run(compiled_model)
            self.assertEqual(len(logged_messages), 5)
            self.assertRegex(logged_messages[3], "hook1 = 3")
            self.assertRegex(logged_messages[4], r"hook2 = \[2 3\]")
def main(__):
    # label, inputs, lengths = prepareInputsBatch(FLAGS.batch_size)
    gs = tf.contrib.framework.get_or_create_global_step()
    model = BytenetQuora(gs)
    init = tf.global_variables_initializer()

    # Count trainable parameters: the product of each variable's dimensions,
    # summed over all trainable variables.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        print(shape)
        print(len(shape))
        variable_parameters = 1
        for dim in shape:
            print(dim)
            variable_parameters *= dim.value
        print(variable_parameters)
        total_parameters += variable_parameters
    print(total_parameters)

    with MonitoredTrainingSession(
            checkpoint_dir=FLAGS.save_dir,
            save_summaries_steps=10,
            # hooks=[NanTensorHook(model.loss_op)]
    ) as sess:
        # init.run()
        # Start populating the filename queue.
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        i = 0
        # s1, s2, l1, l2, lab = sess.run(inputs + lengths + [label])
        s1 = np.random.randint(0, 100, [FLAGS.batch_size, 25])
        s2 = np.random.randint(0, 100, [FLAGS.batch_size, 25])
        l1 = np.random.randint(0, 25, [FLAGS.batch_size])
        l2 = np.random.randint(0, 25, [FLAGS.batch_size])
        label = np.random.randint(0, 2, [FLAGS.batch_size])
        # The feed does not change between steps, so build it once.
        feed = {
            model.s1: s1,
            model.s2: s2,
            model.l1: l1,
            model.l2: l2,
            model.labels: label
        }
        sess.run(init, feed_dict=feed)
        while True:
            _, loss_val, _ = sess.run(
                [model.train_op, model.loss_op, model.metrics_op],
                feed_dict=feed)
            print(i, loss_val)
            i += 1
        # These shutdown calls belong with the coordinator commented out above;
        # with it disabled they would raise NameError, so they are commented
        # out as well.
        # coord.request_stop()
        # coord.join(threads)