def main(__):
    # label, inputs, lengths = prepareInputsBatch(FLAGS.batch_size)
    train_dir = os.path.join(FLAGS.save_dir, "train", "results")
    val_dir = os.path.join(FLAGS.save_dir, "val", "results")
    test_dir = os.path.join(FLAGS.save_dir, "test", "results")
    DP = DataLoader()
    model = VAE()
    print_paramater_count()
    init = tf.global_variables_initializer()
    with MonitoredTrainingSession(checkpoint_dir=FLAGS.save_dir,
                                  save_summaries_steps=20,
                                  hooks=[]) as sess:
        sess.run(init)
        for epoch in range(FLAGS.num_epochs):
            for batch_num, batch in enumerate(DP.get_batch()):
                _, loss, summary = sess.run(
                    [model.train_op, model.loss_op, model.summaries],
                    feed_dict={model.input: batch})
                if batch_num % 100 == 0:
                    # Periodically decode one input/reconstruction pair for
                    # eyeballing training progress.
                    preds = sess.run(model.preds_op,
                                     feed_dict={model.input: batch})
                    inp = batch[0]
                    pred = preds[0]
                    print(loss)
                    print(DP.num_to_str(inp))
                    print(DP.num_to_str(pred))
            run_and_save_generation(DP, batch, epoch, model, sess)
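# `print_paramater_count` is called above (and in the next snippet) but never
# defined in this listing. A minimal sketch is given below, assuming it mirrors
# the inline parameter-counting loop in the final snippet of this section; the
# zero-argument signature matches the call above, and the spelling is kept
# as-is to match the call sites.
def print_paramater_count():
    """Hypothetical helper: print the total number of trainable parameters."""
    total_parameters = 0
    for variable in tf.trainable_variables():
        # Multiply out each variable's dimensions, then sum across variables.
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print(total_parameters)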
def main(__):
    # label, inputs, lengths = prepareInputsBatch(FLAGS.batch_size)
    train_dir = os.path.join(FLAGS.save_dir, "train", "results")
    val_dir = os.path.join(FLAGS.save_dir, "val", "results")
    test_dir = os.path.join(FLAGS.save_dir, "test", "results")
    gs = tf.contrib.framework.get_or_create_global_step()
    model = BytenetQuora(gs)
    init = tf.global_variables_initializer()
    total_parameters = 0
    print_paramater_count(total_parameters)
    train_writer = tf.summary.FileWriter(train_dir)
    val_writer = tf.summary.FileWriter(val_dir)
    test_writer = tf.summary.FileWriter(test_dir)

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime."""

        def begin(self):
            self._step = -1
            self._start_time = time.time()

        def before_run(self, run_context):
            self._step += 1
            return tf.train.SessionRunArgs(model.loss_op)  # Asks for loss value.

        def after_run(self, run_context, run_values):
            if self._step % FLAGS.log_frequency == 0:
                current_time = time.time()
                duration = current_time - self._start_time
                self._start_time = current_time
                loss_value = run_values.results
                examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                sec_per_batch = float(duration / FLAGS.log_frequency)
                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))

    with MonitoredTrainingSession(checkpoint_dir=FLAGS.save_dir,
                                  save_summaries_steps=0,
                                  hooks=[]) as sess:
        sess.run(init)
        DP = DataProvider(mode=FLAGS.mode)
        for epoch in range(FLAGS.num_epochs):
            for batch_num, batch in enumerate(DP.train_batch(FLAGS.batch_size)):
                do_train_step(batch, batch_num, model, sess, train_writer)
            if FLAGS.mode != "test" or epoch > 20:
                do_val_dlow(DP, epoch, model, sess, val_writer)
        print("Starting test")
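# `do_train_step` and `do_val_dlow` are helpers assumed by the snippet above
# but not defined in this listing. Below is a minimal sketch of `do_train_step`
# only, under stated assumptions: the batch unpacks into the s1/s2/l1/l2/labels
# placeholders that BytenetQuora exposes in the final snippet of this section,
# and a `model.summaries` op (seen on the VAE model above) is assumed to exist
# here as well. Treat it as illustration, not the original implementation.
def do_train_step(batch, batch_num, model, sess, train_writer):
    """Hypothetical helper: one optimisation step plus summary logging."""
    s1, s2, l1, l2, labels = batch  # assumed batch layout
    _, loss, summary = sess.run(
        [model.train_op, model.loss_op, model.summaries],
        feed_dict={model.s1: s1, model.s2: s2, model.l1: l1,
                   model.l2: l2, model.labels: labels})
    train_writer.add_summary(summary, batch_num)
    if batch_num % 100 == 0:
        print(batch_num, loss)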
def test_print_tensor(self):
    hook = IPULoggingTensorHook(at_end=True)

    def model():
        t = constant_op.constant(42.0, name="foo")
        return hook.log(t)

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            mon_sess.run(compiled_model)

    self.assertRegex(str(self.logged_message), "foo:0 = 42.0")
def train(self):
    with self.__graph.as_default():
        with MonitoredTrainingSession(checkpoint_dir=LOG_DIR,
                                      save_checkpoint_secs=1) as sess:
            for step in range(1000):
                if sess.should_stop():
                    break
                batch_xs, batch_ys = self.mnist.train.next_batch(100)
                sess.run([self.train_step, self.test_data],
                         feed_dict={self.x: batch_xs, self.y_: batch_ys})
            print(sess.run(self.accuracy,
                           feed_dict={self.x: self.mnist.test.images,
                                      self.y_: self.mnist.test.labels}))
def test_print_all_at_end(self):
    hook = IPULoggingTensorHook(
        at_end=True, logging_mode=IPULoggingTensorHook.LoggingMode.ALL)

    def body(v):
        logging_op = hook.log({"foo": v})
        with ops.control_dependencies([logging_op]):
            return v + 1

    def model():
        return loops.repeat(2, body, inputs=[1.0])

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            for _ in range(2):
                mon_sess.run(compiled_model)

    self.assertRegex(str(self.logged_message), r"foo = \[1. 2. 1. 2.\]")
def test_print_formatter(self):
    def formatter(args):
        self.assertIsInstance(args, dict)
        return "foobar: {}".format(args)

    hook = IPULoggingTensorHook(at_end=True, formatter=formatter)

    def model():
        t1 = constant_op.constant(42.0, name="foo")
        t2 = constant_op.constant(43.0, name="bar")
        return hook.log([t1, t2])

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            mon_sess.run(compiled_model)

    self.assertRegex(str(self.logged_message),
                     r"foobar: \{'foo:0': 42.0, 'bar:0': 43.0\}")
def test_print_every_n_secs(self, mock_time):
    hook = IPULoggingTensorHook(every_n_secs=0.5)

    def model():
        return hook.log({"log": constant_op.constant(0)})

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
            mock_time.return_value = 1.0
            mon_sess.run(compiled_model)
            self.assertRegex(str(self.logged_message), "log = 0")

            self.logged_message = ""
            mock_time.return_value = 1.49
            mon_sess.run(compiled_model)
            self.assertEqual(self.logged_message, "")

            mock_time.return_value = 1.5
            mon_sess.run(compiled_model)
            self.assertRegex(str(self.logged_message), "log = 0")
def test_print_every_n_iter(self):
    hook = IPULoggingTensorHook(every_n_iter=2)

    def model():
        step = variables.Variable(0)
        return hook.log({"step": step.assign_add(1).value()})

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    with test.mock.patch.object(tf_logging, "info", self.mock_log):
        # Test re-using the hook.
        for _ in range(2):
            with MonitoredTrainingSession(hooks=[hook]) as mon_sess:
                mon_sess.run(compiled_model)
                self.assertRegex(str(self.logged_message), "step = 1")

                self.logged_message = ""
                mon_sess.run(compiled_model)
                self.assertEqual(self.logged_message, "")

                mon_sess.run(compiled_model)
                self.assertRegex(str(self.logged_message), "step = 3")
def test_two_hooks(self):
    hook1 = IPULoggingTensorHook(every_n_iter=1, feed_name="feed1")
    hook2 = IPULoggingTensorHook(
        every_n_iter=2,
        feed_name="feed2",
        logging_mode=IPULoggingTensorHook.LoggingMode.ALL)

    def model():
        step = variables.Variable(0)
        updated = step.assign_add(1).value()
        return hook1.log({"hook1": updated}), hook2.log({"hook2": updated})

    with ipu_scope("/device:IPU:0"):
        compiled_model = ipu_compiler.compile(model)

    logged_messages = []

    def mock_log(*args, **kwargs):
        del kwargs
        logged_messages.append(str(args))

    with MonitoredTrainingSession(hooks=[hook1, hook2]) as mon_sess:
        with test.mock.patch.object(tf_logging, "info", mock_log):
            mon_sess.run(compiled_model)
            self.assertEqual(len(logged_messages), 2)
            self.assertRegex(logged_messages[0], "hook1 = 1")
            self.assertRegex(logged_messages[1], r"hook2 = \[1\]")

            mon_sess.run(compiled_model)
            self.assertEqual(len(logged_messages), 3)
            self.assertRegex(logged_messages[2], "hook1 = 2")

            mon_sess.run(compiled_model)
            self.assertEqual(len(logged_messages), 5)
            self.assertRegex(logged_messages[3], "hook1 = 3")
            self.assertRegex(logged_messages[4], r"hook2 = \[2 3\]")
def main(__):
    # label, inputs, lengths = prepareInputsBatch(FLAGS.batch_size)
    gs = tf.contrib.framework.get_or_create_global_step()
    model = BytenetQuora(gs)
    init = tf.global_variables_initializer()

    # Count trainable parameters: the product of each variable's dimensions,
    # summed over all trainable variables.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        print(shape)
        print(len(shape))
        variable_parameters = 1
        for dim in shape:
            print(dim)
            variable_parameters *= dim.value
        print(variable_parameters)
        total_parameters += variable_parameters
    print(total_parameters)

    with MonitoredTrainingSession(
            checkpoint_dir=FLAGS.save_dir,
            save_summaries_steps=10,
            # hooks=[NanTensorHook(model.loss_op)]
    ) as sess:
        # init.run()
        # Start populating the filename queue.
        # coord = tf.train.Coordinator()
        # threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        i = 0
        # s1, s2, l1, l2, lab = sess.run(inputs + lengths + [label])
        s1 = np.random.randint(0, 100, [FLAGS.batch_size, 25])
        s2 = np.random.randint(0, 100, [FLAGS.batch_size, 25])
        l1 = np.random.randint(0, 25, [FLAGS.batch_size])
        l2 = np.random.randint(0, 25, [FLAGS.batch_size])
        label = np.random.randint(0, 2, [FLAGS.batch_size])
        # The feed does not change between steps, so build it once.
        feed = {
            model.s1: s1,
            model.s2: s2,
            model.l1: l1,
            model.l2: l2,
            model.labels: label
        }
        sess.run(init, feed_dict=feed)
        while True:
            _, loss_val, _ = sess.run(
                [model.train_op, model.loss_op, model.metrics_op],
                feed_dict=feed)
            print(i, loss_val)
            i += 1
        # These shutdown calls belong with the coordinator commented out above;
        # with it disabled they would raise NameError, so they are commented
        # out as well.
        # coord.request_stop()
        # coord.join(threads)