Example #1
    def benchmarkEagerL2hmc(self):
        """Benchmark Eager performance."""

        hparams = get_default_hparams()
        dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                  loglikelihood_fn=l2hmc.get_scg_energy_fn(),
                                  n_steps=hparams.n_steps,
                                  eps=hparams.eps)
        # TODO(lxuechen): Add learning rate decay
        optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)

        # Warmup to reduce initialization effect when timing
        l2hmc.warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters)

        # Time
        start_time = time.time()
        l2hmc.fit(dynamics,
                  optimizer,
                  n_samples=hparams.n_samples,
                  n_iters=hparams.n_iters)
        # Average per-iteration time so the throughput below is per step
        wall_time = (time.time() - start_time) / hparams.n_iters
        examples_per_sec = hparams.n_samples / wall_time

        self.report_benchmark(name="eager_train_%s" %
                              ("gpu" if tfe.num_gpus() > 0 else "cpu"),
                              iters=hparams.n_iters,
                              extras={"examples_per_sec": examples_per_sec},
                              wall_time=wall_time)
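
Note: get_default_hparams() is defined outside these snippets. A minimal sketch of what it plausibly returns, with field names taken from the calls above (the default values are assumptions, not the project's actual settings):

import tensorflow as tf

def get_default_hparams():
  # Hyperparameters referenced by the benchmarks and tests in this section.
  return tf.contrib.training.HParams(
      x_dim=2,            # Dimensionality of each sample
      n_samples=200,      # Batch size of the sampler
      n_steps=10,         # Leapfrog steps per transition
      eps=0.1,            # Initial leapfrog step size
      n_iters=10,         # Timed training iterations
      n_warmup_iters=3,   # Untimed warmup iterations
      learning_rate=0.0003)
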
Example #2
    def benchmark_graph(self):
        """Benchmark Graph performance."""

        hparams = get_default_hparams()
        tf.reset_default_graph()
        with tf.Graph().as_default():
            energy_fn, _, _ = l2hmc.get_scg_energy_fn()
            dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                      minus_loglikelihood_fn=energy_fn,
                                      n_steps=hparams.n_steps,
                                      eps=hparams.eps)
            x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim])
            loss, x_out, _ = l2hmc.compute_loss(dynamics, x)

            global_step = tf.Variable(0., name="global_step", trainable=False)
            learning_rate = tf.train.exponential_decay(hparams.learning_rate,
                                                       global_step,
                                                       1000,
                                                       0.96,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            train_op = optimizer.minimize(loss, global_step=global_step)

            # Single thread; fairer comparison against eager
            session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                          inter_op_parallelism_threads=1)

            with tf.Session(config=session_conf) as sess:
                sess.run(tf.global_variables_initializer())

                # Warmup to reduce initialization effect when timing
                samples = npr.normal(size=[hparams.n_samples, hparams.x_dim])
                for _ in range(hparams.n_warmup_iters):
                    samples, _, _, _ = sess.run(
                        [x_out, loss, train_op, learning_rate],
                        feed_dict={x: samples})

                # Training
                start_time = time.time()
                for i in range(hparams.n_iters):
                    samples, loss_np, _, _ = sess.run(
                        [x_out, loss, train_op, learning_rate],
                        feed_dict={x: samples})
                    print("Iteration %d: loss %.4f" % (i, loss_np))
                # Average per-iteration time so the throughput is per step
                wall_time = (time.time() - start_time) / hparams.n_iters
                examples_per_sec = hparams.n_samples / wall_time

                self.report_benchmark(
                    name="graph_train_%s" %
                    ("gpu" if tf.test.is_gpu_available() else "cpu"),
                    iters=hparams.n_iters,
                    extras={"examples_per_sec": examples_per_sec},
                    wall_time=wall_time)
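
The graph benchmark above assumes l2hmc.compute_loss(dynamics, x) returns (loss, x_out, accept_prob). The real function implements the expected-squared-jump-distance loss of Levy et al. (2018); the stub below only mirrors that contract and is a sketch, not the library code:

import tensorflow as tf

def compute_loss_sketch(dynamics, x, scale=0.1, eps=1e-4):
  # One L2HMC proposal plus Metropolis-Hastings accept/reject.
  _, _, accept_prob, x_out = dynamics.apply_transition(x)
  # Expected squared jump distance, weighted by the acceptance
  # probability; eps guards against division by zero.
  esjd = accept_prob * tf.reduce_sum(tf.square(x_out - x), axis=1) + eps
  # Reciprocal-minus-linear form rewards large, frequently accepted moves.
  loss = tf.reduce_mean(scale / esjd - esjd / scale)
  return loss, x_out, accept_prob
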
Example #3
    def _get_energy_fn(self):
        """Get specific energy function according to FLAGS."""

        if FLAGS.energy_fn == "scg":
            energy_fn = l2hmc.get_scg_energy_fn()
        elif FLAGS.energy_fn == "multivariate_gaussian":
            energy_fn = l2hmc.get_multivariate_gaussian_energy_fn(
                x_dim=FLAGS.x_dim)
        else:
            raise ValueError("No such energy function %s" % FLAGS.energy_fn)

        return energy_fn
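
The FLAGS referenced above are assumed to be defined at module level, roughly as follows (flag names are inferred from the snippet; help strings and defaults are illustrative):

from absl import flags

flags.DEFINE_string("energy_fn", "scg",
                    "Which energy function to use: 'scg' or "
                    "'multivariate_gaussian'.")
flags.DEFINE_integer("x_dim", 2, "Dimensionality of the samples.")
FLAGS = flags.FLAGS
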
Example #4
  def test_apply_transition(self):
    """Testing function `Dynamics.apply_transition` in graph and eager mode."""

    # Eager mode testing
    hparams = get_default_hparams()
    energy_fn, _, _ = l2hmc.get_scg_energy_fn()
    dynamics = l2hmc.Dynamics(
        x_dim=hparams.x_dim,
        minus_loglikelihood_fn=energy_fn,
        n_steps=hparams.n_steps,
        eps=hparams.eps)
    samples = tf.random_normal(shape=[hparams.n_samples, hparams.x_dim])
    x_, v_, x_accept_prob, x_out = dynamics.apply_transition(samples)

    self.assertEqual(x_.shape, v_.shape)
    self.assertEqual(x_out.shape, samples.shape)
    self.assertEqual(x_.shape, x_out.shape)
    self.assertEqual(x_accept_prob.shape, (hparams.n_samples,))

    # Graph mode testing
    with tf.Graph().as_default():
      energy_fn, _, _ = l2hmc.get_scg_energy_fn()
      dynamics = l2hmc.Dynamics(
          x_dim=hparams.x_dim,
          minus_loglikelihood_fn=energy_fn,
          n_steps=hparams.n_steps,
          eps=hparams.eps)
      x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim])
      x_, v_, x_accept_prob, x_out = dynamics.apply_transition(x)
      samples = npr.normal(size=[hparams.n_samples, hparams.x_dim])

      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        np_x_, np_v_, np_x_accept_prob, np_x_out = sess.run(
            [x_, v_, x_accept_prob, x_out], feed_dict={x: samples})

        self.assertEqual(np_x_.shape, np_v_.shape)
        self.assertEqual(samples.shape, np_x_out.shape)
        self.assertEqual(np_x_.shape, np_x_out.shape)
        self.assertEqual(np_x_accept_prob.shape, (hparams.n_samples,))
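
The shape checks above pin down the apply_transition contract: it returns the proposed position and momentum, the per-sample acceptance probability, and, last, the post-accept/reject batch that continues the chain. A minimal sampling loop built on that contract (a sketch, not library code):

import tensorflow as tf

def sample_chain_sketch(dynamics, x_init, n_transitions=100):
  # Run n_transitions full L2HMC proposals with MH accept/reject;
  # only the final tuple element is fed back into the chain.
  x = x_init
  history = []
  for _ in range(n_transitions):
    _, _, _, x = dynamics.apply_transition(x)
    history.append(x)
  return tf.stack(history)  # Shape: [n_transitions, n_samples, x_dim]
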
Example #5
    def testComputeLoss(self):
        """Testing function l2hmc.compute_loss in both graph and eager mode."""

        # Eager mode testing
        hparams = get_default_hparams()
        dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                  loglikelihood_fn=l2hmc.get_scg_energy_fn(),
                                  n_steps=hparams.n_steps,
                                  eps=hparams.eps)
        samples = tf.random_normal(shape=[hparams.n_samples, hparams.x_dim])
        loss, x_out = l2hmc.compute_loss(samples, dynamics)

        # Check shape and numerical stability
        self.assertEqual(x_out.shape, samples.shape)
        self.assertEqual(loss.shape, [])
        self.assertTrue(np.all(np.isfinite(loss.numpy())))

        # Graph mode testing
        with tf.Graph().as_default():
            dynamics = l2hmc.Dynamics(
                x_dim=hparams.x_dim,
                loglikelihood_fn=l2hmc.get_scg_energy_fn(),
                n_steps=hparams.n_steps,
                eps=hparams.eps)
            x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim])
            loss, x_out = l2hmc.compute_loss(x, dynamics)
            samples = npr.normal(size=[hparams.n_samples, hparams.x_dim])

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                loss_np, x_out_np = sess.run([loss, x_out],
                                             feed_dict={x: samples})

                # Check shape and numerical stability
                self.assertEqual(x_out_np.shape, samples.shape)
                self.assertEqual(loss_np.shape, ())
                self.assertTrue(np.all(np.isfinite(loss_np)))
Example #6
    def benchmarkGraphL2hmc(self):
        """Benchmark Graph performance."""

        hparams = get_default_hparams()
        with tf.Graph().as_default():
            dynamics = l2hmc.Dynamics(
                x_dim=hparams.x_dim,
                loglikelihood_fn=l2hmc.get_scg_energy_fn(),
                n_steps=hparams.n_steps,
                eps=hparams.eps)
            x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim])
            loss, x_out = l2hmc.compute_loss(x, dynamics)

            global_step = tf.Variable(0., name="global_step", trainable=False)
            learning_rate = tf.train.exponential_decay(hparams.learning_rate,
                                                       global_step,
                                                       1000,
                                                       0.96,
                                                       staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            train_op = optimizer.minimize(loss, global_step=global_step)

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())

                # Warmup to reduce initialization effect when timing
                samples = npr.normal(size=[hparams.n_samples, hparams.x_dim])
                for _ in range(hparams.n_warmup_iters):
                    samples, _, _, _ = sess.run(
                        [x_out, loss, train_op, learning_rate],
                        feed_dict={x: samples})

                # Time
                start_time = time.time()
                for _ in range(hparams.n_iters):
                    samples, _, _, _ = sess.run(
                        [x_out, loss, train_op, learning_rate],
                        feed_dict={x: samples})
                # Average per-iteration time so the throughput is per step
                wall_time = (time.time() - start_time) / hparams.n_iters
                examples_per_sec = hparams.n_samples / wall_time

                self.report_benchmark(
                    name="graph_train_%s" %
                    ("gpu" if tf.test.is_gpu_available() else "cpu"),
                    iters=hparams.n_iters,
                    extras={"examples_per_sec": examples_per_sec},
                    wall_time=wall_time)
Example #7
    def benchmark_graph(self):
        """Benchmark Graph performance."""

        hparams = get_default_hparams()
        tf.enable_resource_variables()
        for sample_size in [10, 25, 50, 100, 200]:
            hparams.n_samples = sample_size
            tf.reset_default_graph()
            with tf.Graph().as_default():
                energy_fn, _, _ = l2hmc.get_scg_energy_fn()
                x = tf.random_normal([hparams.n_samples, hparams.x_dim],
                                     dtype=tf.float32)
                dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                          minus_loglikelihood_fn=energy_fn,
                                          n_steps=hparams.n_steps,
                                          eps=hparams.eps)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)
                train_op, loss, _ = graph_step(dynamics, optimizer, x)

                # Single inter-op thread; fairer comparison against eager
                session_conf = tf.ConfigProto(inter_op_parallelism_threads=1)

                with tf.Session(config=session_conf) as sess:
                    sess.run(tf.global_variables_initializer())

                    # Warmup to reduce initialization effect when timing
                    for _ in range(hparams.n_warmup_iters):
                        _, _ = sess.run([train_op, loss])

                    # Training
                    start_time = time.time()
                    for i in range(hparams.n_iters):
                        _, loss_np = sess.run([train_op, loss])
                        print("Iteration %d: loss %.4f" % (i, loss_np))
                    wall_time = (time.time() - start_time) / hparams.n_iters
                    examples_per_sec = hparams.n_samples / wall_time

                    self.report_benchmark(
                        name="graph_train_%s_%d" %
                        ("gpu" if tf.test.is_gpu_available() else "cpu",
                         sample_size),
                        iters=hparams.n_iters,
                        extras={"examples_per_sec": examples_per_sec},
                        wall_time=wall_time)
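
graph_step is not shown in this snippet. Judging from the call train_op, loss, _ = graph_step(dynamics, optimizer, x), a plausible minimal implementation is the following (an inference, not the verbatim helper):

def graph_step(dynamics, optimizer, samples):
  # Build the L2HMC loss and a training op over the dynamics' variables.
  loss, x_out, _ = l2hmc.compute_loss(dynamics, samples)
  train_op = optimizer.minimize(loss)
  return train_op, loss, x_out
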
Example #8
  def _benchmark_eager(self, defun=False):
    """Benchmark Eager performance."""

    hparams = get_default_hparams()
    for sample_size in [10, 25, 50, 100, 200]:
      hparams.n_samples = sample_size
      energy_fn, _, _ = l2hmc.get_scg_energy_fn()
      dynamics = l2hmc.Dynamics(
          x_dim=hparams.x_dim,
          minus_loglikelihood_fn=energy_fn,
          n_steps=hparams.n_steps,
          eps=hparams.eps)
      optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
      step_fn = tfe.defun(step) if defun else step

      # Warmup to reduce initialization effect when timing
      warmup(
          dynamics,
          optimizer,
          n_iters=hparams.n_warmup_iters,
          n_samples=hparams.n_samples,
          step_fn=step_fn)

      # Training
      samples = tf.random_normal(
          shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
      start_time = time.time()
      fit(dynamics,
          samples,
          optimizer,
          step_fn=step_fn,
          n_iters=hparams.n_iters)
      wall_time = (time.time() - start_time) / hparams.n_iters
      examples_per_sec = hparams.n_samples / wall_time

      self.report_benchmark(
          name="eager_train_%s%s_%d" %
          ("gpu" if tf.test.is_gpu_available() else "cpu",
           "_defun" if defun else "", sample_size),
          iters=hparams.n_iters,
          extras={"examples_per_sec": examples_per_sec},
          wall_time=wall_time)

    del dynamics
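
step, warmup, and fit live outside this snippet. The sketch below is one way to write them consistently with the calls above (signatures are inferred; a dynamics.x_dim attribute and Keras-style dynamics.trainable_variables are assumed):

import tensorflow as tf

def step(dynamics, optimizer, samples):
  # One eager training step: differentiate the loss w.r.t. the dynamics.
  with tf.GradientTape() as tape:
    loss, x_out, _ = l2hmc.compute_loss(dynamics, samples)
  grads = tape.gradient(loss, dynamics.trainable_variables)
  optimizer.apply_gradients(zip(grads, dynamics.trainable_variables))
  return loss, x_out

def warmup(dynamics, optimizer, n_iters=1, n_samples=200, step_fn=step):
  # Throwaway iterations so one-time costs (variable creation, defun
  # tracing) do not pollute the timed loop.
  samples = tf.random_normal(shape=[n_samples, dynamics.x_dim])
  for _ in range(n_iters):
    _, samples = step_fn(dynamics, optimizer, samples)

def fit(dynamics, samples, optimizer, step_fn=step, n_iters=5000):
  for _ in range(n_iters):
    _, samples = step_fn(dynamics, optimizer, samples)
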
Example #9
def main(_):
    tf.enable_eager_execution()
    global_step = tf.train.get_or_create_global_step()
    global_step.assign(1)

    energy_fn, mean, covar = {
        "scg": l2hmc.get_scg_energy_fn(),
        "rw": l2hmc.get_rw_energy_fn()
    }[FLAGS.energy_fn]

    x_dim = 2
    train_iters = 5000
    eval_iters = 2000
    eps = 0.1
    n_steps = 10  # Leapfrog steps per transition
    n_samples = 200
    record_loss_every = 100

    dynamics = l2hmc.Dynamics(x_dim=x_dim,
                              minus_loglikelihood_fn=energy_fn,
                              n_steps=n_steps,
                              eps=eps)
    learning_rate = tf.train.exponential_decay(1e-3,
                                               global_step,
                                               1000,
                                               0.96,
                                               staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    checkpointer = tf.train.Checkpoint(optimizer=optimizer,
                                       dynamics=dynamics,
                                       global_step=global_step)

    if FLAGS.train_dir:
        summary_writer = tf.contrib.summary.create_file_writer(FLAGS.train_dir)
        if FLAGS.restore:
            latest_path = tf.train.latest_checkpoint(FLAGS.train_dir)
            checkpointer.restore(latest_path)
            print("Restored latest checkpoint at path:\"{}\" ".format(
                latest_path))
            sys.stdout.flush()

    if not FLAGS.restore:
        # Training
        if FLAGS.use_defun:
            # Use `tfe.defun` to boost performance when there are lots of small ops
            loss_fn = tfe.defun(l2hmc.compute_loss)
        else:
            loss_fn = l2hmc.compute_loss

        samples = tf.random_normal(shape=[n_samples, x_dim])
        for i in range(1, train_iters + 1):
            loss, samples, accept_prob = train_one_iter(
                dynamics,
                samples,
                optimizer,
                loss_fn=loss_fn,
                global_step=global_step)

            if i % record_loss_every == 0:
                print("Iteration {}, loss {:.4f}, x_accept_prob {:.4f}".format(
                    i, loss.numpy(),
                    accept_prob.numpy().mean()))
                if FLAGS.train_dir:
                    with summary_writer.as_default():
                        with tf.contrib.summary.always_record_summaries():
                            tf.contrib.summary.scalar("Training loss",
                                                      loss,
                                                      step=global_step)
        print("Training complete.")
        sys.stdout.flush()

        if FLAGS.train_dir:
            saved_path = checkpointer.save(
                file_prefix=os.path.join(FLAGS.train_dir, "ckpt"))
            print("Saved checkpoint at path: \"{}\" ".format(saved_path))
            sys.stdout.flush()

    # Evaluation
    if FLAGS.use_defun:
        # Use `tfe.defun` to boost performance when there are lots of small ops
        apply_transition = tfe.defun(dynamics.apply_transition)
    else:
        apply_transition = dynamics.apply_transition

    samples = tf.random_normal(shape=[n_samples, x_dim])
    samples_history = []
    for _ in range(eval_iters):
        samples_history.append(samples.numpy())
        _, _, _, samples = apply_transition(samples)
    samples_history = np.array(samples_history)
    print("Sampling complete.")
    sys.stdout.flush()

    # Mean and covariance of target distribution
    mean = mean.numpy()
    covar = covar.numpy()
    ac_spectrum = compute_ac_spectrum(samples_history, mean, covar)
    print("First 25 entries of the auto-correlation spectrum: {}".format(
        ac_spectrum[:25]))
    ess = compute_ess(ac_spectrum)
    print("Effective sample size per Metropolis-Hastings step: {}".format(ess))
    sys.stdout.flush()

    if FLAGS.train_dir:
        # Plot autocorrelation spectrum in tensorboard
        plot_step = tfe.Variable(1, trainable=False, dtype=tf.int64)

        for ac in ac_spectrum:
            with summary_writer.as_default():
                with tf.contrib.summary.always_record_summaries():
                    tf.contrib.summary.scalar("Autocorrelation",
                                              ac,
                                              step=plot_step)
            plot_step.assign(plot_step + n_steps)

        if HAS_MATPLOTLIB:
            # Choose a single chain and plot the trajectory
            single_chain = samples_history[:, 0, :]
            xs = single_chain[:100, 0]
            ys = single_chain[:100, 1]
            plt.figure()
            plt.plot(xs, ys, color="orange", marker="o",
                     alpha=0.6)  # Trained chain
            plt.savefig(os.path.join(FLAGS.train_dir, "single_chain.png"))
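
train_one_iter, compute_ac_spectrum, and compute_ess are defined elsewhere in this example. Below are plausible minimal versions of two of them, consistent with how they are called above (the ESS formula is the standard autocorrelation-based estimate and is an assumption, not a copy of the original helper):

import numpy as np
import tensorflow as tf

def train_one_iter(dynamics, samples, optimizer,
                   loss_fn=l2hmc.compute_loss, global_step=None):
  # One eager optimization step over the dynamics' parameters.
  with tf.GradientTape() as tape:
    loss, x_out, accept_prob = loss_fn(dynamics, samples)
  grads = tape.gradient(loss, dynamics.trainable_variables)
  optimizer.apply_gradients(
      zip(grads, dynamics.trainable_variables), global_step=global_step)
  return loss, x_out, accept_prob

def compute_ess(ac_spectrum):
  # Effective sample size per Metropolis-Hastings step:
  # ESS = 1 / (1 + 2 * sum of autocorrelations at positive lags).
  return 1. / (1. + 2. * np.sum(ac_spectrum[1:]))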