def testTrainAndEvalWithSpectralNormAndEma(self):
  """Smoke test: train one step + eval with spectral norm and generator EMA."""
  gin.bind_parameter("dataset.name", "cifar10")
  gin.bind_parameter("ModularGAN.g_use_ema", True)
  gin.bind_parameter("G.spectral_norm", True)
  gan_options = {
      "architecture": "resnet_cifar_arch",
      "batch_size": 2,
      "disc_iters": 1,
      "gan_class": ModularGAN,
      "lambda": 1,
      "training_steps": 1,
      "z_dim": 128,
  }
  model_dir = self._get_empty_model_dir()
  config = tf.contrib.tpu.RunConfig(
      model_dir=model_dir,
      tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1))
  manager = runner_lib.TaskManager(model_dir)
  runner_lib.run_with_schedule(
      "eval_after_train",
      run_config=config,
      task_manager=manager,
      options=gan_options,
      use_tpu=False,
      num_eval_averaging_runs=1,
      eval_every_steps=None)
  # Training must have produced checkpoints for steps 0 and 1, the gin
  # operative config, the TF-Hub export directory, and the done marker.
  expected_files = [
      "TRAIN_DONE",
      "checkpoint",
      "model.ckpt-0.data-00000-of-00001",
      "model.ckpt-0.index",
      "model.ckpt-0.meta",
      "model.ckpt-1.data-00000-of-00001",
      "model.ckpt-1.index",
      "model.ckpt-1.meta",
      "operative_config-0.gin",
      "tfhub",
  ]
  self.assertAllInSet(expected_files, tf.gfile.ListDirectory(model_dir))
def testTrainingIsDeterministic(self, fake_dataset):
  """Two runs with the same fixed random seed yield identical checkpoints."""
  FLAGS.data_fake_dataset = fake_dataset
  gin.bind_parameter("dataset.name", "cifar10")
  gan_options = {
      "architecture": "resnet_cifar_arch",
      "batch_size": 2,
      "disc_iters": 1,
      "gan_class": ModularGAN,
      "lambda": 1,
      "training_steps": 3,
      "z_dim": 128,
  }
  work_dir = self._get_empty_model_dir()
  # Train twice into separate sub-directories with the same seed.
  for run_idx in range(2):
    model_dir = os.path.join(work_dir, str(run_idx))
    config = tf.contrib.tpu.RunConfig(
        model_dir=model_dir, tf_random_seed=3)
    manager = runner_lib.TaskManager(model_dir)
    runner_lib.run_with_schedule(
        "train",
        run_config=config,
        task_manager=manager,
        options=gan_options,
        use_tpu=False,
        num_eval_averaging_runs=1)
  # Every variable in the final checkpoints must match across the two runs.
  ckpt_a = os.path.join(work_dir, "0/model.ckpt-3")
  ckpt_b = os.path.join(work_dir, "1/model.ckpt-3")
  reader_a = tf.train.load_checkpoint(ckpt_a)
  reader_b = tf.train.load_checkpoint(ckpt_b)
  for name, _ in tf.train.list_variables(ckpt_a):
    tf.logging.info(name)
    self.assertAllClose(
        reader_a.get_tensor(name), reader_b.get_tensor(name), msg=name)
def testTrainAndEval(self, use_tpu):
  """Smoke test: one training step followed by evaluation.

  Args:
    use_tpu: Whether to run the schedule with `use_tpu=True`.
  """
  gin.bind_parameter("dataset.name", "cifar10")
  options = {
      "architecture": "resnet_cifar_arch",
      "batch_size": 2,
      "disc_iters": 1,
      "gan_class": ModularGAN,
      "lambda": 1,
      "training_steps": 1,
      "z_dim": 128,
  }
  # Use the shared helper for a fresh model directory instead of wiping
  # FLAGS.test_tmpdir with DeleteRecursively: deleting the whole test
  # tmpdir can clobber state used by other tests running in the same
  # process, and this matches the sibling tests in this file.
  model_dir = self._get_empty_model_dir()
  run_config = tf.contrib.tpu.RunConfig(
      model_dir=model_dir,
      tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1))
  task_manager = runner_lib.TaskManager(model_dir)
  runner_lib.run_with_schedule(
      "eval_after_train",
      run_config=run_config,
      task_manager=task_manager,
      options=options,
      use_tpu=use_tpu,
      num_eval_averaging_runs=1,
      eval_every_steps=None)
  # Training must have produced checkpoints for steps 0 and 1, the gin
  # operative config, the TF-Hub export directory, and the done marker.
  expected_files = [
      "TRAIN_DONE",
      "checkpoint",
      "model.ckpt-0.data-00000-of-00001",
      "model.ckpt-0.index",
      "model.ckpt-0.meta",
      "model.ckpt-1.data-00000-of-00001",
      "model.ckpt-1.index",
      "model.ckpt-1.meta",
      "operative_config-0.gin",
      "tfhub",
  ]
  self.assertAllInSet(expected_files, tf.gfile.ListDirectory(model_dir))
def testWeightInitialization(self, seed1, seed2):
  """Checks how variable initialization depends on the random seed.

  Constant-initialized variables (zeros/ones) must be identical regardless
  of the seed; randomly-initialized variables must match only when both
  seeds are set and equal.
  """
  gin.bind_parameter("dataset.name", "cifar10")
  gin.bind_parameter("ModularGAN.g_optimizer_fn",
                     tf.train.GradientDescentOptimizer)
  gan_options = {
      "architecture": "resnet_cifar_arch",
      "batch_size": 2,
      "disc_iters": 1,
      "gan_class": ModularGAN,
      "lambda": 1,
      "training_steps": 1,
      "z_dim": 128,
  }
  work_dir = self._get_empty_model_dir()
  # Train once per seed into separate sub-directories.
  for run_idx, seed in enumerate([seed1, seed2]):
    model_dir = os.path.join(work_dir, str(run_idx))
    config = tf.contrib.tpu.RunConfig(
        model_dir=model_dir, tf_random_seed=seed)
    manager = runner_lib.TaskManager(model_dir)
    runner_lib.run_with_schedule(
        "train",
        run_config=config,
        task_manager=manager,
        options=gan_options,
        use_tpu=False)
  ckpt_a = os.path.join(work_dir, "0/model.ckpt-0")
  ckpt_b = os.path.join(work_dir, "1/model.ckpt-0")
  reader_a = tf.train.load_checkpoint(ckpt_a)
  reader_b = tf.train.load_checkpoint(ckpt_b)
  # Variable-name suffixes with deterministic constant initializers.
  zero_initialized = ("bias", "biases", "beta", "moving_mean",
                      "global_step", "global_step_disc")
  one_initialized = ("gamma", "moving_variance")
  for name, _ in tf.train.list_variables(ckpt_a):
    tf.logging.info(name)
    t0 = reader_a.get_tensor(name)
    t1 = reader_b.get_tensor(name)
    if name.endswith(zero_initialized):
      # Always initialized to 0, independent of the seed.
      self.assertAllClose(t0, np.zeros_like(t0))
      self.assertAllClose(t1, np.zeros_like(t1))
    elif name.endswith(one_initialized):
      # Always initialized to 1, independent of the seed.
      self.assertAllClose(t0, np.ones_like(t0))
      self.assertAllClose(t1, np.ones_like(t1))
    elif seed1 is not None and seed1 == seed2:
      # Same explicit random seed: initializations must match.
      self.assertAllClose(t0, t1)
    else:
      # Different (or unset) seeds: initializations must differ.
      logging.info("name=%s, t0=%s, t1=%s", name, t0, t1)
      self.assertNotAllClose(t0, t1)
def testTrainAndEvalWithBatchNormAccu(self):
  """Smoke test: train + eval with batch-norm accumulators (no moving avgs)."""
  gin.bind_parameter("dataset.name", "cifar10")
  gin.bind_parameter("standardize_batch.use_moving_averages", False)
  gin.bind_parameter("G.batch_norm_fn", arch_ops.batch_norm)
  options = {
      "architecture": "resnet_cifar_arch",
      "batch_size": 2,
      "disc_iters": 1,
      "gan_class": ModularGAN,
      "lambda": 1,
      "training_steps": 1,
      "z_dim": 128,
  }
  model_dir = FLAGS.test_tmpdir
  if tf.gfile.Exists(model_dir):
    tf.gfile.DeleteRecursively(model_dir)
  run_config = tf.contrib.tpu.RunConfig(
      model_dir=model_dir,
      tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1))
  task_manager = runner_lib.TaskManager(model_dir)
  # Wrap _update_bn_accumulators to only perform one accumulator update step.
  # Otherwise the test case would time out.
  orig_update_bn_accumulators = eval_gan_lib._update_bn_accumulators

  def mock_update_bn_accumulators(sess, generated, num_accu_examples):
    del num_accu_examples
    return orig_update_bn_accumulators(sess, generated, num_accu_examples=64)

  eval_gan_lib._update_bn_accumulators = mock_update_bn_accumulators
  try:
    runner_lib.run_with_schedule(
        "eval_after_train",
        run_config=run_config,
        task_manager=task_manager,
        options=options,
        use_tpu=False,
        num_eval_averaging_runs=1,
        eval_every_steps=None)
  finally:
    # Restore the real implementation so the monkey-patch does not leak
    # into other test cases running in the same process.
    eval_gan_lib._update_bn_accumulators = orig_update_bn_accumulators
  expected_tfhub_files = [
      "checkpoint",
      "model-with-accu.ckpt.data-00000-of-00001",
      "model-with-accu.ckpt.index",
      "model-with-accu.ckpt.meta",
  ]
  self.assertAllInSet(
      expected_tfhub_files,
      tf.gfile.ListDirectory(os.path.join(model_dir, "tfhub/0")))
def main(unused_argv):
  """Parses gin config/bindings and runs the requested schedule.

  Args:
    unused_argv: Remaining command-line arguments from absl.app (ignored).
  """
  logging.info("Gin config: %s\nGin bindings: %s",
               FLAGS.gin_config, FLAGS.gin_bindings)
  gin.parse_config_files_and_bindings(FLAGS.gin_config, FLAGS.gin_bindings)
  if FLAGS.use_tpu is None:
    # Auto-detect a TPU from the environment when --use_tpu is unset.
    # BUG FIX: this assignment was commented out, which left FLAGS.use_tpu
    # as None (falsy) and made the "Found TPU" branch unreachable.
    FLAGS.use_tpu = bool(os.environ.get("TPU_NAME", ""))
    if FLAGS.use_tpu:
      logging.info("Found TPU %s.", os.environ["TPU_NAME"])
  run_config = _get_run_config()
  task_manager = _get_task_manager()
  options = runner_lib.get_options_dict()
  runner_lib.run_with_schedule(
      schedule=FLAGS.schedule,
      run_config=run_config,
      task_manager=task_manager,
      options=options,
      use_tpu=FLAGS.use_tpu,
      num_eval_averaging_runs=FLAGS.num_eval_averaging_runs,
      eval_every_steps=FLAGS.eval_every_steps)
  # Fixed typo in the farewell message: I"m -> I'm.
  logging.info("I'm done with my work, ciao!")