def test_train_discriminator(self):
    """
    Make sure that the discriminator can achieve low loss when not
    training the generator.
    """
    opts = TestOptions()
    opts.num_samples = 16
    opts.epochs_to_train = 10
    with tf.Session() as session:
        dataset = datasets.FakeRecurrentAdversarialDataset(opts)
        model = rgan.RecurrentGenerativeAdversarialNetwork(
            opts, session, dataset)

        # train only the discriminator, feeding it a constant fake
        # distribution that should be easy to separate from the real data
        losses = []
        for epoch in range(opts.epochs_to_train):
            for bidx in range(opts.batch_size):
                samples = np.tile(
                    [.5, .5], (opts.batch_size, opts.sequence_length, 1))
                model.dataset.add_generated_samples(samples)
            loss = model.train_discriminator()
            losses.append(np.mean(loss))

        if SHOW_PLOTS:
            plt.title("discriminator loss without generator training")
            plt.plot(losses)
            plt.show()
        self.assertTrue(losses[-1] < 1e-2)

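# For context, a minimal sketch of the objective a train_discriminator()
# step of this kind typically minimizes: binary cross-entropy that pushes
# real sequences toward label 1 and stored generated sequences toward
# label 0. The logit tensors here are hypothetical stand-ins, not the
# model's actual API; this relies on the module's tf import.
def discriminator_loss_sketch(logits_real, logits_fake):
    loss_real = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(logits_real), logits=logits_real)
    loss_fake = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.zeros_like(logits_fake), logits=logits_fake)
    return tf.reduce_mean(loss_real) + tf.reduce_mean(loss_fake)
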
def test_circle_dataset(self):
    opts = TestOptions()
    opts.sequence_length = 4
    opts.num_samples = 4
    opts.batch_size = 4
    opts.input_dim = 2
    d = datasets.FakeRecurrentAdversarialDataset(opts)

    # store one batch of recognizable fake samples
    fake_batch = np.arange(
        opts.batch_size * opts.sequence_length * opts.input_dim)
    fake_batch = fake_batch.reshape(
        opts.batch_size, opts.sequence_length, opts.input_dim)
    d.add_generated_samples(fake_batch)

    # plot the real sequences, then the generated ones
    z = d.data['X_train']
    for sidx in range(opts.num_samples):
        plt.plot(z[sidx, :, 0], z[sidx, :, 1])
    gen = d.data['generated_samples']
    for sidx in range(len(gen)):
        plt.plot(gen[sidx, :, 0], gen[sidx, :, 1])

    for x, y in d.next_batch():
        print(x)
        print(y)

    if SHOW_PLOTS:
        plt.show()

def test_train_generator(self):
    """
    Make sure that the generator can achieve low loss when not
    training the discriminator.
    """
    opts = TestOptions()
    with tf.Session() as session:
        dataset = datasets.FakeRecurrentAdversarialDataset(opts)
        model = rgan.RecurrentGenerativeAdversarialNetwork(
            opts, session, dataset)

        # train only the generator
        losses = []
        for epoch in range(opts.epochs_to_train):
            loss = model.train_generator()
            losses.append(np.mean(loss))

        if SHOW_PLOTS:
            plt.title("generator loss without discriminator training")
            plt.plot(losses)
            plt.show()
        self.assertTrue(losses[-1] < 1e-2)

        # each generator epoch should add num_samples generated sequences
        final_dataset = model.dataset.data['generated_samples']
        expected_num_samples = opts.epochs_to_train * opts.num_samples
        self.assertEqual(expected_num_samples, len(final_dataset))

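# The matching sketch for the generator side: a non-saturating loss that
# rewards the generator when the discriminator scores its samples as real.
# As above, the logit tensor is a hypothetical stand-in for whatever
# train_generator() actually optimizes.
def generator_loss_sketch(logits_fake):
    # push D's verdict on generated sequences toward the "real" label
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(logits_fake), logits=logits_fake))
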
def test_make_fake_dataset(self):
    opts = TestOptions()
    opts.sequence_length = 4
    opts.num_samples = 32
    opts.batch_size = 4
    opts.input_dim = 2
    d = datasets.FakeRecurrentAdversarialDataset(opts)

    z = d.data['X_train']
    for sidx in range(opts.num_samples):
        plt.plot(z[sidx, :, 0], z[sidx, :, 1])
    if SHOW_PLOTS:
        plt.show()

    self.assertEqual(len(d.data['X_train']), opts.num_samples)
    self.assertEqual(len(d.data['X_train'][0]), opts.sequence_length)

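# The fake dataset plotted here consists of short 2-D trajectories (the
# circle test above plots it the same way). A hedged sketch of one way such
# data could be generated -- not necessarily what
# FakeRecurrentAdversarialDataset actually does -- is points stepped around
# the unit circle from a random starting angle:
def make_circle_sequences_sketch(num_samples, sequence_length):
    starts = np.random.rand(num_samples, 1) * 2 * np.pi
    steps = np.linspace(0, 2 * np.pi, sequence_length)
    thetas = starts + steps  # (num_samples, sequence_length) via broadcasting
    return np.stack([np.cos(thetas), np.sin(thetas)], axis=-1)
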
def test_add_generated_samples(self):
    opts = TestOptions()
    opts.sequence_length = 2
    opts.num_samples = 4
    opts.batch_size = 2
    opts.input_dim = 2
    d = datasets.FakeRecurrentAdversarialDataset(opts)

    fake_batch = np.arange(
        opts.batch_size * opts.sequence_length * opts.input_dim)
    fake_batch = fake_batch.reshape(
        opts.batch_size, opts.sequence_length, opts.input_dim)
    d.add_generated_samples(fake_batch)
    d.add_generated_samples(fake_batch)

    # two calls of batch_size samples each should accumulate
    fake_x = d.data['generated_samples']
    self.assertEqual(len(fake_x), opts.batch_size * 2)
    self.assertEqual(len(fake_x[0]), opts.sequence_length)

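# The assertions above pin down the contract: each call appends its batch
# along the first axis, so repeated calls accumulate generated sequences.
# A sketch of internals consistent with that contract (hypothetical):
def add_generated_samples_sketch(store, batch):
    return batch if store is None else np.concatenate([store, batch], axis=0)
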
def test_next_batch(self):
    opts = TestOptions()
    opts.sequence_length = 4
    opts.num_samples = 32
    opts.batch_size = 4
    opts.input_dim = 2
    d = datasets.FakeRecurrentAdversarialDataset(opts)

    fake_batch = np.arange(
        opts.batch_size * opts.sequence_length * opts.input_dim)
    fake_batch = fake_batch.reshape(
        opts.batch_size, opts.sequence_length, opts.input_dim)
    d.add_generated_samples(fake_batch)
    d.add_generated_samples(fake_batch)

    for (x, y) in d.next_batch():
        self.assertEqual(
            x.shape,
            (opts.batch_size, opts.sequence_length, opts.input_dim))
        self.assertEqual(y.shape, (opts.batch_size, 1))

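# The shapes checked above imply that next_batch() yields (x, y) pairs
# where x holds sequences and y is a column of per-sequence labels. A
# hypothetical generator consistent with those checks, labeling real
# sequences 1 and stored generated ones 0:
def next_batch_sketch(real, generated, batch_size):
    for data, label in ((real, 1.0), (generated, 0.0)):
        for start in range(0, len(data) - batch_size + 1, batch_size):
            x = data[start:start + batch_size]
            y = np.full((batch_size, 1), label)
            yield x, y
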
def test_run_epoch(self):
    """
    Test with both the generator and discriminator training.
    """
    opts = TestOptions()
    opts.learning_rate = .00001  # .001
    opts.batch_size = 8
    opts.train_ratio = 4  # 2
    opts.num_samples = 16  # 128
    opts.sequence_length = 20
    opts.epochs_to_train = 2000  # 500
    opts.num_hidden = 128  # 100
    opts.z_dim = 3  # 2
    opts.reg_scale = 0  # 0
    opts.dropout = .9  # 1
    opts.dataset_name = 'sine'

    with tf.Session() as session:
        dataset = datasets.FakeRecurrentAdversarialDataset(opts)
        model = rgan.RecurrentGenerativeAdversarialNetwork(
            opts, session, dataset)
        saver = tf.train.Saver()
        saver.restore(
            session, '../snapshots/{}.weights'.format(opts.dataset_name))

        losses = []
        TRAIN = False
        if TRAIN:
            for epoch in range(opts.epochs_to_train):
                model.run_epoch()
                if epoch % 50 == 0:
                    saver.save(
                        session,
                        '../snapshots/{}.weights'.format(opts.dataset_name))

        samples = model.sample_space()
        true_samples = model.dataset.data['X_train']
        print(samples[0:2])

    with tf.Session() as session:
        if SHOW_PLOTS:
            saver.restore(
                session, '../snapshots/{}.weights'.format(opts.dataset_name))
            model.plot_results()

            # animate a sliding window over each generated sequence,
            # drawn in red over the true samples in blue
            fig, ax = plt.subplots()
            scat = ax.scatter([], [], c='red')

            def init():
                scat.set_offsets(np.empty((0, 2)))
                return scat,

            dots_to_show = opts.sequence_length

            def animate(idx):
                # integer division so the indices stay integral
                sample_idx = idx // opts.sequence_length
                timestep_idx = idx % opts.sequence_length
                data = samples[
                    sample_idx, timestep_idx:timestep_idx + dots_to_show, :]
                scat.set_offsets(data)
                return scat,

            for idx in range(opts.num_samples):
                ax.scatter(true_samples[idx, :, 0],
                           true_samples[idx, :, 1], c='blue')
            animation_steps = len(samples) * opts.sequence_length
            ani = animation.FuncAnimation(
                fig, animate, np.arange(animation_steps),
                interval=10, init_func=init)
            plt.show()

            for true in true_samples:
                plt.scatter(true[:, 0], true[:, 1], c='blue')
            for gen in samples:
                plt.scatter(gen[:, 0], gen[:, 1], c='red')
            plt.show()

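# sample_space() is also used below to "sample linearly from z space",
# which suggests decoding a regular sweep of latent vectors rather than
# random draws. A hedged sketch of such a sweep; model.generate() is a
# hypothetical per-vector decode, not the model's actual API.
def sample_space_sketch(model, z_dim, steps=10):
    zs = np.linspace(-1, 1, steps)
    grid = np.stack(np.meshgrid(*([zs] * z_dim)), axis=-1).reshape(-1, z_dim)
    return np.array([model.generate(z) for z in grid])
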
def test_run_epoch_twitch_with_pretraining(self):
    """
    Test with both the generator and discriminator training, plus
    cross-entropy pretraining of the generator.
    """
    opts = TestOptions()
    opts.learning_rate = .005
    opts.epoch_multiple_gen = 1
    opts.epoch_multiple_dis = 1
    opts.batch_size = 64
    opts.sequence_length = 4
    opts.num_samples = 100000
    opts.epochs_to_train = 1000
    opts.num_hidden = 256
    opts.embed_dim = 64
    opts.z_dim = 16
    opts.dropout = .9
    opts.temperature = 1.
    opts.sampling_temperature = .5
    opts.full_sequence_optimization = True
    opts.save_every = 1
    opts.plot_every = 1
    opts.reduce_temperature_every = 1
    opts.temperature_reduction_amount = .01
    opts.min_temperature = .1
    opts.decay_every = 100
    opts.decay_ratio = .97
    opts.max_norm = 2.0
    opts.sentence_limit = 50000
    opts.pretrain_epochs = 50
    opts.pretrain_learning_rate = .005
    opts.save_to_aws = False
    opts.dataset_name = 'twitch'
    opts.aws_bucket = 'pgrganxentbaseline'
    opts.with_baseline = True
    opts.with_xent = True
    opts.sample_every = 2

    ak = aws_s3_utility.load_key('../keys/access_key.key')
    sk = aws_s3_utility.load_key('../keys/secret_key.key')
    bucket = opts.aws_bucket
    aws_util = aws_s3_utility.S3Utility(ak, sk, bucket)
    weights_filepath = '../snapshots/{}.weights'.format(opts.dataset_name)
    weights_filename = '{}.weights'.format(opts.dataset_name)

    with tf.Session() as session:
        dataset = datasets.FakeRecurrentAdversarialDataset(opts)
        model = discrete_rgan.RecurrentDiscreteGenerativeAdversarialNetwork(
            opts, session, dataset)
        saver = tf.train.Saver()
        # saver.restore(session, weights_filepath)

        # get the param values beforehand
        params = tf.trainable_variables()
        param_info = sorted([(p.name, p.eval()) for p in params])

        # train
        losses = []
        TRAIN = True
        if TRAIN:
            for epoch in range(opts.epochs_to_train):
                if opts.with_xent:
                    model.run_pretrain_epoch()
                model.run_epoch()

                if epoch % opts.save_every == 0:
                    saver.save(session, weights_filepath)
                    if opts.save_to_aws:
                        try:
                            aws_util.upload_file(weights_filename,
                                                 weights_filepath)
                        except Exception:
                            pass

                if epoch % opts.plot_every == 0:
                    model.plot_results()
                    if opts.save_to_aws:
                        try:
                            aws_util.upload_directory('../media')
                        except Exception:
                            pass

                if epoch % opts.reduce_temperature_every == 0:
                    opts.temperature -= opts.temperature_reduction_amount
                    opts.temperature = max(opts.min_temperature,
                                           opts.temperature)

                if epoch % opts.sample_every == 0:
                    samples, probs = model.discrete_sample_space()
                    samples = dataset.decode_dataset(samples, real=True)
                    print(samples[0])
                    np.savez('../media/samples.npz',
                             samples=samples, probs=probs)

        # sample linearly from z space
        if opts.discrete:
            samples, probs = model.discrete_sample_space()
            samples = dataset.decode_dataset(samples, real=True)
        else:
            samples = model.sample_space()

        # get the samples in the dataset
        true_samples = dataset.data['X_train']
        true_samples = dataset.decode_dataset(true_samples, real=True)

        # calculate and report metrics
        total = float(len(samples))
        in_real_data_count = 0
        less_than_count = 0
        for sample in samples:
            if sample in true_samples:
                in_real_data_count += 1
            if sample[0] < sample[1]:
                less_than_count += 1

        perplexity = 0  # learning_utils.calculate_perplexity(probs)
        num_display = 30
        for (s, p) in zip(samples[:num_display], probs[:num_display]):
            print("example generated data: {}".format(s))
            # print("probabilities of those selections: {}".format(p))
        print("total samples: {}".format(total))
        print("generated samples also in dataset: {}".format(
            in_real_data_count))
        print("percent generated in real dataset: {}%".format(
            100 * in_real_data_count / total))
        print("perplexity of samples: {}".format(perplexity))

        if SHOW_PLOTS:
            model.plot_results()

        # assert that the parameters of the generative model have not
        # changed at all, because they cannot possibly change: they are
        # blocked from any gradient by a nondifferentiable, discrete
        # sampling operation
        params_after = tf.trainable_variables()
        param_after_info = sorted([(p.name, p.eval())
                                   for p in params_after])

        total_diff = 0
        total_num_params = 0
        for (n, vals), (n_after, vals_after) in zip(param_info,
                                                    param_after_info):
            print("\n")
            print(n)
            print(n_after)
            num_params = len(vals_after.flatten().tolist())
            total_num_params += num_params
            diffs = vals - vals_after
            diff = np.sum(np.abs(diffs))
            total_diff += diff
            print("average absolute difference: {}%".format(
                diff / num_params * 100))
        print("overall average absolute difference: {}%".format(
            total_diff / total_num_params * 100))

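# The reasoning in the comment above can be demonstrated directly: no
# gradient is registered through TensorFlow's discrete sampling op (nor
# through an int-to-float cast), so tf.gradients returns None for anything
# upstream of the sample. A minimal illustration using the module's tf:
def blocked_gradient_sketch():
    logits = tf.Variable([[1.0, 2.0, 3.0]])
    sampled = tf.multinomial(logits, num_samples=1)  # discrete draw, int64
    score = tf.cast(sampled, tf.float32)
    return tf.gradients(score, [logits])  # -> [None]: the path is blocked
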
def test_run_epoch(self):
    """
    Test with both the generator and discriminator training.
    """
    opts = TestOptions()
    opts.learning_rate = .01
    opts.epoch_multiple_gen = 1
    opts.epoch_multiple_dis = 5
    opts.batch_size = 52
    opts.num_samples = 130
    opts.epochs_to_train = 1000
    opts.num_hidden = 128
    opts.embed_dim = 32
    opts.z_dim = 16
    opts.dropout = 1.
    opts.temperature = 1.
    opts.sampling_temperature = .2
    opts.full_sequence_optimization = True
    opts.save_every = 200
    opts.plot_every = 100
    opts.reduce_temperature_every = 20
    opts.temperature_reduction_amount = .01
    opts.min_temperature = .1
    opts.decay_every = 10
    opts.decay_ratio = .96
    opts.max_norm = 1.0
    opts.pretrain_learning_rate = .01

    with tf.Session() as session:
        dataset = datasets.FakeRecurrentAdversarialDataset(opts)
        model = discrete_rgan.RecurrentDiscreteGenerativeAdversarialNetwork(
            opts, session, dataset)
        saver = tf.train.Saver()
        saver.restore(
            session, '../snapshots/{}.weights'.format(opts.dataset_name))

        # get the param values beforehand
        params = tf.trainable_variables()
        param_info = sorted([(p.name, p.eval()) for p in params])

        # train
        losses = []
        TRAIN = False
        if TRAIN:
            for epoch in range(opts.epochs_to_train):
                model.run_epoch()
                if epoch % opts.save_every == 0:
                    saver.save(
                        session,
                        '../snapshots/{}.weights'.format(opts.dataset_name))
                if epoch % opts.plot_every == 0:
                    model.plot_results()
                if epoch % opts.reduce_temperature_every == 0:
                    opts.temperature -= opts.temperature_reduction_amount
                    opts.temperature = max(opts.min_temperature,
                                           opts.temperature)

        # sample linearly from z space
        if opts.discrete:
            samples, probs = model.discrete_sample_space()
            samples = dataset.decode_dataset(samples)
        else:
            samples = model.sample_space()

        # get the samples in the dataset
        true_samples = dataset.data['X_train']
        true_samples = dataset.decode_dataset(true_samples)

        # calculate and report metrics
        total = float(len(samples))
        in_real_data_count = 0
        less_than_count = 0
        for sample in samples:
            if sample in true_samples:
                in_real_data_count += 1
            if sample[0] < sample[1]:
                less_than_count += 1

        num_display = 10
        for (s, p) in zip(samples[:num_display], probs[:num_display]):
            print("example generated data: {}".format(s))
            print("probabilities of those selections: {}".format(p))
        print("total samples: {}".format(total))
        print("generated samples also in dataset: {}".format(
            in_real_data_count))
        print("percent generated in real dataset: {}%".format(
            100 * in_real_data_count / total))
        print("percent samples[0] < samples[1]: {}%".format(
            100 * less_than_count / total))

        if SHOW_PLOTS:
            model.plot_results()

        # assert that the parameters of the generative model have not
        # changed at all, because they cannot possibly change: they are
        # blocked from any gradient by a nondifferentiable, discrete
        # sampling operation
        params_after = tf.trainable_variables()
        param_after_info = sorted([(p.name, p.eval())
                                   for p in params_after])

        total_diff = 0
        total_num_params = 0
        for (n, vals), (n_after, vals_after) in zip(param_info,
                                                    param_after_info):
            print("\n")
            print(n)
            print(n_after)
            num_params = len(vals_after.flatten().tolist())
            total_num_params += num_params
            diffs = vals - vals_after
            diff = np.sum(np.abs(diffs))
            total_diff += diff
            print("average absolute difference: {}%".format(
                diff / num_params * 100))
        print("overall average absolute difference: {}%".format(
            total_diff / total_num_params * 100))