# roll out generator rollout_generator = copy.deepcopy(generator) if pool: rollout_generator.to_cpu() rollout_params = np.asanyarray(tuple(param.data for param in rollout_generator.params())) print('#########################################################################') print('Start Reinforcement Training ...') for epoch in range(1, total_epoch): print('total batch: ', epoch) for step in range(g_steps): samples = generator.generate(gen_batch_size, train=True, random_input=True) rewards = rollout_generator.get_rewards(samples, discriminator, rollout_num=16, pool=pool, gpu=args.gpu) print(rewards[:30]) loss = generator.reinforcement_step(samples, rewards, g_steps=g_steps, random_input=True) gen_optimizer.zero_grads() loss.backward() gen_optimizer.update() print(' Reinforce step {}/{}'.format(step+1, g_steps)) for i, param in enumerate(generator.params()): if pool: rollout_params[i] += rollout_update_ratio * (cuda.to_cpu(param.data) - rollout_params[i]) else: rollout_params[i] += rollout_update_ratio * (param.data - rollout_params[i]) for step in range(d_steps):
# test test_loss = [] for _ in range(test_num // batch_size): batch = arasuji.get_test_data(batch_size) g_loss = generator.pretrain_step(batch) test_loss.append(float(g_loss.data)) test_count += 1 print('\npre-train epoch {} train_loss {} test_loss {}'.format(epoch, np.mean(pre_train_loss), np.mean(test_loss))) summary = sess.run(train_loss_summary, feed_dict={loss_: np.mean(pre_train_loss)}) summary_writer.add_summary(summary, test_count) summary = sess.run(test_loss_summary, feed_dict={loss_: np.mean(test_loss)}) summary_writer.add_summary(summary, test_count) samples = generator.generate(10, train=False) with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'a', encoding='utf-8') as f: f.write('\npre-train epoch {} train_loss {} test_loss {} \n'.format(epoch, np.mean(pre_train_loss), np.mean(test_loss))) for x in samples: f.write(''.join([arasuji.vocab[w] for w in x]) + '\n') serializers.save_hdf5(os.path.join(out_dir, "models", "gen_pretrain.model"), generator) else: # test test_loss = [] for _ in range(test_num // batch_size): batch = arasuji.get_test_data(batch_size) g_loss = generator.pretrain_step(batch) test_loss.append(float(g_loss.data))
    # test: evaluate pre-training loss on held-out comment/tag data
    # NOTE(review): whitespace-mangled chunk; indentation reconstructed. `perm`
    # and the `if` matching the `else:` below are defined outside this chunk.
    for i in range(0, test_num, batch_size):
        batch = test_comment_data[perm[i:i + batch_size]]
        tag_batch = test_tag_data[perm[i:i + batch_size]]
        g_loss = generator.pretrain_step(batch, tag_batch)  # used only for its loss value
        test_loss.append(float(g_loss.data))
    test_count += 1
    print('\npre-train epoch {} train_loss {} test_loss {}'.format(epoch, np.mean(pre_train_loss), np.mean(test_loss)))
    # TensorBoard scalar summaries (TF session is used only for logging)
    summary = sess.run(train_loss_summary, feed_dict={loss_: np.mean(pre_train_loss)})
    summary_writer.add_summary(summary, test_count)
    summary = sess.run(test_loss_summary, feed_dict={loss_: np.mean(test_loss)})
    summary_writer.add_summary(summary, test_count)
    # dump a few generated samples so progress can be eyeballed
    samples = generator.generate(10, train=False)
    with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'a', encoding='utf-8') as f:
        f.write('\npre-train epoch {} train_loss {} test_loss {} \n'.format(epoch, np.mean(pre_train_loss), np.mean(test_loss)))
        for x in samples:
            # map word ids back to vocabulary strings
            f.write(''.join([vocab[w] for w in x]) + '\n')
    # NOTE(review): `epoch % 1 == 0` is always true, so a checkpoint is saved
    # every epoch — probably a leftover from `epoch % N == 0`; confirm.
    if epoch % 1 == 0:
        serializers.save_hdf5(os.path.join(out_dir, "models", "gen_pretrain_{}.model".format(epoch)), generator)
    # NOTE(review): true division makes batch_size a float in Python 3, which
    # breaks the later range()/slicing; likely meant `//=`, or this line is a
    # leftover — confirm. Dividing on every pass also looks suspect.
    batch_size /= 20
else:
    # test-only branch (continues beyond this chunk)
    test_loss = []
    perm = np.random.permutation(test_num)
    for i in range(0, test_num, batch_size):
        batch = test_comment_data[perm[i:i + batch_size]]
# TensorBoard logging for a VAE-style pre-training phase: train/test total
# losses plus reconstruction (g) and KL components.
# NOTE(review): whitespace-mangled chunk; indentation reconstructed. The first
# `summary` assignment and the `if` matching the `else:` below lie outside
# this chunk — confirm nesting.
    summary_writer.add_summary(summary, test_count)
    summary = sess.run(train_kl_loss_summary, feed_dict={loss_: np.mean(sum_kl_loss)})
    summary_writer.add_summary(summary, test_count)
    summary = sess.run(test_loss_summary, feed_dict={loss_: np.mean(test_loss)})
    summary_writer.add_summary(summary, test_count)
    summary = sess.run(test_g_loss_summary, feed_dict={loss_: np.mean(sum_test_g_loss)})
    summary_writer.add_summary(summary, test_count)
    summary = sess.run(test_kl_loss_summary, feed_dict={loss_: np.mean(sum_test_kl_loss)})
    summary_writer.add_summary(summary, test_count)
    # dump a few generated samples for manual inspection
    samples = generator.generate(10, train=False)
    with open(os.path.join(out_dir, "generated_sample_pretrain.txt"), 'a', encoding='utf-8') as f:
        f.write('\npre-train epoch {} train_loss {} test_loss {} \n'.format(epoch, np.mean(pre_train_loss), np.mean(test_loss)))
        for x in samples:
            # map word ids back to vocabulary strings
            f.write(''.join([vocab[w] for w in x]) + '\n')
else:
    # non-sampling branch: log only the aggregate train loss
    print('\npre-train epoch {} train_loss {} test_loss {}'.format(
        epoch, np.mean(pre_train_loss), np.mean(test_loss)))
    summary = sess.run(train_loss_summary, feed_dict={loss_: np.mean(pre_train_loss)})
    summary_writer.add_summary(summary, test_count)
# roll out generator rollout_generator = copy.deepcopy(generator) if pool: rollout_generator.to_cpu() rollout_params = np.asanyarray(tuple(param.data for param in rollout_generator.params())) print('#########################################################################') print('Start Reinforcement Training ...') for epoch in range(1, total_epoch): print('total batch: ', epoch) for step in range(g_steps): samples = generator.generate(gen_batch_size, train=True) rewards = rollout_generator.get_rewards(samples, discriminator, rollout_num=16, pool=pool, gpu=args.gpu) print(rewards[:30]) loss = generator.reinforcement_step(samples, rewards, g_steps=g_steps) gen_optimizer.zero_grads() loss.backward() gen_optimizer.update() print(' Reinforce step {}/{}'.format(step+1, g_steps)) for i, param in enumerate(generator.params()): if pool: rollout_params[i] += rollout_update_ratio * (cuda.to_cpu(param.data) - rollout_params[i]) else: rollout_params[i] += rollout_update_ratio * (param.data - rollout_params[i]) for step in range(d_steps):