Пример #1
0
# When a worker pool is in use, move the rollout generator to host memory.
# NOTE(review): presumably so its arrays can be handed to the pool workers --
# confirm against get_rewards().
if pool:
    rollout_generator.to_cpu()

# Hold one reference per rollout-generator parameter array in a 1-D object
# array.  The array is filled slot by slot instead of going through
# np.asanyarray(tuple(...)) because:
#   * parameter arrays normally differ in shape, and NumPy >= 1.24 raises
#     ValueError when asked to build an array from such ragged input;
#   * if every parameter happened to share one shape, np.asanyarray would
#     silently build a numeric *copy*, so later updates written through
#     rollout_params would never reach the rollout generator.
_rollout_arrays = [param.data for param in rollout_generator.params()]
rollout_params = np.empty(len(_rollout_arrays), dtype=object)
for _slot, _array in enumerate(_rollout_arrays):
    rollout_params[_slot] = _array

print('#########################################################################')
print('Start Reinforcement Training ...')

# Main training loop.  Each iteration runs `g_steps` generator updates, then
# blends the generator parameters into `rollout_params`, then runs the
# discriminator phase.
# NOTE(review): range(1, total_epoch) yields total_epoch - 1 iterations --
# confirm that this count is intended.
for epoch in range(1, total_epoch):

    # The label reads 'total batch', but the value printed is the epoch counter.
    print('total batch: ', epoch)

    # Generator phase: draw samples, obtain a reward for them from the rollout
    # generator and the discriminator, and apply one optimizer update per step.
    for step in range(g_steps):
        samples = generator.generate(gen_batch_size, train=True, random_input=True)
        rewards = rollout_generator.get_rewards(samples, discriminator, rollout_num=16, pool=pool, gpu=args.gpu)
        # Debug output: the first 30 entries of `rewards`.
        print(rewards[:30])
        loss = generator.reinforcement_step(samples, rewards, g_steps=g_steps, random_input=True)
        # Reset gradients before backprop.
        # NOTE(review): if this is Chainer, Optimizer.zero_grads() is deprecated
        # in favour of Link.cleargrads() -- confirm the installed version.
        gen_optimizer.zero_grads()
        loss.backward()
        gen_optimizer.update()
        print(' Reinforce step {}/{}'.format(step+1, g_steps))

    # Move every rollout parameter a fraction `rollout_update_ratio` of the way
    # toward the matching generator parameter:
    #     rollout += ratio * (generator - rollout)
    # This relies on generator.params() yielding parameters in the same order
    # as the entries of `rollout_params`.
    for i, param in enumerate(generator.params()):
        if pool:
            # With a pool, the generator array is copied to host memory first
            # so it can be combined with the rollout parameters.
            rollout_params[i] += rollout_update_ratio * (cuda.to_cpu(param.data) - rollout_params[i])
        else:
            rollout_params[i] += rollout_update_ratio * (param.data - rollout_params[i])

    # Discriminator phase: `d_steps` iterations.
    for step in range(d_steps):

        # Generate samples from the current generator for the discriminator.
        # NOTE(review): presumably written to `negative_file` as negative
        # examples -- confirm in generate_samples_neg().
        generate_samples_neg(generator, gen_batch_size, generated_num, negative_file)
Пример #2
0
# Hold one reference per rollout-generator parameter array in a 1-D object
# array.  The array is filled slot by slot instead of going through
# np.asanyarray(tuple(...)) because:
#   * parameter arrays normally differ in shape, and NumPy >= 1.24 raises
#     ValueError when asked to build an array from such ragged input;
#   * if every parameter happened to share one shape, np.asanyarray would
#     silently build a numeric *copy*, so later updates written through
#     rollout_params would never reach the rollout generator.
_rollout_arrays = [param.data for param in rollout_generator.params()]
rollout_params = np.empty(len(_rollout_arrays), dtype=object)
for _slot, _array in enumerate(_rollout_arrays):
    rollout_params[_slot] = _array

print('#########################################################################\n')
print('Start Reinforcement Training ...')

# Wall-clock reference for the elapsed-time readout of the whole run.
start = time.time()
# NOTE(review): range(1, args.total_epoch) yields args.total_epoch - 1
# iterations -- confirm that this count is intended.
for epoch in range(1, args.total_epoch):

    print('total epoch ', epoch)
    # Timestamp of the previous step boundary; used for per-step timing.
    tmp = time.time()
    # g-step: args.g_steps generator updates.
    # `mean_time` accumulates the step durations and is divided by the step
    # count only when the summary line is printed.
    mean_time = 0
    for step in range(args.g_steps):
        samples = generator.generate(batch_size, train=True)
        rewards = rollout_generator.get_rewards(samples, discriminator, pool=pool, gpu=args.gpu)
        loss = generator.reinforcement_step(samples, rewards, g_steps=args.g_steps)
        generator.cleargrads()
        loss.backward()
        gen_optimizer.update()

        duration = time.time() - start
        step_time = time.time() - tmp
        mean_time += step_time
        tmp = time.time()
        # Live progress on stderr; '\r' rewrites the same terminal line.
        sys.stderr.write('\rreinforce step {}/{} time: {} ({:.2f} sec/step)'.format(
            step + 1, args.g_steps, str(datetime.timedelta(seconds=duration)).split('.')[0], step_time))

    # Summary line with the mean step time.  The original used a `for ... else`
    # clause, which always runs because the loop has no `break`.  With zero
    # generator steps that clause hit unbound `step`/`duration` and a division
    # by zero, so the summary is now emitted only when steps actually ran.
    if args.g_steps > 0:
        print('\rreinforce step {}/{} time: {} ({:.2f} sec/step)'.format(
            step + 1, args.g_steps, str(datetime.timedelta(seconds=duration)).split('.')[0], mean_time/args.g_steps))

    # update rollout generator