import gc
import resource
import numpy as np
import torch

# Source snippet begins mid-call; `updater = Updater(...)` is assumed
# from the `updater` usage below.
updater = Updater(lr=lr,
                  entropy_const=entropy_const,
                  value_const=val_const,
                  gamma=gamma,
                  _lambda=_lambda,
                  max_norm=max_norm,
                  norm_advs=norm_advs)

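# Optionally restore network and optimizer state from an earlier run.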
if resume:
    updater.net.load_state_dict(torch.load(exp_name + '_net.p'))
    updater.optim.load_state_dict(torch.load(exp_name + '_optim.p'))
updater.optim.zero_grad()

epoch = 0
while True:
    epoch += 1
    print("Begin Epoch", epoch, "– T =", collector.T)
    for rollout in range(n_rollouts):
        data = collector.get_data(render)
        updater.calc_loss(*data, gae, reinforce)
    updater.update_model()
    updater.save_model(net_save_file, optim_save_file)
    updater.print_statistics()
    print("Grad Norm:", updater.norm, "– Avg Action:", np.mean(data[3]))
    print("Average Reward:", collector.avg_reward, end='\n\n')

    # Check for memory leaks
    gc.collect()
    max_mem_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print("Memory Used: {:.2f} memory\n".format(max_mem_used / 1024))
Example #2

import numpy as np

T = 0
avg_rew = 0.5
epoch = 0
trial = 0

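# Main loop: alternate environment rollouts with model updates until
# total_steps environment interactions have been collected.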
while T < total_steps:
    epoch += 1
    # Collect Rollout
    data = collector.rollout(n_tsteps)
    avg_rew = .99 * avg_rew + .01 * np.mean(data['rewards'])  # reward EMA
    T += n_tsteps * n_envs
    # Update Model
    if evaluate_mode:
        logger.data["Trial " + str(epoch)] = np.asarray(data['rewards'])
    else:
        updater.calc_loss(data)
        if epoch % epochs_per_update == 0:
            print("\nStep:", T, " – Epoch", epoch)
            avg_val = np.mean(data['values'])
            avg_action = np.mean(data['sparse_actions'])
            print("Avg Rew:", avg_rew, " – Avg Act:", avg_action,
                  " – Avg Val:", avg_val)
            updater.update_model(calc_grad=True)

            # Track Stats
            updater.print_stats()
            updater.info["Avg Reward"] = avg_rew
            updater.info["Avg Action"] = avg_action
            updater.info["Avg Value Pred"] = avg_val
            logger.append(updater.info, x_val=T)
            updater.save_model(save_file)
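The `logger` object is not defined in this snippet; a minimal hypothetical stand-in compatible with the `logger.data[...]` assignment and `logger.append(info, x_val=...)` calls above, purely for illustration:

from collections import defaultdict

class Logger:
    # Hypothetical stand-in for the logger used above: keeps raw arrays
    # (e.g. per-trial reward traces) in `data`, and one list of
    # (x_val, value) pairs per statistic name in `series`.
    def __init__(self):
        self.data = {}
        self.series = defaultdict(list)

    def append(self, info, x_val):
        for key, val in info.items():
            self.series[key].append((x_val, val))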