示例#1
0
    rewards1 = [np.sum(epi['rews']) for epi in epis1]
    rewards2 = [np.sum(epi['rews']) for epi in epis2]
    mean_rew = np.mean(rewards1 + rewards2)
    logger.record_tabular_misc_stat('Reward1', rewards1)
    logger.record_tabular_misc_stat('Reward2', rewards2)
    logger.record_results(args.log,
                          result_dict,
                          score_file,
                          total_epi,
                          step,
                          total_step,
                          rewards1 + rewards2,
                          plot_title='humanoid')

    if mean_rew > max_rew:
        torch.save(pol.state_dict(),
                   os.path.join(args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(),
                   os.path.join(args.log, 'models', 'vf_max.pkl'))
        torch.save(optim_pol.state_dict(),
                   os.path.join(args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(),
                   os.path.join(args.log, 'models', 'optim_vf_max.pkl'))
        max_rew = mean_rew

    torch.save(pol.state_dict(),
               os.path.join(args.log, 'models', 'pol_last.pkl'))
    torch.save(vf.state_dict(), os.path.join(args.log, 'models',
                                             'vf_last.pkl'))
    torch.save(optim_pol.state_dict(),
               os.path.join(args.log, 'models', 'optim_pol_last.pkl'))
示例#2
0
    total_epi += traj1.num_epi
    step = traj1.num_step
    total_step += step
    rewards1 = [np.sum(epi['rews']) for epi in epis1]
    rewards2 = [np.sum(epi['rews']) for epi in epis2]
    mean_rew = np.mean(rewards1 + rewards2)
    logger.record_tabular_misc_stat('Reward1', rewards1)
    logger.record_tabular_misc_stat('Reward2', rewards2)
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards1 + rewards2,
                          plot_title='humanoid')

    if mean_rew > max_rew:
        torch.save(pol.state_dict(), os.path.join(
            args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(), os.path.join(
            args.log, 'models', 'vf_max.pkl'))
        torch.save(optim_pol.state_dict(), os.path.join(
            args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(), os.path.join(
            args.log, 'models', 'optim_vf_max.pkl'))
        max_rew = mean_rew

    torch.save(pol.state_dict(), os.path.join(
        args.log, 'models', 'pol_last.pkl'))
    torch.save(vf.state_dict(), os.path.join(
        args.log, 'models', 'vf_last.pkl'))
    torch.save(optim_pol.state_dict(), os.path.join(
        args.log, 'models', 'optim_pol_last.pkl'))