Example #1
    step = agent_traj.num_step
    total_step += step
    rewards = [np.sum(epi['real_rews']) for epi in epis]
    mean_rew = np.mean(rewards)
    pseudo_rewards = [np.sum(epi['rews']) for epi in epis]
    result_dict['PseudoReward'] = pseudo_rewards
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards,
                          plot_title=args.env_name)

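    # Checkpoint the policy, value function, discriminator and their optimizers
    # whenever the mean real-environment return sets a new best.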
    if mean_rew > max_rew:
        torch.save(pol.state_dict(), os.path.join(
            args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(), os.path.join(
            args.log, 'models', 'vf_max.pkl'))
        torch.save(discrim.state_dict(), os.path.join(
            args.log, 'models', 'discrim_max.pkl'))
        torch.save(optim_pol.state_dict(), os.path.join(
            args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(), os.path.join(
            args.log, 'models', 'optim_vf_max.pkl'))
        torch.save(optim_discrim.state_dict(), os.path.join(
            args.log, 'models', 'optim_discrim_max.pkl'))
        max_rew = mean_rew

    torch.save(pol.state_dict(), os.path.join(
        args.log, 'models', 'pol_last.pkl'))
    torch.save(vf.state_dict(), os.path.join(
        args.log, 'models', 'vf_last.pkl'))
Example #2
    total_step += step
    rewards = [np.sum(epi['rews']) for epi in epis]
    mean_rew = np.mean(rewards)
    logger.record_results(args.log,
                          result_dict,
                          score_file,
                          total_epi,
                          step,
                          total_step,
                          rewards,
                          plot_title=args.env_name)

    if mean_rew > max_rew:
        torch.save(pol.state_dict(),
                   os.path.join(args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(),
                   os.path.join(args.log, 'models', 'vf_max.pkl'))
        torch.save(optim_pol.state_dict(),
                   os.path.join(args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(),
                   os.path.join(args.log, 'models', 'optim_vf_max.pkl'))
        max_rew = mean_rew

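    # Unconditionally overwrite the '*_last' checkpoints with the most recent
    # policy, value function and optimizer states (saved regardless of score).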
    torch.save(pol.state_dict(),
               os.path.join(args.log, 'models', 'pol_last.pkl'))
    torch.save(vf.state_dict(),
               os.path.join(args.log, 'models', 'vf_last.pkl'))
    torch.save(optim_pol.state_dict(),
               os.path.join(args.log, 'models', 'optim_pol_last.pkl'))
    torch.save(optim_vf.state_dict(),
               os.path.join(args.log, 'models', 'optim_vf_last.pkl'))
Example #3
    pseudo_rewards = [np.sum(epi['rews']) for epi in epis]
    result_dict['PseudoReward'] = pseudo_rewards
    logger.record_results(args.log,
                          result_dict,
                          score_file,
                          total_epi,
                          step,
                          total_step,
                          rewards,
                          plot_title=args.env_name)

    mean_rew = np.mean([np.sum(path['real_rews']) for path in epis])
    if mean_rew > max_rew:
        torch.save(pol.state_dict(),
                   os.path.join(args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(),
                   os.path.join(args.log, 'models', 'vf_max.pkl'))
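        # With args.rew_type == 'rew', a separate reward function and shaping
        # value function are saved; otherwise a single advantage function
        # ('advf') is used.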
        if args.rew_type == 'rew':
            torch.save(rewf.state_dict(),
                       os.path.join(args.log, 'models', 'rewf_max.pkl'))
            torch.save(shaping_vf.state_dict(),
                       os.path.join(args.log, 'models', 'shaping_vf_max.pkl'))
        else:
            torch.save(advf.state_dict(),
                       os.path.join(args.log, 'models', 'advf_max.pkl'))
        torch.save(optim_pol.state_dict(),
                   os.path.join(args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(),
                   os.path.join(args.log, 'models', 'optim_vf_max.pkl'))
        torch.save(optim_discrim.state_dict(),
                   os.path.join(args.log, 'models', 'optim_discrim_max.pkl'))
Example #4
    step = agent_traj.num_step
    total_step += step
    rewards = [np.sum(epi['real_rews']) for epi in epis]
    mean_rew = np.mean(rewards)
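    # 'real_rews' are the raw environment rewards used for logging and model
    # selection; 'rews' hold the pseudo-rewards reported as 'PseudoReward'.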
    pseudo_rewards = [np.sum(epi['rews']) for epi in epis]
    result_dict['PseudoReward'] = pseudo_rewards
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards,
                          plot_title=args.env_name)

    if mean_rew > max_rew:
        torch.save(pol.state_dict(), os.path.join(
            args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(), os.path.join(
            args.log, 'models', 'vf_max.pkl'))
        if args.rew_type == 'rew':
            torch.save(rewf.state_dict(), os.path.join(
                args.log, 'models', 'rewf_max.pkl'))
            torch.save(shaping_vf.state_dict(), os.path.join(
                args.log, 'models', 'shaping_vf_max.pkl'))
        else:
            torch.save(advf.state_dict(), os.path.join(
                args.log, 'models', 'advf_max.pkl'))
        torch.save(optim_pol.state_dict(), os.path.join(
            args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(), os.path.join(
            args.log, 'models', 'optim_vf_max.pkl'))
        torch.save(optim_discrim.state_dict(), os.path.join(
            args.log, 'models', 'optim_discrim_max.pkl'))
Example #5
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards,
                          plot_title=args.env_name)

    # save models at regular intervals
    steps_as = str(int(
        int(total_step / args.steps_per_save_models + 1) * args.steps_per_save_models))
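    # 'steps_as' is total_step rounded up to the next multiple of
    # args.steps_per_save_models; models are snapshotted the first time this
    # value changes, i.e. once per save interval.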
    if 'prev_as' in locals():
        if prev_as != steps_as:
            torch.save(pol.state_dict(), os.path.join(
                args.log, 'models', 'pol_'+steps_as+'.pkl'))
            torch.save(qf1.state_dict(), os.path.join(
                args.log, 'models', 'qf1_'+steps_as+'.pkl'))
            torch.save(qf2.state_dict(), os.path.join(
                args.log, 'models', 'qf2_'+steps_as+'.pkl'))
            torch.save(discrim.state_dict(), os.path.join(
                args.log, 'models', 'discrim_'+steps_as+'.pkl'))
            torch.save(optim_pol.state_dict(), os.path.join(
                args.log, 'models', 'optim_pol_'+steps_as+'.pkl'))
            torch.save(optim_qf1.state_dict(), os.path.join(
                args.log, 'models', 'optim_qf1_'+steps_as+'.pkl'))
            torch.save(optim_qf2.state_dict(), os.path.join(
                args.log, 'models', 'optim_qf2_'+steps_as+'.pkl'))
            torch.save(optim_discrim.state_dict(), os.path.join(
                args.log, 'models', 'optim_discrim_'+steps_as+'.pkl'))
    prev_as = steps_as
    del on_traj
del sampler
Example #6
        rewards = [np.sum(epi['rews']) for epi in epis]
        mean_rew = np.mean(rewards)
        logger.record_results(args.log,
                              result_dict,
                              score_file,
                              total_epi,
                              step,
                              total_step,
                              rewards,
                              plot_title=args.env_name)

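    # Only the rank 0 process writes checkpoints, avoiding concurrent writes
    # from multiple workers.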
    if rank == 0:
        if mean_rew > max_rew:
            torch.save(pol.state_dict(),
                       os.path.join(args.log, 'models', 'pol_max.pkl'))
            torch.save(vf.state_dict(),
                       os.path.join(args.log, 'models', 'vf_max.pkl'))
            torch.save(optim_pol.state_dict(),
                       os.path.join(args.log, 'models', 'optim_pol_max.pkl'))
            torch.save(optim_vf.state_dict(),
                       os.path.join(args.log, 'models', 'optim_vf_max.pkl'))
            max_rew = mean_rew

        torch.save(pol.state_dict(),
                   os.path.join(args.log, 'models', 'pol_last.pkl'))
        torch.save(vf.state_dict(),
                   os.path.join(args.log, 'models', 'vf_last.pkl'))
        torch.save(optim_pol.state_dict(),
                   os.path.join(args.log, 'models', 'optim_pol_last.pkl'))
        torch.save(optim_vf.state_dict(),
                   os.path.join(args.log, 'models', 'optim_vf_last.pkl'))
Example #7
    # Save model snapshots at regular step intervals.
    steps_as = str(
        int(
            int(total_step / args.steps_per_save_models + 1) *
            args.steps_per_save_models))
    if 'prev_as' in locals():
        if prev_as != steps_as:
            torch.save(
                pol.state_dict(),
                os.path.join(args.log, 'models', 'pol_' + steps_as + '.pkl'))
            torch.save(
                qf1.state_dict(),
                os.path.join(args.log, 'models', 'qf1_' + steps_as + '.pkl'))
            torch.save(
                qf2.state_dict(),
                os.path.join(args.log, 'models', 'qf2_' + steps_as + '.pkl'))
            torch.save(
                discrim.state_dict(),
                os.path.join(args.log, 'models',
                             'discrim_' + steps_as + '.pkl'))
            torch.save(
                optim_pol.state_dict(),
                os.path.join(args.log, 'models',
                             'optim_pol_' + steps_as + '.pkl'))
            torch.save(
                optim_qf1.state_dict(),
                os.path.join(args.log, 'models',
                             'optim_qf1_' + steps_as + '.pkl'))
            torch.save(
                optim_qf2.state_dict(),
                os.path.join(args.log, 'models',
                             'optim_qf2_' + steps_as + '.pkl'))
            torch.save(
                optim_discrim.state_dict(),
                os.path.join(args.log, 'models',
                             'optim_discrim_' + steps_as + '.pkl'))