# --- Logging and checkpointing for one training iteration (GAIL-style) ---
# Count environment steps taken this iteration and accumulate the total.
step = agent_traj.num_step
total_step += step

# True environment returns: used both for logging and for model selection.
rewards = [np.sum(epi['real_rews']) for epi in epis]
mean_rew = np.mean(rewards)

# Discriminator-shaped ("pseudo") returns are logged for diagnostics only.
pseudo_rewards = [np.sum(epi['rews']) for epi in epis]
result_dict['PseudoReward'] = pseudo_rewards

logger.record_results(args.log, result_dict, score_file,
                      total_epi, step, total_step,
                      rewards,
                      plot_title=args.env_name)

# NOTE(review): the original recomputed mean_rew here with the identical
# expression (np.mean of summed epi['real_rews']); the redundant second
# computation was removed — the value is unchanged.
if mean_rew > max_rew:
    # New best mean real return: snapshot every network and optimizer.
    torch.save(pol.state_dict(), os.path.join(
        args.log, 'models', 'pol_max.pkl'))
    torch.save(vf.state_dict(), os.path.join(
        args.log, 'models', 'vf_max.pkl'))
    torch.save(discrim.state_dict(), os.path.join(
        args.log, 'models', 'discrim_max.pkl'))
    torch.save(optim_pol.state_dict(), os.path.join(
        args.log, 'models', 'optim_pol_max.pkl'))
    torch.save(optim_vf.state_dict(), os.path.join(
        args.log, 'models', 'optim_vf_max.pkl'))
    torch.save(optim_discrim.state_dict(), os.path.join(
        args.log, 'models', 'optim_discrim_max.pkl'))
    max_rew = mean_rew

# The "last" checkpoints are refreshed unconditionally every iteration.
torch.save(pol.state_dict(), os.path.join(
    args.log, 'models', 'pol_last.pkl'))
torch.save(vf.state_dict(), os.path.join(
    args.log, 'models', 'vf_last.pkl'))
# --- Logging and checkpointing for one training iteration (PPO-style) ---
# Accumulate the step counter and compute per-episode returns.
total_step += step
rewards = [np.sum(epi['rews']) for epi in epis]
mean_rew = np.mean(rewards)

logger.record_results(args.log, result_dict, score_file,
                      total_epi, step, total_step,
                      rewards,
                      plot_title=args.env_name)

if mean_rew > max_rew:
    # Best mean return so far: snapshot the networks and their optimizers.
    for saved_obj, fname in [(pol, 'pol_max.pkl'),
                             (vf, 'vf_max.pkl'),
                             (optim_pol, 'optim_pol_max.pkl'),
                             (optim_vf, 'optim_vf_max.pkl')]:
        torch.save(saved_obj.state_dict(),
                   os.path.join(args.log, 'models', fname))
    max_rew = mean_rew

# Refresh the "last" checkpoints unconditionally every iteration.
for saved_obj, fname in [(pol, 'pol_last.pkl'),
                         (vf, 'vf_last.pkl'),
                         (optim_pol, 'optim_pol_last.pkl'),
                         (optim_vf, 'optim_vf_last.pkl')]:
    torch.save(saved_obj.state_dict(),
               os.path.join(args.log, 'models', fname))
# Discriminator ("pseudo") returns are recorded alongside the true returns.
pseudo_rewards = [np.sum(epi['rews']) for epi in epis]
result_dict['PseudoReward'] = pseudo_rewards
logger.record_results(args.log, result_dict, score_file,
                      total_epi, step, total_step,
                      rewards,
                      plot_title=args.env_name)
# Model selection uses the true environment return, not the pseudo reward.
mean_rew = np.mean([np.sum(path['real_rews']) for path in epis])
if mean_rew > max_rew:
    # New best mean real return: snapshot networks and optimizers.
    torch.save(pol.state_dict(), os.path.join(args.log, 'models', 'pol_max.pkl'))
    torch.save(vf.state_dict(), os.path.join(args.log, 'models', 'vf_max.pkl'))
    if args.rew_type == 'rew':
        # 'rew' mode saves a separate reward net and shaping value net;
        # otherwise a single advantage net is saved.
        torch.save(rewf.state_dict(), os.path.join(args.log, 'models', 'rewf_max.pkl'))
        torch.save(shaping_vf.state_dict(), os.path.join(args.log, 'models', 'shaping_vf_max.pkl'))
    else:
        torch.save(advf.state_dict(), os.path.join(args.log, 'models', 'advf_max.pkl'))
    torch.save(optim_pol.state_dict(), os.path.join(args.log, 'models', 'optim_pol_max.pkl'))
    torch.save(optim_vf.state_dict(), os.path.join(args.log, 'models', 'optim_vf_max.pkl'))
    torch.save(optim_discrim.state_dict(), os.path.join(args.log, 'models', 'optim_discrim_max.pkl'))
    # NOTE(review): `max_rew = mean_rew` does not appear in this visible span —
    # confirm it follows immediately after, otherwise the "max" snapshots are
    # rewritten on every iteration whose return beats the stale max_rew.
# --- Logging and checkpointing for one training iteration (AIRL-style) ---
step = agent_traj.num_step
total_step += step

# True environment returns drive model selection; discriminator ("pseudo")
# returns are logged for diagnostics only.
rewards = [np.sum(epi['real_rews']) for epi in epis]
mean_rew = np.mean(rewards)
pseudo_rewards = [np.sum(epi['rews']) for epi in epis]
result_dict['PseudoReward'] = pseudo_rewards

logger.record_results(args.log, result_dict, score_file,
                      total_epi, step, total_step,
                      rewards,
                      plot_title=args.env_name)

# NOTE(review): removed a redundant recomputation of mean_rew that used the
# identical expression (np.mean of summed epi['real_rews']) — same value.
if mean_rew > max_rew:
    # New best mean real return: snapshot networks and optimizers.
    torch.save(pol.state_dict(), os.path.join(
        args.log, 'models', 'pol_max.pkl'))
    torch.save(vf.state_dict(), os.path.join(
        args.log, 'models', 'vf_max.pkl'))
    if args.rew_type == 'rew':
        # 'rew' mode saves a separate reward net and shaping value net;
        # otherwise a single advantage net is saved.
        torch.save(rewf.state_dict(), os.path.join(
            args.log, 'models', 'rewf_max.pkl'))
        torch.save(shaping_vf.state_dict(), os.path.join(
            args.log, 'models', 'shaping_vf_max.pkl'))
    else:
        torch.save(advf.state_dict(), os.path.join(
            args.log, 'models', 'advf_max.pkl'))
    torch.save(optim_pol.state_dict(), os.path.join(
        args.log, 'models', 'optim_pol_max.pkl'))
    torch.save(optim_vf.state_dict(), os.path.join(
        args.log, 'models', 'optim_vf_max.pkl'))
    torch.save(optim_discrim.state_dict(), os.path.join(
        args.log, 'models', 'optim_discrim_max.pkl'))
logger.record_results(args.log, result_dict, score_file,
                      total_epi, step, total_step,
                      rewards,
                      plot_title=args.env_name)

# Save models at regular step intervals. steps_as is the next multiple of
# args.steps_per_save_models above total_step, used as a filename tag.
steps_as = str(int(
    int(total_step / args.steps_per_save_models + 1) *
    args.steps_per_save_models))
# prev_as is undefined on the very first iteration, so nothing is saved then;
# afterwards a snapshot is written each time the interval tag advances.
if 'prev_as' in locals():
    if prev_as != steps_as:
        # Snapshot every network and optimizer under the interval tag.
        for saved_obj, stem in [(pol, 'pol'),
                                (qf1, 'qf1'),
                                (qf2, 'qf2'),
                                (discrim, 'discrim'),
                                (optim_pol, 'optim_pol'),
                                (optim_qf1, 'optim_qf1'),
                                (optim_qf2, 'optim_qf2'),
                                (optim_discrim, 'optim_discrim')]:
            torch.save(saved_obj.state_dict(), os.path.join(
                args.log, 'models', stem + '_' + steps_as + '.pkl'))
# NOTE(review): the original recomputed prev_as with an expression duplicated
# verbatim from steps_as; assigning steps_as directly is identical and keeps
# the two values from drifting if the formula is ever edited.
prev_as = steps_as

# Free the rollout buffer and sampler before the next iteration.
del on_traj
del sampler
# Per-episode returns and their mean for this iteration.
rewards = [np.sum(epi['rews']) for epi in epis]
mean_rew = np.mean(rewards)

logger.record_results(args.log, result_dict, score_file,
                      total_epi, step, total_step,
                      rewards,
                      plot_title=args.env_name)

# Only the rank-0 worker writes checkpoints to disk.
if rank == 0:
    if mean_rew > max_rew:
        # Best mean return so far: snapshot networks and optimizers.
        for saved_obj, fname in [(pol, 'pol_max.pkl'),
                                 (vf, 'vf_max.pkl'),
                                 (optim_pol, 'optim_pol_max.pkl'),
                                 (optim_vf, 'optim_vf_max.pkl')]:
            torch.save(saved_obj.state_dict(),
                       os.path.join(args.log, 'models', fname))
        max_rew = mean_rew

    # Refresh the "last" checkpoints every iteration.
    for saved_obj, fname in [(pol, 'pol_last.pkl'),
                             (vf, 'vf_last.pkl'),
                             (optim_pol, 'optim_pol_last.pkl'),
                             (optim_vf, 'optim_vf_last.pkl')]:
        torch.save(saved_obj.state_dict(),
                   os.path.join(args.log, 'models', fname))
int( int(total_step / args.steps_per_save_models + 1) * args.steps_per_save_models)) if 'prev_as' in locals(): if not prev_as == steps_as: torch.save( pol.state_dict(), os.path.join(args.log, 'models', 'pol_' + steps_as + '.pkl')) torch.save( qf1.state_dict(), os.path.join(args.log, 'models', 'qf1_' + steps_as + '.pkl')) torch.save( qf2.state_dict(), os.path.join(args.log, 'models', 'qf2_' + steps_as + '.pkl')) torch.save( discrim.state_dict(), os.path.join(args.log, 'models', 'discrim_' + steps_as + '.pkl')) torch.save( optim_pol.state_dict(), os.path.join(args.log, 'models', 'optim_pol_' + steps_as + '.pkl')) torch.save( optim_qf1.state_dict(), os.path.join(args.log, 'models', 'optim_qf1_' + steps_as + '.pkl')) torch.save( optim_qf2.state_dict(), os.path.join(args.log, 'models', 'optim_qf2_' + steps_as + '.pkl')) torch.save(