# Log every environment parameter as an indented "key = value" line.
# (Iterate .items() instead of .keys() + re-indexing — same output, one lookup.)
for k, v in preliminary_env.env_params.items():
    f.write('\t' + k + ' = ' + str(v) + '\n')

# Training runs on CPU here (no CUDA selection in this script).
device = "cpu"
env = wrap_openai_gym(
    preliminary_env.App(always_render=False, verbose=False))
# env=wrap_openai_gym(minimal_preliminary.App())

if ag == 'ppo':
    # Default observation encoder / hidden stack / action head for this env.
    observation_processor, hidden_layers, action_mapper = default_network_components(
        env)

    agent = PPOAgent(observation_processor,
                     hidden_layers,
                     action_mapper,
                     save_path=agent_params['save_path'],
                     value_lr=agent_params['value_lr'],
                     policy_lr=agent_params['policy_lr']).to(device)

    # All hyperparameters come from the agent_params dict defined earlier
    # in the file (not visible in this chunk).
    agent.train(env,
                episodes=agent_params['episodes'],
                train_freq=agent_params['train_freq'],
                eval_freq=agent_params['eval_freq'],
                render=agent_params['render'],
                batch_size=agent_params['batch_size'],
                gamma=agent_params['gamma'],
                tau=agent_params['tau'],
                clip=agent_params['clip'],
                n_steps=agent_params['n_steps'],
                entropy_coef=agent_params['entropy_coef'],
                store_agent_every=agent_params['store_agent_every'])
f.write('env_params\n')
# Log every environment parameter as an indented "key = value" line.
# (Iterate .items() instead of .keys() + re-indexing — same output, one lookup.)
for k, v in gauss_env.env_params.items():
    f.write('\t' + k + ' = ' + str(v) + '\n')

# device="cuda:0" if torch.cuda.is_available() else "cpu"
# # torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(gauss_env.App(always_render=False, verbose=False))

# Default observation encoder / hidden stack / action head for this env.
observation_processor, hidden_layers, action_mapper = default_network_components(
    env)

agent = PPOAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path=agent_params['save_path'],
                 value_lr=agent_params['value_lr'],
                 policy_lr=agent_params['policy_lr']).to(device)

if LOAD_AGENT_FROM is not None:
    # BUGFIX: use a dedicated handle — the original opened this file as `f`,
    # shadowing the parameter-log file handle above and leaving `f` bound to
    # a closed pickle file afterwards.
    # SECURITY NOTE: pickle.load executes arbitrary code; only load
    # checkpoint files you trust.
    with open(LOAD_AGENT_FROM, 'rb') as agent_file:
        state_dict = pickle.load(agent_file)
    agent.load_state_dict(state_dict)

# agent.eval(env,eval_episodes=20,render=True, episodes=agent_params['episodes'],train_freq=agent_params['train_freq'],eval_freq=agent_params['eval_freq'], batch_size=agent_params['batch_size'],
# gamma=agent_params['gamma'],tau=agent_params['tau'],clip=agent_params['clip'],n_steps=agent_params['n_steps'],entropy_coef=agent_params['entropy_coef'])
# agent.train(env,episodes=1000,train_freq=2048,eval_freq=50,render=True, batch_size=128,gamma=.99,tau=.95,clip=.2,n_steps=32,entropy_coef=.01)
# NOTE(review): the train(...) call was truncated in the source chunk; the
# remaining keyword arguments below are reconstructed from the commented-out
# call above — confirm against the original file.
agent.train(env, episodes=agent_params['episodes'],
            train_freq=agent_params['train_freq'],
            eval_freq=agent_params['eval_freq'],
            batch_size=agent_params['batch_size'],
            gamma=agent_params['gamma'],
            tau=agent_params['tau'],
            clip=agent_params['clip'],
            n_steps=agent_params['n_steps'],
            entropy_coef=agent_params['entropy_coef'])
from pyforce.env import wrap_openai_gym
from pyforce.nn import default_network_components
from pyforce.agents import PPOAgent
import gym
import torch

# Prefer the first CUDA device when one is available; otherwise run on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Continuous-control benchmark environment.
env = wrap_openai_gym(gym.make("LunarLanderContinuous-v2"))

# Default observation encoder, hidden layers and action head for this env.
observation_processor, hidden_layers, action_mapper = default_network_components(env)

# PPO agent with identical learning rates for the value and policy networks;
# checkpoints/evaluations are written under ./evals/ppo_example.
agent = PPOAgent(
    observation_processor,
    hidden_layers,
    action_mapper,
    save_path="./evals/ppo_example",
    value_lr=5e-4,
    policy_lr=5e-4,
).to(device)

# Train for 1000 episodes: policy update every 2048 steps, evaluation every
# 50 episodes, rendering enabled.
agent.train(
    env,
    episodes=1000,
    train_freq=2048,
    eval_freq=50,
    render=True,
    batch_size=128,
    gamma=0.99,
    tau=0.95,
    clip=0.2,
    n_steps=32,
    entropy_coef=0.01,
)