            # Log the environment parameters for this run
            for k in preliminary_env.env_params.keys():
                f.write('\t' + k + ' = ' + str(preliminary_env.env_params[k]) + '\n')

        device = "cpu"

        env = wrap_openai_gym(
            preliminary_env.App(always_render=False, verbose=False))
        # env=wrap_openai_gym(minimal_preliminary.App())

        if ag == 'ppo':
            observation_processor, hidden_layers, action_mapper = default_network_components(
                env)
            agent = PPOAgent(observation_processor,
                             hidden_layers,
                             action_mapper,
                             save_path=agent_params['save_path'],
                             value_lr=agent_params['value_lr'],
                             policy_lr=agent_params['policy_lr']).to(device)

            agent.train(env,
                        episodes=agent_params['episodes'],
                        train_freq=agent_params['train_freq'],
                        eval_freq=agent_params['eval_freq'],
                        render=agent_params['render'],
                        batch_size=agent_params['batch_size'],
                        gamma=agent_params['gamma'],
                        tau=agent_params['tau'],
                        clip=agent_params['clip'],
                        n_steps=agent_params['n_steps'],
                        entropy_coef=agent_params['entropy_coef'],
                        store_agent_every=agent_params['store_agent_every'])
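
This snippet and the next one read their hyperparameters from an agent_params dict that is defined elsewhere in the original scripts. The following is only an illustrative sketch of such a dict: the keys are the ones actually looked up above, the values are copied from the self-contained example at the end (Example #3), and store_agent_every is a placeholder guess.

# Hypothetical configuration; keys mirror the lookups above, values come from
# Example #3 below, and 'store_agent_every' is an assumed placeholder.
agent_params = {
    'save_path': './evals/ppo_example',
    'value_lr': 5e-4,
    'policy_lr': 5e-4,
    'episodes': 1000,
    'train_freq': 2048,
    'eval_freq': 50,
    'render': True,
    'batch_size': 128,
    'gamma': .99,
    'tau': .95,
    'clip': .2,
    'n_steps': 32,
    'entropy_coef': .01,
    'store_agent_every': 100,  # assumed value, not taken from the original scripts
}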
Example #2
    f.write('env_params\n')
    for k in gauss_env.env_params.keys():
        f.write('\t' + k + ' = ' + str(gauss_env.env_params[k]) + '\n')

# device="cuda:0" if torch.cuda.is_available() else "cpu"
# # torch.cuda.set_device(0)
device = "cpu"

env = wrap_openai_gym(gauss_env.App(always_render=False, verbose=False))

observation_processor, hidden_layers, action_mapper = default_network_components(
    env)

agent = PPOAgent(observation_processor,
                 hidden_layers,
                 action_mapper,
                 save_path=agent_params['save_path'],
                 value_lr=agent_params['value_lr'],
                 policy_lr=agent_params['policy_lr']).to(device)

if LOAD_AGENT_FROM is not None:
    with open(LOAD_AGENT_FROM, 'rb') as f:
        state_dict = pickle.load(f)
    agent.load_state_dict(state_dict)

    # agent.eval(env,eval_episodes=20,render=True, episodes=agent_params['episodes'],train_freq=agent_params['train_freq'],eval_freq=agent_params['eval_freq'], batch_size=agent_params['batch_size'],
    #             gamma=agent_params['gamma'],tau=agent_params['tau'],clip=agent_params['clip'],n_steps=agent_params['n_steps'],entropy_coef=agent_params['entropy_coef'])

# agent.train(env,episodes=1000,train_freq=2048,eval_freq=50,render=True, batch_size=128,gamma=.99,tau=.95,clip=.2,n_steps=32,entropy_coef=.01)

agent.train(env,
            episodes=agent_params['episodes'],
            train_freq=agent_params['train_freq'],
            eval_freq=agent_params['eval_freq'],
            render=agent_params['render'],
            batch_size=agent_params['batch_size'],
            gamma=agent_params['gamma'],
            tau=agent_params['tau'],
            clip=agent_params['clip'],
            n_steps=agent_params['n_steps'],
            entropy_coef=agent_params['entropy_coef'])
Example #3
from pyforce.env import wrap_openai_gym
from pyforce.nn import default_network_components
from pyforce.agents import PPOAgent
import gym
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"

env = wrap_openai_gym(gym.make("LunarLanderContinuous-v2"))

observation_processor, hidden_layers, action_mapper = default_network_components(env)

agent = PPOAgent(
    observation_processor,
    hidden_layers,
    action_mapper,
    save_path="./evals/ppo_example",
    value_lr=5e-4,
    policy_lr=5e-4
).to(device)

agent.train(env,
            episodes=1000,
            train_freq=2048,
            eval_freq=50,
            render=True,
            batch_size=128,
            gamma=.99,
            tau=.95,
            clip=.2,
            n_steps=32,
            entropy_coef=.01)
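
During training the agent stores checkpoints under save_path. A minimal sketch of restoring such a checkpoint and evaluating it, assuming the pickled state_dict format used in Example #2; the file name is hypothetical, and judging by the commented-out call in Example #2, agent.eval may also expect the same keyword arguments as agent.train.

import pickle

# Hypothetical checkpoint file produced by a previous run
CHECKPOINT = "./evals/ppo_example/agent.pkl"

with open(CHECKPOINT, "rb") as f:
    state_dict = pickle.load(f)
agent.load_state_dict(state_dict)

# Run evaluation episodes with the restored policy
agent.eval(env, eval_episodes=20, render=True)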