Exemplo n.º 1
0
# Make sure the root output directory exists. parents=True also creates any
# missing intermediate directories instead of raising FileNotFoundError.
Path(save_path).mkdir(parents=True, exist_ok=True)

# Each run gets its own sub-directory named with an integer. The new run uses
# the highest number already present plus one (1 when there is none yet).
# str.isdecimal() is used instead of str.isnumeric() because names such as
# '²' or '½' count as "numeric" but make int() raise ValueError.
run_number = 1 + max(
    (int(entry.name) for entry in Path(save_path).iterdir()
     if entry.is_dir() and entry.name.isdecimal()),
    default=0,
)
# save_path stays a plain string; it is passed on to the agent below.
save_path += '/' + str(run_number)

Path(save_path).mkdir(exist_ok=True)

# Store the run's description next to whatever else is saved for this run.
with open(save_path + '/description.txt', mode='w') as f:
    f.write(description)

# Build the agent for this run directory and pass it through .to(device)
# (presumably this moves the agent's networks to that device -- confirm
# against the pyforce API).
agent = TD3Agent(env, save_path=save_path, critic_lr=1e-3,
                 actor_lr=1e-3).to(device)

# agent=TD3Agent(
#     env,
#     save_path="./evals/td3_example",
#     critic_lr=1e-3,
#     actor_lr=1e-3
# ).to(device)

# agent.train(env,100000,train_freq=1,batch_size=100,policy_noise=0.1,policy_noise_clip=.25,gamma=.99, policy_freq=2, tau=0.005,warmup_steps=10000,buffer_size=50000, exp_noise=.1,eval_freq=1, render=True, eval_episodes=1)
agent.train(env,
            500,
            train_freq=1,
            batch_size=800,
            policy_noise=0.1,
            policy_noise_clip=.25,
Exemplo n.º 2
0
# Dump the agent and environment settings to a text file: a section heading
# followed by one tab-indented "name = value" line per entry.
with open(file_path_txt, 'w') as f:
    f.write('agent_params\n')
    for name, value in agent_params.items():
        f.write(f'\t{name} = {value!s}\n')
    f.write('env_params\n')
    for name, value in sandbox_env.env_params.items():
        f.write(f'\t{name} = {value!s}\n')

# device="cuda:0" if torch.cuda.is_available() else "cpu"
# torch.cuda.set_device(0)
# The device is fixed to the CPU.
device = "cpu"
# Create the sandbox application with always_render disabled and wrap it so
# it can be handed to the agent as its environment.
env = wrap_openai_gym(sandbox_env.App(always_render=False))

# Build the agent from the values stored in agent_params, then pass it
# through .to(device) (presumably this moves its networks to that device --
# confirm against the pyforce API).
agent = TD3Agent(
    env,
    save_path=agent_params['save_path'],
    critic_lr=agent_params['critic_lr'],
    actor_lr=agent_params['actor_lr'],
)
agent = agent.to(device)

# Optionally restore a previously saved agent: LOAD_AGENT_FROM is either None
# (start from scratch) or the path of a pickled state dict.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only point LOAD_AGENT_FROM at files from a trusted source.
if LOAD_AGENT_FROM is not None:
    with open(LOAD_AGENT_FROM, mode='rb') as checkpoint_file:
        state_dict = pickle.load(checkpoint_file)
    agent.load_state_dict(state_dict)

# agent=TD3Agent(
#     env,
#     save_path="./evals/td3_example",
#     critic_lr=1e-3,
#     actor_lr=1e-3
# ).to(device)
Exemplo n.º 3
0
# from pyforce.nn import default_network_components
from pyforce.agents import TD3Agent
# import gym
import torch

import sandbox_env
# from pyforce import agents

# device="cuda:0" if torch.cuda.is_available() else "cpu"
# # torch.cuda.set_device(0)
# The device is fixed to the CPU.
device = "cpu"
# print(device)
# Create the sandbox application (always_render on, verbose off) and wrap it
# so it can be handed to the agent as its environment.
env = wrap_openai_gym(sandbox_env.App(always_render=True, verbose=False))

# Build the agent with fixed learning rates, then pass it through .to(device)
# (presumably this moves its networks to that device -- confirm against the
# pyforce API).
agent = TD3Agent(
    env,
    save_path="./evals/td3_new",
    critic_lr=1e-3,
    actor_lr=1e-3,
)
agent = agent.to(device)

# agent=TD3Agent(
#     env,
#     save_path="./evals/td3_example",
#     critic_lr=1e-3,
#     actor_lr=1e-3
# ).to(device)

# agent.train(env,100000,train_freq=1,batch_size=100,policy_noise=0.1,policy_noise_clip=.25,gamma=.99, policy_freq=2, tau=0.005,warmup_steps=10000,buffer_size=50000, exp_noise=.1,eval_freq=1, render=True, eval_episodes=1)
agent.train(env,
            100000,
            train_freq=1,
            batch_size=800,
            policy_noise=0.1,