Exemplo n.º 1
0
 def __init__(self, env_name, actor_id, logdir="results/", start=True):
     env = create_env(env_name)
     self.id = actor_id
     num_actions = env.action_space.n
     self.policy = LSTMPolicy(env.observation_space.shape, num_actions,
                              actor_id)
     self.runner = RunnerThread(env, self.policy, 20)
     self.env = env
     self.logdir = logdir
     if start:
         self.start()
Exemplo n.º 2
0
def train(num_workers, env_name="PongDeterministic-v3"):
    env = create_env(env_name, None, None)
    policy = LSTMPolicy(env.observation_space.shape, env.action_space.n, 0)
    agents = [Runner(env_name, i) for i in range(num_workers)]
    parameters = policy.get_weights()
    gradient_list = [agent.compute_gradient(parameters) for agent in agents]
    steps = 0
    obs = 0
    while True:
        done_id, gradient_list = ray.wait(gradient_list)
        gradient, info = ray.get(done_id)[0]
        policy.model_update(gradient)
        parameters = policy.get_weights()
        steps += 1
        obs += info["size"]
        gradient_list.extend([agents[info["id"]].compute_gradient(parameters)])
    return policy