Пример #1
0
def agent_loop(dictionary, lock1, lock2):
    """Run one agent's asynchronous Q-learning loop against a shared Q-table.

    Intended to run as a worker process. The agent acts in its own
    GridWorldModel instance and periodically merges its locally accumulated
    Q-updates (``agent.dQ``) into the shared Q-table held in ``dictionary``
    (a manager proxy dict). On exit, the agent's per-step rewards are added
    into the shared rewards accumulator.

    Args:
        dictionary: shared (manager-proxy) dict holding the Q-table under
            ``Q_SHARED_KEY`` and the combined rewards under ``REWARDS_KEY``.
        lock1: lock guarding reads/writes of the shared Q-table.
        lock2: lock guarding reads/writes of the shared rewards array.

    Side effects: mutates ``dictionary[Q_SHARED_KEY]`` and
    ``dictionary[REWARDS_KEY]``; reseeds this process's ``random`` module.
    """
    # Reseed so forked worker processes don't share an identical RNG stream.
    random.seed()
    environment = GridWorldModel()
    agent = Agent(environment)
    agent.Q = dictionary[Q_SHARED_KEY]  # initialize with shared Q

    while environment.step_count < MAX_STEPS_PER_AGENT:
        environment.reset()
        agent.state = environment.get_start_state()
        while True:
            agent.act()
            # Flush accumulated local updates every ASYNC_UPDATE_INTERVAL
            # steps, and always at episode end so no dQ is lost.
            if (environment.step_count % ASYNC_UPDATE_INTERVAL == 0
                    or environment.is_terminal_state()):
                # `with` guarantees the lock is released even if an exception
                # is raised inside the critical section (the bare
                # acquire()/release() pair could deadlock other workers).
                with lock1:
                    q = dictionary[Q_SHARED_KEY]
                    # Need to write it back, otherwise the proxy won't pick up
                    # the changes.
                    dictionary[Q_SHARED_KEY] = np.add(q, agent.dQ)
                # Reset the local accumulator; touches only local state, so it
                # can safely happen outside the lock.
                agent.dQ = np.zeros((GridWorldModel.get_number_of_states(),
                                     GridWorldModel.get_number_of_actions()),
                                    dtype=float)
            if environment.is_terminal_state():
                break

    with lock2:
        combined_rewards = dictionary[REWARDS_KEY]
        agents_rewards = np.array(agent.rewards)
        # ...same here: write back so the proxy registers the update.
        dictionary[REWARDS_KEY] = np.add(combined_rewards,
                                         agents_rewards[:MAX_STEPS_PER_AGENT])