def agent_loop(dictionary, lock1, lock2):
    """Run one asynchronous Q-learning agent process.

    Repeatedly plays episodes in a private GridWorldModel, accumulating a local
    Q-delta (``agent.dQ``).  Every ``ASYNC_UPDATE_INTERVAL`` steps (and at each
    terminal state) the delta is folded into the shared Q table held in
    ``dictionary``; at the end the agent's per-step rewards are added into the
    shared rewards accumulator.

    Args:
        dictionary: Shared mapping (e.g. a ``multiprocessing.Manager().dict()``
            proxy) holding the global Q table under ``Q_SHARED_KEY`` and the
            combined rewards under ``REWARDS_KEY``.
        lock1: Lock guarding read-modify-write of the shared Q table.
        lock2: Lock guarding read-modify-write of the shared rewards array.
    """
    # Re-seed per process so forked workers don't share an RNG stream.
    random.seed()
    environment = GridWorldModel()
    agent = Agent(environment)
    agent.Q = dictionary[Q_SHARED_KEY]  # initialize with shared Q
    while environment.step_count < MAX_STEPS_PER_AGENT:
        environment.reset()
        agent.state = environment.get_start_state()
        while True:
            agent.act()
            if environment.step_count % ASYNC_UPDATE_INTERVAL == 0 or environment.is_terminal_state():
                # `with` guarantees the lock is released even if np.add or the
                # proxy access raises; a bare acquire/release would deadlock
                # every sibling agent on an exception here.
                with lock1:
                    q = dictionary[Q_SHARED_KEY]
                    # Need to write it back, otherwise the proxy won't pick up
                    # the changes.
                    dictionary[Q_SHARED_KEY] = np.add(q, agent.dQ)
                # Delta has been published; start accumulating from zero again.
                agent.dQ = np.zeros((GridWorldModel.get_number_of_states(),
                                     GridWorldModel.get_number_of_actions()),
                                    dtype=float)
            if environment.is_terminal_state():
                break
    with lock2:
        combined_rewards = dictionary[REWARDS_KEY]
        agents_rewards = np.array(agent.rewards)
        # ...same here: reassign so the proxy sees the update.
        dictionary[REWARDS_KEY] = np.add(combined_rewards,
                                         agents_rewards[:MAX_STEPS_PER_AGENT])