def test_sarsa_agent():
    """Check that one SARSA training episode fills the initially empty Q-table.

    The deck is configured as 12 rows by 12 lanes, and the test expects
    exactly 61 Q-table entries after a single episode.
    """
    deck = RoRoDeck()
    deck.rows = 12
    deck.lanes = 12
    deck.reset()

    # set all units to equal length
    for column in (1, 3):
        deck.vehicle_data[4][column] = 2

    learner = SARSA(deck, None)
    # A freshly constructed agent must not know any state yet.
    assert len(learner.q_table) == 0

    learner.number_of_episodes = 1
    learner.train()
    assert len(learner.q_table) == 61


def test_max_action_method_sarsa():
    """Check that ``max_action`` returns the best action among those offered.

    Four Q-values are seeded for the initial state. The test first expects
    the action with the highest value, then the runner-up once the offered
    actions no longer include the former.
    """
    deck = RoRoDeck()
    deck.rows = 12
    deck.lanes = 12
    initial_state = deck.reset()

    learner = SARSA(deck, None)

    # The Q-table is keyed by the raw bytes of the state array.
    state_key = initial_state.tobytes()
    learner.q_table[state_key] = np.zeros(4)
    assert np.count_nonzero(learner.q_table[state_key]) == 0

    # Make action 3 the best choice and action 2 the second best.
    learner.q_table[state_key][2] = 1
    learner.q_table[state_key][3] = 2
    best = learner.max_action(initial_state, deck.possible_actions)
    assert best == 3

    # With action 3 no longer offered, the runner-up must be selected.
    deck.possible_actions = np.array([0, 1, 2])
    best = learner.max_action(initial_state, deck.possible_actions)
    assert best == 2