def run_step(agents: List[Agent], gs: GameState):
    """Play a single move of the game and notify every agent of the outcome.

    The agent at the active player's index is asked for an action via
    ``act(gs)``, the action is applied with ``gs.step``, and then every agent
    in ``agents`` receives ``observe(reward, game_over, index)`` where
    ``reward`` is the change in that index's score caused by this move.

    Args:
        agents: Agents indexed by player number; ``agents[i]`` plays for
            player ``i``.
        gs: The game state to advance. It is mutated in place by ``gs.step``.

    Returns:
        None.

    Raises:
        AssertionError: If the game is already over when this is called.
    """
    # Explicit raise instead of an ``assert`` statement: the exception type
    # callers see is the same, but the check is not stripped under
    # ``python -O``.
    if gs.is_game_over():
        raise AssertionError("run_step called on a game that is already over")

    active_player_index = gs.get_active_player()

    # Snapshot the scores before stepping. The copy presumably guards against
    # get_scores() returning a live object that step() updates in place.
    old_scores = gs.get_scores().copy()

    action = agents[active_player_index].act(gs)
    gs.step(active_player_index, action)

    # Per-player score delta produced by this move (relies on the score
    # container supporting element-wise subtraction).
    rewards = gs.get_scores() - old_scores

    # Read the terminal flag once so every agent is told the same thing.
    game_over = gs.is_game_over()
    for i, agent in enumerate(agents):
        agent.observe(rewards[i], game_over, i)
def run_to_the_end(agents: List[Agent], gs: GameState):
    """Drive the game forward until ``gs`` reports that it is over.

    Repeatedly delegates to ``run_step(agents, gs)``, checking
    ``gs.is_game_over()`` before each move. If the game is already over on
    entry, no move is played.

    Args:
        agents: Agents passed through unchanged to ``run_step``.
        gs: The game state to play out; it is advanced by ``run_step``.

    Returns:
        None.
    """
    while True:
        if gs.is_game_over():
            break
        run_step(agents, gs)