Example #1
# Flat Monte Carlo agent: each available action is tried on a clone of the
# state, random rollouts are played to the end, and the highest-scoring action
# is returned. Module paths below are assumptions based on the other examples.
import numpy as np

from drl_gym.agents import RandomAgent
from drl_gym.contracts import GameState  # assumed location of GameState
from drl_gym.runners import (  # assumed location of the rollout helpers
    run_for_n_games_and_return_max,
    run_for_n_games_and_return_stats,
)


class MonteCarloRolloutAgent:  # hypothetical name for the enclosing class
    def __init__(self, epochs_per_action=100, determinist_environment=False):
        self.agents = None  # one rollout policy per player, built lazily
        self.epochs_per_action = epochs_per_action
        self.determinist_environment = determinist_environment

    def act(self, gs: GameState) -> int:
        available_actions = gs.get_available_actions(gs.get_active_player())
        if self.agents is None:
            self.agents = [RandomAgent() for _ in range(gs.player_count())]
        accumulated_scores = np.zeros((len(available_actions),))

        for i, a in enumerate(available_actions):
            # Apply the candidate action on a copy of the state, then roll out.
            gs_clone = gs.clone()
            gs_clone.step(gs.get_active_player(), a)
            if self.determinist_environment:
                # Deterministic environment: keep the best score reached.
                max_scores = run_for_n_games_and_return_max(
                    self.agents, gs_clone, self.epochs_per_action
                )
                accumulated_scores[i] = max_scores[gs.get_active_player()]
            else:
                # Stochastic environment: rank actions by total rollout score.
                (total_scores, _, _) = run_for_n_games_and_return_stats(
                    self.agents, gs_clone, self.epochs_per_action
                )
                accumulated_scores[i] = total_scores[gs.get_active_player()]

        # Greedily pick the action with the highest accumulated rollout score.
        return available_actions[np.argmax(accumulated_scores)]
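
A minimal usage sketch for the agent above. It assumes the class name MonteCarloRolloutAgent introduced here for illustration, and reuses the TicTacToeGameState and run_to_the_end helpers that Examples #2 and #3 confirm:

from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_to_the_end

if __name__ == "__main__":
    gs = TicTacToeGameState()
    # Two rollout-based agents; epochs_per_action sets rollouts per move.
    agent0 = MonteCarloRolloutAgent(epochs_per_action=50)
    agent1 = MonteCarloRolloutAgent(epochs_per_action=50)

    print(gs)
    run_to_the_end([agent0, agent1], gs)
    print(gs)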
Example #2
from drl_gym.agents import CommandLineAgent, RandomAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_to_the_end

if __name__ == "__main__":
    # Tic-Tac-Toe: a human playing from the command line vs. a random agent.
    gs = TicTacToeGameState()
    agent0 = CommandLineAgent()
    agent1 = RandomAgent()

    print(gs)  # initial board
    run_to_the_end([agent0, agent1], gs)
    print(gs)  # final board
Example #3
from drl_gym.agents import RandomAgent
from drl_gym.environments import GridWorldGameState
from drl_gym.runners import run_to_the_end

if __name__ == "__main__":
    # Single-agent GridWorld episode driven by a random policy.
    gs = GridWorldGameState()
    agent = RandomAgent()

    print(gs)  # initial state
    run_to_the_end([agent], gs)
    print(gs)  # final state