import numpy as np

from drl_gym.agents import RandomAgent

# Import paths below are assumed from the package layout used in the example
# scripts; adjust them to wherever GameState and the rollout runners live.
from drl_gym.contracts import GameState
from drl_gym.runners import (
    run_for_n_games_and_return_max,
    run_for_n_games_and_return_stats,
)


def act(self, gs: GameState) -> int:
    """Pick the action whose random rollouts score best for the active player."""
    player = gs.get_active_player()
    available_actions = gs.get_available_actions(player)

    # Lazily create one random rollout policy per player.
    if self.agents is None:
        self.agents = [RandomAgent()] * gs.player_count()

    accumulated_scores = np.zeros(len(available_actions))
    for i, a in enumerate(available_actions):
        # Play the candidate action on a copy of the state, then finish the
        # game epochs_per_action times with the rollout agents.
        gs_clone = gs.clone()
        gs_clone.step(player, a)
        if self.determinist_environment:
            # Deterministic environment: score the action by the maximum
            # return observed across rollouts.
            max_scores = run_for_n_games_and_return_max(
                self.agents, gs_clone, self.epochs_per_action
            )
            accumulated_scores[i] = max_scores[player]
        else:
            # Stochastic environment: score the action by the total return
            # accumulated over all rollouts.
            total_scores, _, _ = run_for_n_games_and_return_stats(
                self.agents, gs_clone, self.epochs_per_action
            )
            accumulated_scores[i] = total_scores[player]

    # Greedily return the action with the best rollout score.
    return available_actions[np.argmax(accumulated_scores)]
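# The act() method above implements a flat Monte Carlo agent: one-step
# lookahead where each legal action is tried on a clone of the state and
# evaluated by random rollouts. A minimal sketch of the enclosing class,
# assuming a hypothetical name (FlatMonteCarloAgent) and constructor
# defaults; the repo's actual class may differ:
class FlatMonteCarloAgent:
    def __init__(
        self, epochs_per_action: int = 100, determinist_environment: bool = False
    ):
        self.agents = None  # filled lazily in act() once the player count is known
        self.epochs_per_action = epochs_per_action
        self.determinist_environment = determinist_environment

    act = act  # bind the module-level act() above as the agent's method


# Usage, mirroring the runner scripts below:
#     gs = TicTacToeGameState()
#     run_to_the_end([FlatMonteCarloAgent(), RandomAgent()], gs)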
from drl_gym.agents import CommandLineAgent, RandomAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_to_the_end

if __name__ == "__main__":
    gs = TicTacToeGameState()
    agent0 = CommandLineAgent()
    agent1 = RandomAgent()

    print(gs)
    run_to_the_end([agent0, agent1], gs)
    print(gs)
from drl_gym.agents import RandomAgent
from drl_gym.environments import GridWorldGameState
from drl_gym.runners import run_to_the_end

if __name__ == "__main__":
    gs = GridWorldGameState()
    agent = RandomAgent()

    print(gs)
    run_to_the_end([agent], gs)
    print(gs)