def test_random_noughts_and_crosses_player_gives_equal_action_probabilities():
    """Check that RandomPlayer reports equal probabilities at the initial state.

    Each key of the mapping returned by ``legal_actions`` for the initial
    state must have probability ``1 / (number of entries in that mapping)``
    in the probabilities returned by ``choose_action``.
    """
    game = NoughtsAndCrosses()
    random_player = RandomPlayer(game=game)

    # Only the reported probabilities matter here; the sampled action is
    # discarded.
    _, reported_probs = random_player.choose_action(
        game.initial_state, return_probabilities=True)

    legal = game.legal_actions(game.initial_state)

    for candidate in legal.keys():
        np.testing.assert_almost_equal(
            reported_probs[candidate], 1 / len(legal))
def test_mcts_noughts_and_crosses_player_gives_optimal_moves(
        state, optimal_actions):
    """Check that the MCTS player's most probable action is an optimal one.

    Args:
        state: The game state the player has to move from.
        optimal_actions: Container of actions accepted as optimal in
            ``state``; the highest-probability action must be in it.
    """
    # Seed NumPy's global random number generator before building the player.
    np.random.seed(0)

    game = NoughtsAndCrosses()
    mcts_player = MCTSPlayer(
        game=game,
        estimator=create_trivial_estimator(game),
        mcts_iters=100,
        c_puct=0.5,
        tau=1,
    )

    # The sampled action itself is not checked, only the distribution.
    _, probs = mcts_player.choose_action(state, return_probabilities=True)
    print(probs)

    most_likely = max(probs, key=probs.get)
    assert most_likely in optimal_actions
"""This program plays noughts and crosses using Monte Carlo Tree Search and a trivial evaluator. For nonterminal states, the evaluator returns the uniform probability distribution over available actions and a value of 0. In a terminal state, we back up the utility returned by the game. """ import numpy as np from alphago.games.noughts_and_crosses import NoughtsAndCrosses from alphago.estimator import create_trivial_estimator from alphago.player import MCTSPlayer if __name__ == "__main__": nac = NoughtsAndCrosses() evaluator = create_trivial_estimator(nac.legal_actions) state = nac.INITIAL_STATE computer_player_no = np.random.choice([1, 2]) computer_player = MCTSPlayer(nac, evaluator, mcts_iters=2000, c_puct=0.5, tau=0.01) human_player_no = 1 if computer_player_no == 2 else 2 print("You are player: {}".format(human_player_no)) while not nac.is_terminal(state): player_no = nac.current_player(state) next_states = nac.legal_actions(state) if player_no == computer_player_no: action = computer_player.choose_action(state) computer_player.update(action) print("Taking action: {}".format(action))