示例#1
0
def test_random_noughts_and_crosses_player_gives_equal_action_probabilities():
    nac = NoughtsAndCrosses()
    player = RandomPlayer(game=nac)
    action, action_probs = player.choose_action(nac.initial_state,
                                                return_probabilities=True)

    next_states = nac.legal_actions(nac.initial_state)
    expected_action_probs = {
        action: 1 / len(next_states)
        for action in next_states.keys()
    }

    for action in expected_action_probs.keys():
        np.testing.assert_almost_equal(action_probs[action],
                                       expected_action_probs[action])
示例#2
0
def test_mcts_noughts_and_crosses_player_gives_optimal_moves(
        state, optimal_actions):
    # seed the random number generator.
    np.random.seed(0)

    nac = NoughtsAndCrosses()
    estimator = create_trivial_estimator(nac)
    player = MCTSPlayer(game=nac,
                        estimator=estimator,
                        mcts_iters=100,
                        c_puct=0.5,
                        tau=1)
    action, action_probs = player.choose_action(state,
                                                return_probabilities=True)
    print(action_probs)

    assert max(action_probs, key=action_probs.get) in optimal_actions
示例#3
0
"""This program plays noughts and crosses using Monte Carlo Tree Search and a
trivial evaluator. For nonterminal states, the evaluator returns the uniform
probability distribution over available actions and a value of 0. In a terminal
state, we back up the utility returned by the game.
"""
import numpy as np

from alphago.games.noughts_and_crosses import NoughtsAndCrosses
from alphago.estimator import create_trivial_estimator
from alphago.player import MCTSPlayer
if __name__ == "__main__":

    nac = NoughtsAndCrosses()
    evaluator = create_trivial_estimator(nac.legal_actions)

    state = nac.INITIAL_STATE
    computer_player_no = np.random.choice([1, 2])
    computer_player = MCTSPlayer(nac,
                                 evaluator,
                                 mcts_iters=2000,
                                 c_puct=0.5,
                                 tau=0.01)
    human_player_no = 1 if computer_player_no == 2 else 2
    print("You are player: {}".format(human_player_no))
    while not nac.is_terminal(state):
        player_no = nac.current_player(state)
        next_states = nac.legal_actions(state)
        if player_no == computer_player_no:
            action = computer_player.choose_action(state)
            computer_player.update(action)
            print("Taking action: {}".format(action))