Example #1
def parse(boardstr):
    '''Parses a board string into a GameState, and returns the locations of any
    moves marked with anything other than 'B', 'X', '#', 'W', 'O', or '.'

    Rows are separated by '|', spaces are ignored.

    '''

    boardstr = boardstr.replace(' ', '')
    # For an NxN board, the first '|' appears at index N (the width of the
    # first row) and there are N - 1 separators (N, if there is a trailing
    # '|'), so the max of the two recovers N either way.
    board_size = max(boardstr.index('|'), boardstr.count('|'))

    st = GameState(size=board_size)
    moves = {}

    for row, rowstr in enumerate(boardstr.split('|')):
        for col, c in enumerate(rowstr):
            if c == '.':
                continue  # ignore empty spaces
            elif c in 'BX#':
                st.do_move((row, col), color=BLACK)
            elif c in 'WO':
                st.do_move((row, col), color=WHITE)
            else:
                # move reference
                assert c not in moves, "{} already used as a move marker".format(c)
                moves[c] = (row, col)

    return st, moves
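
A minimal usage sketch of parse (illustrative only, not part of the original source): 'a' is neither a stone character nor '.', so it is returned as a move marker.

st, moves = parse("B . W |"
                  ". B . |"
                  ". . a")
assert moves == {'a': (2, 2)}  # 'a' marks row 2, column 2
assert st.size == 3            # GameState exposes its size (see Example #6)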
Example #2
    def test_positional_superko(self):
        move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2),
                     (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2),
                     (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2),
                     (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)]

        # Position after every move except the final (0, 0); black's (0, 0)
        # then captures the two white stones at (1, 0) and (2, 0).
        #   0 1 2 3 4 5 6 7 8 9
        # 0 . W W B W . . . . .
        # 1 W B . B W . . . . .
        # 2 W B B B W . . . . .
        # 3 B B W W W . . . . .
        # 4 W W W . . . . . . .
        # 5 . . . . . . . . . .
        # 6 . . . . . . . . . .
        # 7 . . . . . . . . . .
        # 8 B B B B B . . . . .
        # 9 . . . . . . . . . .

        gs = GameState(size=9)
        for move in move_list:
            gs.do_move(move)
        self.assertTrue(gs.is_legal((1, 0)))

        gs = GameState(size=9, enforce_superko=True)
        for move in move_list:
            gs.do_move(move)
        self.assertFalse(gs.is_legal((1, 0)))
Example #3
        def run_and_get_new_weights(init_weights, win0, win1):
            state = GameState(size=19)
            policy = CNNPolicy.load_model(
                os.path.join('test_data', 'minimodel.json'))
            policy.model.set_weights(init_weights)
            optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2)
            policy.model.compile(loss=log_loss, optimizer=optimizer)

            # Make moves on the state and get trainable (state, action) pairs from them.
            moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)]
            state_tensors = []
            action_tensors = []
            for m in moves:
                st_tensor, mv_tensor = _make_training_pair(
                    state, m, policy.preprocessor)
                state_tensors.append(st_tensor)
                action_tensors.append(mv_tensor)
                state.do_move(m)

            for i, (s, a) in enumerate(zip(state_tensors, action_tensors)):
                # Put even state/action pairs in game 0, odd ones in game 1.
                game_idx = i % 2
                optimizer.set_current_game(game_idx)
                is_last_move = i + 2 >= len(moves)
                if is_last_move:
                    if game_idx == 0:
                        optimizer.set_result(game_idx, win0)
                    else:
                        optimizer.set_result(game_idx, win1)
                # train_on_batch accumulates gradients, and should only cause a change to parameters
                # on the first call after the final set_result() call
                policy.model.train_on_batch(s, a)
            return policy.model.get_weights()
Example #4
 def test_probabilistic_player(self):
     gs = GameState()
     policy = CNNPolicy(["board", "ones", "turns_since"])
     player = ProbabilisticPolicyPlayer(policy)
     for _ in range(20):
         move = player.get_move(gs)
         self.assertIsNotNone(move)
         gs.do_move(move)
Example #5
    def testApplyAndResetOnGamesFinished(self):
        policy = CNNPolicy.load_model(
            os.path.join('test_data', 'minimodel.json'))
        state = GameState(size=19)
        optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2)
        policy.model.compile(loss=log_loss, optimizer=optimizer)

        # Helper to check initial conditions of the optimizer.
        def assertOptimizerInitialConditions():
            for v in optimizer.gradient_sign:
                self.assertEqual(K.eval(v), 0)
            self.assertEqual(K.eval(optimizer.running_games), 2)

        initial_parameters = policy.model.get_weights()

        def assertModelEffect(changed):
            any_change = False
            for cur, init in zip(policy.model.get_weights(),
                                 initial_parameters):
                if not np.allclose(init, cur):
                    any_change = True
                    break
            self.assertEqual(any_change, changed)

        assertOptimizerInitialConditions()

        # Make moves on the state and get trainable (state, action) pairs from them.
        state_tensors = []
        action_tensors = []
        moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)]
        for m in moves:
            st_tensor, mv_tensor = _make_training_pair(
                state, m, policy.preprocessor)
            state_tensors.append(st_tensor)
            action_tensors.append(mv_tensor)
            state.do_move(m)

        for i, (s, a) in enumerate(zip(state_tensors, action_tensors)):
            # Even moves in game 0, odd moves in game 1
            game_idx = i % 2
            optimizer.set_current_game(game_idx)
            is_last_move = i + 2 >= len(moves)
            if is_last_move:
                # Mark game 0 as a win and game 1 as a loss.
                optimizer.set_result(game_idx, game_idx == 0)
            else:
                # Games not finished yet; assert no change to optimizer state.
                assertOptimizerInitialConditions()
            # train_on_batch accumulates gradients, and should only cause a change to parameters
            # on the first call after the final set_result() call
            policy.model.train_on_batch(s, a)
            if i + 1 < len(moves):
                assertModelEffect(changed=False)
            else:
                assertModelEffect(changed=True)
        # Once both games finished, the last call to train_on_batch() should have triggered a reset
        # to the optimizer parameters back to initial conditions.
        assertOptimizerInitialConditions()
Example #6
 def test_eye_recursion(self):
     # a checkerboard pattern of black is 'technically' all true eyes
     # mutually supporting each other
     gs = GameState(7)
     for x in range(gs.size):
         for y in range(gs.size):
             if (x + y) % 2 == 1:
                 gs.do_move((x, y), go.BLACK)
     self.assertTrue(gs.is_eye((0, 0), go.BLACK))
Example #7
 def test_sensible_greedy(self):
     gs = GameState()
     policy = CNNPolicy(["board", "ones", "turns_since"])
     player = GreedyPolicyPlayer(policy)
     empty = (10, 10)
     for x in range(19):
         for y in range(19):
             if (x, y) != empty:
                 gs.do_move((x, y), go.BLACK)
     gs.current_player = go.BLACK
     self.assertIsNone(player.get_move(gs))
Example #8
    def test_copy_maintains_shared_sets(self):
        gs = GameState(7)
        gs.do_move((4, 4), go.BLACK)
        gs.do_move((4, 5), go.BLACK)

        # assert that gs has *the same object* referenced by group/liberty sets
        self.assertTrue(gs.group_sets[4][5] is gs.group_sets[4][4])
        self.assertTrue(gs.liberty_sets[4][5] is gs.liberty_sets[4][4])

        gs_copy = gs.copy()
        self.assertTrue(gs_copy.group_sets[4][5] is gs_copy.group_sets[4][4])
        self.assertTrue(
            gs_copy.liberty_sets[4][5] is gs_copy.liberty_sets[4][4])
Example #9
    def test_simple_eye(self):

        # create a black eye at top left (1, 1) and a white eye at bottom right (5, 5)

        gs = GameState(size=7)
        gs.do_move((1, 0))  # B
        gs.do_move((5, 4))  # W
        gs.do_move((2, 1))  # B
        gs.do_move((6, 5))  # W
        gs.do_move((1, 2))  # B
        gs.do_move((5, 6))  # W
        gs.do_move((0, 1))  # B
        gs.do_move((4, 5))  # W

        # test black eye top left
        self.assertTrue(gs.is_eyeish((1, 1), go.BLACK))
        self.assertFalse(gs.is_eyeish((1, 1), go.WHITE))

        # test white eye bottom right
        self.assertTrue(gs.is_eyeish((5, 5), go.WHITE))
        self.assertFalse(gs.is_eyeish((5, 5), go.BLACK))

        # test no eye in other random positions
        self.assertFalse(gs.is_eyeish((1, 0), go.BLACK))
        self.assertFalse(gs.is_eyeish((1, 0), go.WHITE))
        self.assertFalse(gs.is_eyeish((2, 2), go.BLACK))
        self.assertFalse(gs.is_eyeish((2, 2), go.WHITE))
Example #10
 def test_snapback_is_not_ko(self):
     gs = GameState(size=5)
     # B X W B .
     # W W B . .
     # . . . . .
     # . . . . .
     # . . . . .
     # Imagine black plays at 'X', capturing the white stone at (2, 0).
     # White may then play again at (2, 0) to capture the black stones at
     # (0, 0) and (1, 0). This is a 'snapback', not a 'ko', since it does
     # not return the game to a previous position.
     B = [(0, 0), (2, 1), (3, 0)]
     W = [(0, 1), (1, 1), (2, 0)]
     for (b, w) in zip(B, W):
         gs.do_move(b)
         gs.do_move(w)
     # do the capture of the single white stone
     gs.do_move((1, 0))
     # there should be no ko
     self.assertIsNone(gs.ko)
     self.assertTrue(gs.is_legal((2, 0)))
     # now play the snapback
     gs.do_move((2, 0))
     # check that the numbers worked out
     self.assertEqual(gs.num_black_prisoners, 2)
     self.assertEqual(gs.num_white_prisoners, 1)
Example #11
    def test_true_eye(self):
        gs = GameState(size=7)
        gs.do_move((1, 0), go.BLACK)
        gs.do_move((0, 1), go.BLACK)

        # false eye at 0, 0
        self.assertTrue(gs.is_eyeish((0, 0), go.BLACK))
        self.assertFalse(gs.is_eye((0, 0), go.BLACK))

        # make it a true eye by turning the corner (1, 1) into an eye itself
        gs.do_move((1, 2), go.BLACK)
        gs.do_move((2, 1), go.BLACK)
        gs.do_move((2, 2), go.BLACK)
        gs.do_move((0, 2), go.BLACK)

        self.assertTrue(gs.is_eyeish((0, 0), go.BLACK))
        self.assertTrue(gs.is_eye((0, 0), go.BLACK))
        self.assertTrue(gs.is_eye((1, 1), go.BLACK))
Example #12
    def test_liberties_after_capture(self):
        # Creates a 3x3 black group in the middle that is then entirely
        # captured, and asserts that the resulting liberties after the
        # capture are the same as if the group had never been there.
        gs_capture = GameState(7)
        gs_reference = GameState(7)
        # add in 3x3 black stones
        for x in range(2, 5):
            for y in range(2, 5):
                gs_capture.do_move((x, y), go.BLACK)
        # surround the black group with white stones
        # and set the same white stones in gs_reference
        for x in range(2, 5):
            gs_capture.do_move((x, 1), go.WHITE)
            gs_capture.do_move((x, 5), go.WHITE)
            gs_reference.do_move((x, 1), go.WHITE)
            gs_reference.do_move((x, 5), go.WHITE)
        gs_capture.do_move((1, 1), go.WHITE)
        gs_reference.do_move((1, 1), go.WHITE)
        for y in range(2, 5):
            gs_capture.do_move((1, y), go.WHITE)
            gs_capture.do_move((5, y), go.WHITE)
            gs_reference.do_move((1, y), go.WHITE)
            gs_reference.do_move((5, y), go.WHITE)

        # board configuration and liberties of gs_capture and of gs_reference should be identical
        self.assertTrue(np.all(gs_reference.board == gs_capture.board))
        self.assertTrue(
            np.all(gs_reference.liberty_counts == gs_capture.liberty_counts))
Example #13
    def test_standard_ko(self):
        # . B . .
        # B X B .
        # W B W .
        # . W . .
        gs = GameState(size=9)
        gs.do_move((1, 0))  # B
        gs.do_move((2, 0))  # W
        gs.do_move((2, 1))  # B
        gs.do_move((3, 1))  # W
        gs.do_move((1, 2))  # B
        gs.do_move((2, 2))  # W
        gs.do_move((0, 1))  # B

        gs.do_move((1, 1))  # W triggers the capture and creates the ko

        self.assertEqual(gs.num_black_prisoners, 1)
        self.assertEqual(gs.num_white_prisoners, 0)

        self.assertFalse(gs.is_legal((2, 1)))

        gs.do_move((5, 5))
        gs.do_move((5, 6))

        self.assertTrue(gs.is_legal((2, 1)))
Example #14
class TestLiberties(unittest.TestCase):
    def setUp(self):

        #   0 1 2 3 4 5 6 7 8 9 A B
        # 0 . . . . . . . . . . . .
        # 1 . . . . . . . . . . . .
        # 2 . . . . . . . . . . . .
        # 3 . . . . . . . . . . . .
        # 4 . . . . . B B . . . . .
        # 5 . . . . . W B . . . . .
        # 6 . . . . . . B . . . . .
        # 7 . . . . . . . . . . . .
        # 8 . . . . . . . . . . . .
        # 9 . . . . . . . . . . W .
        # A . . . . . . . . . . W W
        # B . . . . . . . . . . . .
        self.s = GameState()
        self.s.do_move((4, 5))
        self.s.do_move((5, 5))
        self.s.do_move((5, 6))
        self.s.do_move((10, 10))
        self.s.do_move((4, 6))
        self.s.do_move((10, 11))
        self.s.do_move((6, 6))
        self.s.do_move((9, 10))

    def test_curr_liberties(self):
        self.assertEqual(self.s.liberty_counts[5][5], 2)
        self.assertEqual(self.s.liberty_counts[4][5], 8)
        self.assertEqual(self.s.liberty_counts[5][6], 8)

    def test_neighbors_edge_cases(self):

        st = GameState()
        st.do_move((0, 0))  # B B . . . . .
        st.do_move((5, 5))  # B W . . . . .
        st.do_move((0, 1))  # . . . . . . .
        st.do_move((6, 6))  # . . . . . . .
        st.do_move((1, 0))  # . . . . . W .
        st.do_move((1, 1))  # . . . . . . W

        # get_group in the corner
        self.assertEqual(len(st.get_group((0, 0))), 3, "group size in corner")

        # get_group of an empty space
        self.assertEqual(len(st.get_group((4, 4))), 0,
                         "group size of empty space")

        # get_group of a single piece
        self.assertEqual(len(st.get_group((5, 5))), 1,
                         "group size of single piece")
Example #15
class TestMCTS(unittest.TestCase):
    def setUp(self):
        self.gs = GameState()
        self.mcts = MCTS(dummy_value, dummy_policy, dummy_rollout, n_playout=2)

    def _count_expansions(self):
        """Helper function to count the number of expansions past the root using the dummy policy
        """
        node = self.mcts._root
        expansions = 0
        # Loop over actions in decreasing probability.
        for action, _ in sorted(dummy_policy(self.gs),
                                key=lambda move: move[1],
                                reverse=True):
            if action in node._children:
                expansions += 1
                node = node._children[action]
            else:
                break
        return expansions

    def test_playout(self):
        self.mcts._playout(self.gs.copy(), 8)
        # Assert that the most likely child was visited (according to the dummy policy below).
        self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits)
        # Assert that the search depth expanded nodes 8 times.
        self.assertEqual(8, self._count_expansions())

    def test_playout_with_pass(self):
        # Test that playout handles the end of the game (i.e. passing/no moves). Mock this by
        # creating a policy that returns nothing after 4 moves.
        def stop_early_policy(state):
            if len(state.history) <= 4:
                return dummy_policy(state)
            else:
                return []

        self.mcts = MCTS(dummy_value,
                         stop_early_policy,
                         stop_early_policy,
                         n_playout=2)
        self.mcts._playout(self.gs.copy(), 8)
        # Assert that (18, 18) is still only visited once.
        self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits)
        # Assert that no expansions happened after reaching the "end" in 4 moves.
        self.assertEqual(5, self._count_expansions())

    def test_get_move(self):
        move = self.mcts.get_move(self.gs)
        self.mcts.update_with_move(move)
        # success if no errors

    def test_update_with_move(self):
        move = self.mcts.get_move(self.gs)
        self.gs.do_move(move)
        self.mcts.update_with_move(move)
        # Assert that the new root still has children.
        self.assertTrue(len(self.mcts._root._children) > 0)
        # Assert that the new root has no parent (the rest of the tree will be garbage collected).
        self.assertIsNone(self.mcts._root._parent)
        # Assert that the next best move according to the root is (18, 17), according to the
        # dummy policy below.
        self.assertEqual((18, 17), self.mcts._root.select()[0])
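
The dummy_value, dummy_policy, and dummy_rollout helpers referenced by this class are not part of the listing. As a purely hypothetical sketch (not the original code) consistent with the assertions above, where (18, 18) gets the highest probability and (18, 17) the second highest, dummy_policy could look like:

def dummy_policy(state):
    # Hypothetical stand-in: a fixed, strictly decreasing distribution over
    # all board points, ignoring move legality for brevity. Sorting points
    # in reverse order puts (18, 18) first and (18, 17) second, matching
    # the expectations in the tests. Only state.size is assumed here.
    points = [(x, y) for x in range(state.size) for y in range(state.size)]
    points.sort(reverse=True)
    n = len(points)
    return [(pt, float(n - i) / n) for i, pt in enumerate(points)]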