def parse(boardstr):
    '''Parses a board string into a GameState, and returns the location of any
    moves marked with anything other than 'B', 'X', '#', 'W', 'O', or '.'

    Rows are separated by '|'; spaces are ignored.
    '''
    boardstr = boardstr.replace(' ', '')
    # The first '|' appears after one full row, and there is one fewer '|'
    # than there are rows, so max() of the two gives the board size.
    board_size = max(boardstr.index('|'), boardstr.count('|'))
    st = GameState(size=board_size)
    moves = {}
    for row, rowstr in enumerate(boardstr.split('|')):
        for col, c in enumerate(rowstr):
            if c == '.':
                continue  # ignore empty spaces
            elif c in 'BX#':
                st.do_move((row, col), color=BLACK)
            elif c in 'WO':
                st.do_move((row, col), color=WHITE)
            else:
                # move reference
                assert c not in moves, "{} already used as a move marker".format(c)
                moves[c] = (row, col)
    return st, moves

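# A minimal usage sketch of parse(). The board string and expected values are
# illustrative; BLACK/WHITE and GameState are the same names used above, and
# _example_parse_usage is a hypothetical helper, not part of the test suite.
def _example_parse_usage():
    gs, moves = parse("B W . . .|"
                      ". a . . .|"
                      ". . . . .|"
                      ". . . . .|"
                      ". . . . .")
    # gs is a 5x5 GameState with black at (0, 0) and white at (0, 1).
    # The 'a' marker is not played on the board; it is returned as a location.
    assert moves == {'a': (1, 1)}
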
def test_positional_superko(self):
    move_list = [(0, 3), (0, 4), (1, 3), (1, 4), (2, 3), (2, 4), (2, 2),
                 (3, 4), (2, 1), (3, 3), (3, 1), (3, 2), (3, 0), (4, 2),
                 (1, 1), (4, 1), (8, 0), (4, 0), (8, 1), (0, 2), (8, 2),
                 (0, 1), (8, 3), (1, 0), (8, 4), (2, 0), (0, 0)]
    # Position after the first 26 moves, just before black plays (0, 0)
    # (which captures the white stones at (1, 0) and (2, 0)):
    #   0 1 2 3 4 5 6 7 8
    # 0 . W W B W . . . .
    # 1 W B . B W . . . .
    # 2 W B B B W . . . .
    # 3 B B W W W . . . .
    # 4 W W W . . . . . .
    # 5 . . . . . . . . .
    # 6 . . . . . . . . .
    # 7 . . . . . . . . .
    # 8 B B B B B . . . .
    # White recapturing at (1, 0) would capture (0, 0) and recreate the
    # whole-board position that existed after black's 25th move, so it is
    # legal under the simple ko rule but illegal under positional superko.
    gs = GameState(size=9)
    for move in move_list:
        gs.do_move(move)
    self.assertTrue(gs.is_legal((1, 0)))

    gs = GameState(size=9, enforce_superko=True)
    for move in move_list:
        gs.do_move(move)
    self.assertFalse(gs.is_legal((1, 0)))

def run_and_get_new_weights(init_weights, win0, win1):
    state = GameState(size=19)
    policy = CNNPolicy.load_model(os.path.join('test_data', 'minimodel.json'))
    policy.model.set_weights(init_weights)
    optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2)
    policy.model.compile(loss=log_loss, optimizer=optimizer)

    # Make moves on the state and get trainable (state, action) pairs from them.
    moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)]
    state_tensors = []
    action_tensors = []
    for m in moves:
        (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor)
        state_tensors.append(st_tensor)
        action_tensors.append(mv_tensor)
        state.do_move(m)

    for i, (s, a) in enumerate(zip(state_tensors, action_tensors)):
        # Put even state/action pairs in game 0, odd ones in game 1.
        game_idx = i % 2
        optimizer.set_current_game(game_idx)
        is_last_move = i + 2 >= len(moves)
        if is_last_move:
            if game_idx == 0:
                optimizer.set_result(game_idx, win0)
            else:
                optimizer.set_result(game_idx, win1)
        # train_on_batch accumulates gradients, and should only cause a change
        # to parameters on the first call after the final set_result() call.
        policy.model.train_on_batch(s, a)
    return policy.model.get_weights()

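# The tests above and below rely on a helper _make_training_pair() defined
# elsewhere in this module. A minimal sketch of what it plausibly does, under
# the assumption that preprocessor.state_to_tensor() exists and that actions
# are one-hot encoded over the flattened board; the _sketch suffix is used
# here only to avoid shadowing the real helper.
def _make_training_pair_sketch(st, mv, preprocessor):
    # Convert the current state to the network's input tensor.
    st_tensor = preprocessor.state_to_tensor(st)
    # One-hot encode the move over the flattened board.
    mv_tensor = np.zeros((1, st.size * st.size))
    mv_tensor[0, mv[0] * st.size + mv[1]] = 1
    return (st_tensor, mv_tensor)
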
def test_probabilistic_player(self):
    gs = GameState()
    policy = CNNPolicy(["board", "ones", "turns_since"])
    player = ProbabilisticPolicyPlayer(policy)
    for _ in range(20):
        move = player.get_move(gs)
        self.assertIsNotNone(move)
        gs.do_move(move)

def testApplyAndResetOnGamesFinished(self):
    policy = CNNPolicy.load_model(os.path.join('test_data', 'minimodel.json'))
    state = GameState(size=19)
    optimizer = BatchedReinforcementLearningSGD(lr=0.01, ng=2)
    policy.model.compile(loss=log_loss, optimizer=optimizer)

    # Helper to check initial conditions of the optimizer.
    def assertOptimizerInitialConditions():
        for v in optimizer.gradient_sign:
            self.assertEqual(K.eval(v), 0)
        self.assertEqual(K.eval(optimizer.running_games), 2)

    initial_parameters = policy.model.get_weights()

    def assertModelEffect(changed):
        any_change = False
        for cur, init in zip(policy.model.get_weights(), initial_parameters):
            if not np.allclose(init, cur):
                any_change = True
                break
        self.assertEqual(any_change, changed)

    assertOptimizerInitialConditions()

    # Make moves on the state and get trainable (state, action) pairs from them.
    state_tensors = []
    action_tensors = []
    moves = [(2, 2), (16, 16), (3, 17), (16, 2), (4, 10), (10, 3)]
    for m in moves:
        (st_tensor, mv_tensor) = _make_training_pair(state, m, policy.preprocessor)
        state_tensors.append(st_tensor)
        action_tensors.append(mv_tensor)
        state.do_move(m)

    for i, (s, a) in enumerate(zip(state_tensors, action_tensors)):
        # Even moves in game 0, odd moves in game 1.
        game_idx = i % 2
        optimizer.set_current_game(game_idx)
        is_last_move = i + 2 >= len(moves)
        if is_last_move:
            # Mark game 0 as a win and game 1 as a loss.
            optimizer.set_result(game_idx, game_idx == 0)
        else:
            # Games not finished yet; assert no change to optimizer state.
            assertOptimizerInitialConditions()
        # train_on_batch accumulates gradients, and should only cause a change
        # to parameters on the first call after the final set_result() call.
        policy.model.train_on_batch(s, a)
        if i + 1 < len(moves):
            assertModelEffect(changed=False)
        else:
            assertModelEffect(changed=True)
    # Once both games have finished, the last call to train_on_batch() should
    # have triggered a reset of the optimizer state back to initial conditions.
    assertOptimizerInitialConditions()

def test_eye_recursion(self):
    # a checkerboard pattern of black stones makes every empty point
    # 'technically' a true eye, with the eyes mutually supporting each other
    gs = GameState(7)
    for x in range(gs.size):
        for y in range(gs.size):
            if (x + y) % 2 == 1:
                gs.do_move((x, y), go.BLACK)
    self.assertTrue(gs.is_eye((0, 0), go.BLACK))

def test_sensible_greedy(self):
    gs = GameState()
    policy = CNNPolicy(["board", "ones", "turns_since"])
    player = GreedyPolicyPlayer(policy)
    # Fill the board with black stones, leaving a single empty point. The only
    # remaining move would fill black's own eye, so the player should pass
    # (i.e. get_move() should return None).
    empty = (10, 10)
    for x in range(19):
        for y in range(19):
            if (x, y) != empty:
                gs.do_move((x, y), go.BLACK)
    gs.current_player = go.BLACK
    self.assertIsNone(player.get_move(gs))

def test_copy_maintains_shared_sets(self):
    gs = GameState(7)
    gs.do_move((4, 4), go.BLACK)
    gs.do_move((4, 5), go.BLACK)
    # assert that gs has *the same object* referenced by group/liberty sets
    self.assertTrue(gs.group_sets[4][5] is gs.group_sets[4][4])
    self.assertTrue(gs.liberty_sets[4][5] is gs.liberty_sets[4][4])
    gs_copy = gs.copy()
    self.assertTrue(gs_copy.group_sets[4][5] is gs_copy.group_sets[4][4])
    self.assertTrue(gs_copy.liberty_sets[4][5] is gs_copy.liberty_sets[4][4])

def test_simple_eye(self):
    # create a black eye in top left (1, 1), white in bottom right (5, 5)
    gs = GameState(size=7)
    gs.do_move((1, 0))  # B
    gs.do_move((5, 4))  # W
    gs.do_move((2, 1))  # B
    gs.do_move((6, 5))  # W
    gs.do_move((1, 2))  # B
    gs.do_move((5, 6))  # W
    gs.do_move((0, 1))  # B
    gs.do_move((4, 5))  # W

    # test black eye top left
    self.assertTrue(gs.is_eyeish((1, 1), go.BLACK))
    self.assertFalse(gs.is_eyeish((1, 1), go.WHITE))

    # test white eye bottom right
    self.assertTrue(gs.is_eyeish((5, 5), go.WHITE))
    self.assertFalse(gs.is_eyeish((5, 5), go.BLACK))

    # test no eye in other random positions
    self.assertFalse(gs.is_eyeish((1, 0), go.BLACK))
    self.assertFalse(gs.is_eyeish((1, 0), go.WHITE))
    self.assertFalse(gs.is_eyeish((2, 2), go.BLACK))
    self.assertFalse(gs.is_eyeish((2, 2), go.WHITE))

def test_snapback_is_not_ko(self):
    gs = GameState(size=5)
    # B X W B .
    # W W B . .
    # . . . . .
    # . . . . .
    # . . . . .
    # Imagine black plays at 'X', capturing the white stone at (2, 0).
    # White may then play again at (2, 0) to capture the black stones at
    # (0, 0) and (1, 0). This is a 'snapback', not a 'ko', since it does
    # not return the game to a previous position.
    B = [(0, 0), (2, 1), (3, 0)]
    W = [(0, 1), (1, 1), (2, 0)]
    for (b, w) in zip(B, W):
        gs.do_move(b)
        gs.do_move(w)
    # do the capture of the single white stone
    gs.do_move((1, 0))
    # there should be no ko
    self.assertIsNone(gs.ko)
    self.assertTrue(gs.is_legal((2, 0)))
    # now play the snapback
    gs.do_move((2, 0))
    # check that the prisoner counts worked out
    self.assertEqual(gs.num_black_prisoners, 2)
    self.assertEqual(gs.num_white_prisoners, 1)

def test_true_eye(self):
    gs = GameState(size=7)
    gs.do_move((1, 0), go.BLACK)
    gs.do_move((0, 1), go.BLACK)

    # false eye at (0, 0)
    self.assertTrue(gs.is_eyeish((0, 0), go.BLACK))
    self.assertFalse(gs.is_eye((0, 0), go.BLACK))

    # make it a true eye by turning the corner (1, 1) into an eye itself
    gs.do_move((1, 2), go.BLACK)
    gs.do_move((2, 1), go.BLACK)
    gs.do_move((2, 2), go.BLACK)
    gs.do_move((0, 2), go.BLACK)

    self.assertTrue(gs.is_eyeish((0, 0), go.BLACK))
    self.assertTrue(gs.is_eye((0, 0), go.BLACK))
    self.assertTrue(gs.is_eye((1, 1), go.BLACK))

def test_liberties_after_capture(self):
    # creates a 3x3 black group in the middle that is then entirely captured;
    # an assertion is then made that the resulting liberties after the capture
    # are the same as if the group had never been there
    gs_capture = GameState(7)
    gs_reference = GameState(7)
    # add in 3x3 black stones
    for x in range(2, 5):
        for y in range(2, 5):
            gs_capture.do_move((x, y), go.BLACK)
    # surround the black group with white stones
    # and set the same white stones in gs_reference
    for x in range(2, 5):
        gs_capture.do_move((x, 1), go.WHITE)
        gs_capture.do_move((x, 5), go.WHITE)
        gs_reference.do_move((x, 1), go.WHITE)
        gs_reference.do_move((x, 5), go.WHITE)
    gs_capture.do_move((1, 1), go.WHITE)
    gs_reference.do_move((1, 1), go.WHITE)
    for y in range(2, 5):
        gs_capture.do_move((1, y), go.WHITE)
        gs_capture.do_move((5, y), go.WHITE)
        gs_reference.do_move((1, y), go.WHITE)
        gs_reference.do_move((5, y), go.WHITE)

    # board configuration and liberties of gs_capture and gs_reference
    # should be identical
    self.assertTrue(np.all(gs_reference.board == gs_capture.board))
    self.assertTrue(np.all(gs_reference.liberty_counts == gs_capture.liberty_counts))

def test_standard_ko(self):
    # . B . .
    # B X B .
    # W B W .
    # . W . .
    # (X marks where white plays at (1, 1), capturing the black stone at (2, 1))
    gs = GameState(size=9)
    gs.do_move((1, 0))  # B
    gs.do_move((2, 0))  # W
    gs.do_move((2, 1))  # B
    gs.do_move((3, 1))  # W
    gs.do_move((1, 2))  # B
    gs.do_move((2, 2))  # W
    gs.do_move((0, 1))  # B
    gs.do_move((1, 1))  # W trigger capture and ko

    self.assertEqual(gs.num_black_prisoners, 1)
    self.assertEqual(gs.num_white_prisoners, 0)

    # black retaking at (2, 1) immediately would repeat the position: illegal ko
    self.assertFalse(gs.is_legal((2, 1)))

    # after an exchange elsewhere, the ko is lifted
    gs.do_move((5, 5))
    gs.do_move((5, 6))
    self.assertTrue(gs.is_legal((2, 1)))

class TestLiberties(unittest.TestCase):

    def setUp(self):
        # (only the top-left 12x12 corner of the 19x19 board is shown)
        #   0 1 2 3 4 5 6 7 8 9 A B
        # 0 . . . . . . . . . . . .
        # 1 . . . . . . . . . . . .
        # 2 . . . . . . . . . . . .
        # 3 . . . . . . . . . . . .
        # 4 . . . . . B B . . . . .
        # 5 . . . . . W B . . . . .
        # 6 . . . . . . B . . . . .
        # 7 . . . . . . . . . . . .
        # 8 . . . . . . . . . . . .
        # 9 . . . . . . . . . . W .
        # A . . . . . . . . . . W W
        # B . . . . . . . . . . . .
        self.s = GameState()
        self.s.do_move((4, 5))
        self.s.do_move((5, 5))
        self.s.do_move((5, 6))
        self.s.do_move((10, 10))
        self.s.do_move((4, 6))
        self.s.do_move((10, 11))
        self.s.do_move((6, 6))
        self.s.do_move((9, 10))

    def test_curr_liberties(self):
        self.assertEqual(self.s.liberty_counts[5][5], 2)
        self.assertEqual(self.s.liberty_counts[4][5], 8)
        self.assertEqual(self.s.liberty_counts[5][6], 8)

    def test_neighbors_edge_cases(self):
        st = GameState()
        st.do_move((0, 0))  # B B . . . . .
        st.do_move((5, 5))  # B W . . . . .
        st.do_move((0, 1))  # . . . . . . .
        st.do_move((6, 6))  # . . . . . . .
        st.do_move((1, 0))  # . . . . . W .
        st.do_move((1, 1))  # . . . . . . W

        # get_group in the corner
        self.assertEqual(len(st.get_group((0, 0))), 3, "group size in corner")
        # get_group of an empty space
        self.assertEqual(len(st.get_group((4, 4))), 0, "group size of empty space")
        # get_group of a single piece
        self.assertEqual(len(st.get_group((5, 5))), 1, "group size of single piece")

class TestMCTS(unittest.TestCase):

    def setUp(self):
        self.gs = GameState()
        self.mcts = MCTS(dummy_value, dummy_policy, dummy_rollout, n_playout=2)

    def _count_expansions(self):
        """Helper function to count the number of expansions past the root
        using the dummy policy.
        """
        node = self.mcts._root
        expansions = 0
        # Loop over actions in decreasing order of probability.
        for action, _ in sorted(dummy_policy(self.gs),
                                key=lambda move_prob: move_prob[1],
                                reverse=True):
            if action in node._children:
                expansions += 1
                node = node._children[action]
            else:
                break
        return expansions

    def test_playout(self):
        self.mcts._playout(self.gs.copy(), 8)
        # Assert that the most likely child was visited (according to the
        # dummy policy below).
        self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits)
        # Assert that the search depth expanded nodes 8 times.
        self.assertEqual(8, self._count_expansions())

    def test_playout_with_pass(self):
        # Test that playout handles the end of the game (i.e. passing/no moves).
        # Mock this by creating a policy that returns nothing after 4 moves.
        def stop_early_policy(state):
            if len(state.history) <= 4:
                return dummy_policy(state)
            else:
                return []
        self.mcts = MCTS(dummy_value, stop_early_policy, stop_early_policy,
                         n_playout=2)
        self.mcts._playout(self.gs.copy(), 8)
        # Assert that the most likely move (18, 18) is still only visited once.
        self.assertEqual(1, self.mcts._root._children[(18, 18)]._n_visits)
        # Assert that no expansions happened after reaching the "end" in 4 moves.
        self.assertEqual(5, self._count_expansions())

    def test_get_move(self):
        move = self.mcts.get_move(self.gs)
        self.mcts.update_with_move(move)
        # success if no errors

    def test_update_with_move(self):
        move = self.mcts.get_move(self.gs)
        self.gs.do_move(move)
        self.mcts.update_with_move(move)
        # Assert that the new root still has children.
        self.assertTrue(len(self.mcts._root._children) > 0)
        # Assert that the new root has no parent (the rest of the tree will be
        # garbage collected).
        self.assertIsNone(self.mcts._root._parent)
        # Assert that the next best move according to the root is (18, 17),
        # according to the dummy policy below.
        self.assertEqual((18, 17), self.mcts._root.select()[0])

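# The MCTS tests above refer to dummy_value, dummy_policy, and dummy_rollout,
# which are defined elsewhere in the test module. For the assertions to hold,
# the policy must rank (18, 18) highest, then (18, 17), and so on. A
# hypothetical sketch with that property, assuming GameState exposes
# get_legal_moves(); the scoring scheme and the _sketch names are assumptions,
# not the module's actual definitions:
def dummy_policy_sketch(state):
    moves = state.get_legal_moves()
    # Score moves so that probability increases with (row, col); (18, 18)
    # therefore ranks highest, followed by (18, 17).
    scores = [r * state.size + c + 1 for (r, c) in moves]
    total = float(sum(scores))
    return [(m, s / total) for m, s in zip(moves, scores)]

def dummy_value_sketch(state):
    # A neutral evaluation; any constant works for these structural tests.
    return 0.0

# The rollout policy plausibly reuses the same move distribution.
dummy_rollout_sketch = dummy_policy_sketch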