def test_legal_moves_not_ko(self):
    board, player = game_init()
    make_play(0, 0, board)  # black
    make_play(1, 0, board)  # white
    make_play(1, 1, board)  # black
    make_play(2, 0, board)  # white
    make_play(2, 1, board)  # black
    make_play(8, 8, board)  # white random pos
    # ○ ● ● . . .
    # . ○ ○ . . .
    # . . . . . .
    make_play(3, 0, board)  # black captures first
    # ○ . . ○ . .
    # . ○ ○ . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(board[0][0][1][0], 0)  # white stone 1
    self.assertEqual(board[0][0][1][1], 0)  # was taken
    self.assertEqual(board[0][0][2][0], 0)  # white stone 2
    self.assertEqual(board[0][0][2][1], 0)  # was taken
    self.assertEqual(board[0][0][1][2], 1)  # white stone 1 was here
    self.assertEqual(board[0][0][1][3], 0)  # black stone was not here
    self.assertEqual(board[0][0][2][2], 1)  # white stone 2 was here
    self.assertEqual(board[0][0][2][3], 0)  # black stone was not here
    self.assertEqual(mask[1], False)  # not ko: two stones were captured, so the position cannot repeat
    self.assertEqual(mask[2], False)

def new_subtree(policy, board, parent, add_noise=False):
    leaf = {}

    # We need to check for legal moves here because MCTS might not have
    # expanded this subtree
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)

    # Add Dirichlet noise.
    tmp = policy.reshape(-1)
    if add_noise:
        noise = np.random.dirichlet(
            [DIRICHLET_ALPHA for i in range(tmp.shape[0])])
        tmp = (1 - DIRICHLET_EPSILON) * tmp + DIRICHLET_EPSILON * noise

    for move, p in enumerate(tmp):
        if isinstance(p, MaskedConstant):
            continue
        leaf[move] = {
            'count': 0,
            'value': 0,
            'mean_value': 0,
            'p': p,
            'subtree': {},
            'parent': parent,
        }
    return leaf

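For reference, a minimal sketch (not from this codebase) of the masking convention `new_subtree` relies on: `legal_moves` is assumed to return a boolean array where `True` marks an illegal move, `numpy.ma` propagates those masked entries through the Dirichlet-noise arithmetic, and iterating the result yields `MaskedConstant` for exactly the illegal moves. `SIZE` and the alpha/epsilon values below are illustrative placeholders.

import numpy as np
import numpy.ma as ma
from numpy.ma.core import MaskedConstant

SIZE = 3                                  # toy board size, for illustration only
DIRICHLET_ALPHA, DIRICHLET_EPSILON = 0.03, 0.25

policy = np.full(SIZE * SIZE + 1, 1.0 / (SIZE * SIZE + 1))  # uniform policy + pass
mask = np.zeros(SIZE * SIZE + 1, dtype=bool)
mask[[0, 1]] = True                       # pretend moves 0 and 1 are illegal

tmp = ma.masked_array(policy, mask=mask).reshape(-1)
noise = np.random.dirichlet([DIRICHLET_ALPHA] * tmp.shape[0])
tmp = (1 - DIRICHLET_EPSILON) * tmp + DIRICHLET_EPSILON * noise

# Masked entries survive the arithmetic as masked constants, which is what
# the isinstance(p, MaskedConstant) check in new_subtree skips.
legal = {move: p for move, p in enumerate(tmp)
         if not isinstance(p, MaskedConstant)}
assert 0 not in legal and 1 not in legal
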
def test_simulation_can_recover_from_sucide_move_white(self):
    model = self.model
    board, player = game_init()
    x = randrange(SIZE)
    y = randrange(SIZE)
    for i in range(SIZE):
        for j in range(SIZE):
            make_play(0, SIZE, board)  # Black does not play (passes)
            if i == x and j == y:
                make_play(0, SIZE, board)  # White also passes on one intersection
            else:
                make_play(i, j, board)  # White fills the rest of the board
    make_play(0, SIZE, board)  # Black does not play (passes)
    policies, values = model.predict_on_batch(board)
    policy = policies[0]
    # Swap so that the suicide move becomes the best move in the policy
    policy[y * SIZE + x], policy[SIZE * SIZE] = \
        policy[SIZE * SIZE], policy[y * SIZE + x]
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    self.assertEqual(np.argmax(policy), y * SIZE + x)  # Best option in policy is suicide
    tree = new_tree(policy, board)
    chosen_play = select_play(policy, board, mcts_simulations=128,
                              mcts_tree=tree, temperature=0, model=model)
    # The first simulation chooses pass, the second chooses suicide (p is still
    # higher), then going deeper it chooses pass again (value is higher)
    self.assertEqual(chosen_play, SIZE * SIZE)  # Pass move is best option

def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree,
                          temperature, model)
    x, y = index2coord(index)
    return index

def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    # If the player to move is already winning and the last move left both
    # stone planes unchanged (i.e. it was presumably a pass), pass as well
    # to end the game.
    if (get_winner(board)[0] == board[0, 0, 0, -1]) \
            and (board[:, :, :, 0] == board[:, :, :, 2]).all() \
            and (board[:, :, :, 1] == board[:, :, :, 3]).all():
        return SIZE * SIZE  # pass
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree,
                          temperature, model)
    x, y = index2coord(index)
    return index

def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    start = datetime.datetime.now()
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree,
                          temperature, model)
    end = datetime.datetime.now()
    d = tree_depth(mcts_tree)
    # print("################TIME PER MOVE: %s tree depth: %s" % (end - start, d))
    return index

def test_legal_moves_suicide2(self):
    board, player = game_init()
    make_play(3, 0, board)      # black = 1; args are (col, row)
    make_play(1, 0, board)      # white
    make_play(1, 1, board)      # black
    make_play(2, 1, board)      # white
    make_play(3, 1, board, -1)  # white (color forced to -1)
    make_play(4, 0, board, -1)  # white (color forced to -1)
    # . ● . ○ ● .
    # . ○ ● ● . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], True)  # suicide move should be illegal (masked)

def test_legal_moves_suicide(self):
    board, player = game_init()
    make_play(0, 1, board)  # black
    make_play(1, 0, board)  # white
    make_play(1, 1, board)  # black
    make_play(2, 1, board)  # white
    make_play(8, 8, board)  # black random pos
    make_play(3, 0, board)  # white
    # . ● . ● . .
    # ○ ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], True)  # suicide move should be illegal

def test_legal_moves_not_suicide(self):
    board, player = game_init()
    make_play(0, 0, board)  # black
    make_play(1, 0, board)  # white
    make_play(1, 1, board)  # black
    make_play(2, 1, board)  # white
    make_play(8, 8, board)  # black random pos
    make_play(3, 0, board)  # white
    # ○ ● . ● . .
    # . ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(mask[2], False)  # not a suicide when it captures other stones

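Taken together, the suicide tests pin down the rule that `legal_moves` enforces: a placement is illegal only when the new group ends with zero liberties and captures nothing. A self-contained sketch of that check on a plain 2-D grid; the helpers and encoding here (1 = black, -1 = white, 0 = empty) are illustrative, not this codebase's board tensor.

def neighbors(x, y, size):
    """4-connected neighbors that fall inside the board."""
    for dx, dy in ((1, 0), (-1, 0), (0, 1), (0, -1)):
        nx, ny = x + dx, y + dy
        if 0 <= nx < size and 0 <= ny < size:
            yield nx, ny

def group_and_liberties(grid, x, y):
    """Flood-fill the group containing (x, y); return (stones, liberties)."""
    color, size = grid[y][x], len(grid)
    stones, libs, stack = {(x, y)}, set(), [(x, y)]
    while stack:
        cx, cy = stack.pop()
        for nx, ny in neighbors(cx, cy, size):
            if grid[ny][nx] == 0:
                libs.add((nx, ny))
            elif grid[ny][nx] == color and (nx, ny) not in stones:
                stones.add((nx, ny))
                stack.append((nx, ny))
    return stones, libs

def is_suicide(grid, x, y, color):
    """True if playing color at (x, y) leaves its own group with no
    liberties while capturing no adjacent enemy group."""
    grid[y][x] = color
    try:
        for nx, ny in neighbors(x, y, len(grid)):
            if grid[ny][nx] == -color:
                _, libs = group_and_liberties(grid, nx, ny)
                if not libs:
                    return False  # captures something, so the move is legal
        _, libs = group_and_liberties(grid, x, y)
        return not libs
    finally:
        grid[y][x] = 0  # undo the probe

On the position from test_legal_moves_not_suicide, the white stone at (1, 0) loses its last liberty when black plays (2, 0), so the capture branch fires and the move stays legal; in the two suicide tests nothing is captured and the new stone has no liberties, so the move is masked.
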
def test_legal_moves_ko(self):
    board, player = game_init()
    make_play(0, 0, board)  # black
    make_play(1, 0, board)  # white
    make_play(1, 1, board)  # black
    make_play(2, 1, board)  # white
    make_play(8, 8, board)  # black random pos
    make_play(3, 0, board)  # white
    # ○ ● . ● . .
    # . ○ ● . . .
    # . . . . . .
    make_play(2, 0, board)  # black captures first
    # ○ . ○ ● . .
    # . ○ ● . . .
    # . . . . . .
    mask = legal_moves(board)
    self.assertEqual(board[0][0][1][0], 0)  # white stone
    self.assertEqual(board[0][0][1][1], 0)  # was taken
    self.assertEqual(board[0][0][1][2], 1)  # white stone was here
    self.assertEqual(board[0][0][1][3], 0)  # black stone was not here
    self.assertEqual(mask[1], True)  # retaking the ko immediately is illegal

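The plane assertions above (indices 2 and 3 read as "was here") suggest the board tensor keeps the previous position alongside the current one, which is all a simple-ko check needs: a move is illegal if, after resolving captures, it exactly recreates the position as it stood before the opponent's last move. A sketch of that idea, reusing the helpers from the previous snippet; again illustrative, not this codebase's implementation.

import copy

def recreates_previous_position(grid, x, y, color, previous_grid):
    """Simple ko: would this move restore the position that existed before
    the opponent's last move? A positional-superko variant would compare
    against every earlier position instead of only the last one."""
    probe = copy.deepcopy(grid)
    probe[y][x] = color
    # Remove any adjacent enemy group left without liberties.
    for nx, ny in neighbors(x, y, len(probe)):
        if probe[ny][nx] == -color:
            stones, libs = group_and_liberties(probe, nx, ny)
            if not libs:
                for sx, sy in stones:
                    probe[sy][sx] = 0
    return probe == previous_grid

In test_legal_moves_ko, white retaking at (1, 0) would capture the lone black stone at (2, 0) and restore the pre-capture diagram exactly, so the move is masked; in test_legal_moves_not_ko two stones came off the board, the position cannot repeat after a single reply, and both points stay legal.
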
def test_simulation_can_recover_from_sucide_move_white(self):
    model = self.model
    board, player = game_init()
    give_two_eyes(board, 'W')

    policies, values = model.predict_on_batch(board)
    policy = policies[0]
    if np.argmax(policy) == PASS:
        # Make the suicide move the best option in the policy
        policy[0], policy[PASS] = policy[PASS], policy[0]
        mask = legal_moves(board)
        policy = ma.masked_array(policy, mask=mask)
        self.assertEqual(np.argmax(policy), 0)  # Best option in policy is suicide
    else:
        print("Warning, policy is not great")

    tree = Tree()
    tree.new_tree(policy, board, move=2)
    chosen_play = select_play(policy, board, mcts_simulations=128,
                              mcts_tree=tree.tree, temperature=0, model=model)
    # The first simulation chooses pass, the second chooses suicide (p is still
    # higher), then going deeper it chooses pass again (value is higher)
    self.assertEqual(chosen_play, PASS)  # Pass move is best option