Example #1
    def test_legal_moves_not_ko(self):
        board, player = game_init()

        make_play(0, 0, board) # black
        make_play(1, 0, board) # white
        make_play(1, 1, board) # black
        make_play(2, 0, board) # white
        make_play(2, 1, board) # black
        make_play(8, 8, board) # white random pos
        # ○ ● ● . . .
        # . ○ ○ . . .
        # . . . . . .
        make_play(3, 0, board) # black captures the two white stones
        # ○ . . ○ . .
        # . ○ ○ . . .
        # . . . . . .
        mask = legal_moves(board)
        self.assertEqual(board[0][0][1][0], 0) # white stone 1
        self.assertEqual(board[0][0][1][1], 0) # was taken
        self.assertEqual(board[0][0][2][0], 0) # white stone 2
        self.assertEqual(board[0][0][2][1], 0) # was taken
        self.assertEqual(board[0][0][1][2], 1) # white stone 1 was here
        self.assertEqual(board[0][0][1][3], 0) # black stone was not here
        self.assertEqual(board[0][0][2][2], 1) # white stone 2 was here
        self.assertEqual(board[0][0][2][3], 0) # black stone was not here
        self.assertEqual(mask[1], False)  # legal: not a ko, two stones were captured
        self.assertEqual(mask[2], False)  # legal: not a ko, two stones were captured
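Note on mask indexing (a minimal sketch, not code from the repo): the flat move index appears to follow `index = y * SIZE + x`, with `SIZE * SIZE` reserved for the pass move, as suggested by `policy[y * SIZE + x]` and `policy[SIZE * SIZE]` in example #3. The helper name and `SIZE = 9` below are assumptions for illustration.

SIZE = 9  # assumed board size; the tests play at (8, 8)

def index2coord_sketch(index):
    # The last index is the pass move.
    if index == SIZE * SIZE:
        return None
    return index % SIZE, index // SIZE  # (x, y)

assert index2coord_sketch(1) == (1, 0)  # mask[1] above is the point (1, 0)
assert index2coord_sketch(2) == (2, 0)  # mask[2] above is the point (2, 0)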
Example #2
def new_subtree(policy, board, parent, add_noise=False):
    leaf = {}

    # We need to check for legal moves here because MCTS might not have expanded
    # this subtree
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)

    # Flatten the policy and optionally add Dirichlet noise for exploration.
    tmp = policy.reshape(-1)
    if add_noise:
        noise = np.random.dirichlet(
            [DIRICHLET_ALPHA for i in range(tmp.shape[0])])
        tmp = (1 - DIRICHLET_EPSILON) * tmp + DIRICHLET_EPSILON * noise

    for move, p in enumerate(tmp):
        if isinstance(p, MaskedConstant):  # masked entries are illegal moves; skip them
            continue

        leaf[move] = {
            'count': 0,
            'value': 0,
            'mean_value': 0,
            'p': p,
            'subtree': {},
            'parent': parent,
        }

    return leaf
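For context on the `MaskedConstant` check above: iterating over a `numpy.ma.masked_array` yields the `numpy.ma.masked` singleton (an instance of `MaskedConstant`) for masked entries, so illegal moves never get a node in the subtree. A self-contained sketch with made-up values:

import numpy as np
import numpy.ma as ma
from numpy.ma.core import MaskedConstant

policy = np.array([0.5, 0.3, 0.2])     # hypothetical move priors
mask = np.array([False, True, False])  # True marks an illegal move
masked = ma.masked_array(policy, mask=mask)

for move, p in enumerate(masked):
    if isinstance(p, MaskedConstant):  # move 1 is masked, so it is skipped
        continue
    print(move, float(p))              # prints: 0 0.5, then 2 0.2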
Example #3
    def test_simulation_can_recover_from_sucide_move_white(self):
        model = self.model
        board, player = game_init()

        x = randrange(SIZE)
        y = randrange(SIZE)
        for i in range(SIZE):
            for j in range(SIZE):
                make_play(0, SIZE, board)  # Black passes
                if i == x and j == y:
                    make_play(0, SIZE, board)  # White also passes, leaving this point empty
                else:
                    make_play(i, j, board)  # White fills every other point
        make_play(0, SIZE, board)  # Black passes; it is white's turn

        policies, values = model.predict_on_batch(board)
        policy = policies[0]
        policy[y * SIZE + x], policy[SIZE * SIZE] = policy[
            SIZE * SIZE], policy[y * SIZE + x]  # Make the best move the suicide point
        mask = legal_moves(board)
        policy = ma.masked_array(policy, mask=mask)
        self.assertEqual(np.argmax(policy),
                         y * SIZE + x)  # Best option in policy is the suicide
        tree = new_tree(policy, board)
        chosen_play = select_play(policy,
                                  board,
                                  mcts_simulations=128,
                                  mcts_tree=tree,
                                  temperature=0,
                                  model=model)

        # First simulation chooses pass, second simulation chooses suicide (p is still higher),
        # then going deeper it chooses pass again (value is higher)
        self.assertEqual(chosen_play, SIZE * SIZE)  # Pass move is best option
Example #4
def select_play(policy, board, mcts_simulations, mcts_tree, temperature,
                model):
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree,
                          temperature, model)

    x, y = index2coord(index)
    return index
Example #5
def select_play(policy, board, mcts_simulations, mcts_tree, temperature, model):
    # If the player to move is already winning and the position has not changed
    # since the previous turn, pass immediately.
    if ((get_winner(board)[0] == board[0, 0, 0, -1])
            and (board[:, :, :, 0] == board[:, :, :, 2]).all()
            and (board[:, :, :, 1] == board[:, :, :, 3]).all()):
        return SIZE * SIZE
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree, temperature, model)

    x, y = index2coord(index)
    return index
Example #6
def select_play(policy, board, mcts_simulations, mcts_tree, temperature,
                model):
    mask = legal_moves(board)
    policy = ma.masked_array(policy, mask=mask)
    start = datetime.datetime.now()
    index = mcts_decision(policy, board, mcts_simulations, mcts_tree,
                          temperature, model)
    end = datetime.datetime.now()
    d = tree_depth(mcts_tree)
    # print("################TIME PER MOVE: %s   tree depth: %s" % (end - start, d))
    return index
Example #7
    def test_legal_moves_suicide2(self):
        board, player = game_init()

        make_play(3, 0, board)  # black (player 1); arguments are (x=col, y=row)
        make_play(1, 0, board)  # white
        make_play(1, 1, board)  # black
        make_play(2, 1, board)  # white
        make_play(3, 1, board, -1)  # white
        make_play(4, 0, board, -1)  # white
        # . ● . ○ ● .
        # . ○ ● ● . .
        # . . . . . .
        mask = legal_moves(board)
        self.assertEqual(mask[2], True)  # suicide move should be illegal
Example #8
    def test_legal_moves_suicide(self):
        board, player = game_init()

        make_play(0, 1, board)  # black
        make_play(1, 0, board)  # white
        make_play(1, 1, board)  # black
        make_play(2, 1, board)  # white
        make_play(8, 8, board)  # black random pos
        make_play(3, 0, board)  # white
        # . ● . ● . .
        # ○ ○ ● . . .
        # . . . . . .
        mask = legal_moves(board)
        self.assertEqual(mask[2], True)  # suicide move should be illegal
Example #9
    def test_legal_moves_not_suicide(self):
        board, player = game_init()

        make_play(0, 0, board)  # black
        make_play(1, 0, board)  # white
        make_play(1, 1, board)  # black
        make_play(2, 1, board)  # white
        make_play(8, 8, board)  # black random pos
        make_play(3, 0, board)  # white
        # ○ ● . ● . .
        # . ○ ● . . .
        # . . . . . .
        mask = legal_moves(board)
        self.assertEqual(mask[2],
                         False)  # not a suicide when it captures other stones
Example #10
    def test_legal_moves_ko(self):
        board, player = game_init()

        make_play(0, 0, board) # black
        make_play(1, 0, board) # white
        make_play(1, 1, board) # black
        make_play(2, 1, board) # white
        make_play(8, 8, board) # black random pos
        make_play(3, 0, board) # white
        # ○ ● . ● . .
        # . ○ ● . . .
        # . . . . . .
        make_play(2, 0, board) # black captures the white stone at (1, 0)
        # ○ . ○ ● . .
        # . ○ ● . . .
        # . . . . . .
        mask = legal_moves(board)
        self.assertEqual(board[0][0][1][0], 0) # white stone
        self.assertEqual(board[0][0][1][1], 0) # was taken
        self.assertEqual(board[0][0][1][2], 1) # white stone was here
        self.assertEqual(board[0][0][1][3], 0) # black stone was not here
        self.assertEqual(mask[1], True)  # ko: white may not immediately recapture at (1, 0)
Example #11
    def test_simulation_can_recover_from_sucide_move_white(self):
        model = self.model
        board, player = game_init()

        give_two_eyes(board, 'W')

        policies, values = model.predict_on_batch(board)
        policy = policies[0]

        if np.argmax(policy) == PASS:
            policy[0], policy[PASS] = policy[PASS], policy[0] # Make the best move the suicide point
            mask = legal_moves(board)
            policy = ma.masked_array(policy, mask=mask)
            self.assertEqual(np.argmax(policy), 0) # Best option in policy is the suicide
        else:
            print("Warning, policy is not great")

        tree = Tree()
        tree.new_tree(policy, board, move=2)
        chosen_play = select_play(policy, board, mcts_simulations=128, mcts_tree=tree.tree, temperature=0, model=model)
        
        # First simulation chooses pass, second simulation chooses suicide (p is still higher),
        # then going deeper it chooses pass again (value is higher)
        self.assertEqual(chosen_play, PASS) # Pass move is best option