Example #1
def negamax_alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
                       alpha: float, beta: float) -> float:
    """
    Search game tree using alpha-beta pruning with negamax.

    :param board: current board state
    :param player: current player
    :param depth: max depth to search in game tree
    :param alpha: alpha value for pruning
    :param beta: beta value for pruning
    :return: negamax value of the board from `player`'s perspective
    """

    # if we're at maximum depth or a terminal state, evaluate the board
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # otherwise loop over child nodes
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value, -negamax_alpha_beta(
                apply_player_action(board, move, player, copy=True),
                other_player, depth - 1, -beta, -alpha))
        alpha = max(alpha, value)
        if alpha >= beta:
            break
    # print(f'value:{value}')
    # print(f'depth = {depth}; end state = {check_game_over(board)}; player = {player}')
    # print(f'move:{move}; max value:{value}')
    return value
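
The function above returns a value but not a move. As a rough sketch of how it might be driven from the root, here is a hypothetical choose_move_negamax wrapper; it assumes that BoardPiece, PlayerAction, apply_player_action, initialize_game_state, and get_valid_moves can all be imported from agents.common (only the last two are confirmed by the test snippet in Example #3 below).

import numpy as np
from agents.common import (BoardPiece, PlayerAction, initialize_game_state,
                           get_valid_moves, apply_player_action)

def choose_move_negamax(board: np.ndarray, player: BoardPiece,
                        depth: int = 4) -> PlayerAction:
    # Hypothetical root-level wrapper (not part of the snippets above): try every
    # valid column, negate the child's value because the child is evaluated from
    # the opponent's perspective, and keep the best move. `alpha` is carried across
    # siblings so later children search the narrowed window (-inf, -alpha).
    other_player = BoardPiece(player % 2 + 1)
    best_move, alpha = None, -np.inf
    for move in get_valid_moves(board):
        value = -negamax_alpha_beta(
            apply_player_action(board, move, player, copy=True),
            other_player, depth - 1, -np.inf, -alpha)
        if best_move is None or value > alpha:
            best_move, alpha = move, value
    return PlayerAction(best_move)

# board = initialize_game_state()
# action = choose_move_negamax(board, BoardPiece(1))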
Example #2
def negamax(
    board: np.ndarray,
    player: BoardPiece,
    depth: int,
) -> float:
    """
    Search game tree using plain negamax.
    This is "colorless" negamax -- it assumes the heuristic value is from the perspective of the player its called on

    :param board: current board state
    :param player: current player
    :param depth: max depth to search in game tree
    :return: negamax value of the board from `player`'s perspective
    """
    # if we're at maximum depth or a terminal state, evaluate the board
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # otherwise loop over child nodes
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value,
            -negamax(apply_player_action(board, move, player, copy=True),
                     other_player, depth - 1))
    # print(f'value:{value}')
    # print(f'depth = {depth}; end state = {check_game_over(board)}; player = {player}')
    # print(f'move:{move}; max value:{value}')
    return value
Example #3
def test_get_valid_moves():
    from agents.common import get_valid_moves
    from agents.common import initialize_game_state

    dummy_board = initialize_game_state()
    all_moves = np.arange(dummy_board.shape[1])

    assert np.all(get_valid_moves(dummy_board) == all_moves)
Example #4
File: mcts.py Project: tah0/conn4
    def unexpanded_moves(self) -> list:
        """
        Return which moves have not been expanded yet.
        :return: list of unexpanded moves (a list, so entries can be popped off later)
        """
        #     return [m for m in self.legal_moves if m not in self.expanded_moves]
        if self._unexpanded_moves is None:
            # compute lazily on first access, then cache
            self._unexpanded_moves = list(get_valid_moves(self.board))
        return self._unexpanded_moves
Example #5
File: mcts.py Project: tah0/conn4
    def __init__(self, board: np.ndarray, to_play: BoardPiece, last_move: PlayerAction = None, parent=None):
        # board
        self.board = board

        # game context: whose turn it is and how this node was reached
        self.to_play = to_play  # which player's turn it is
        self.last_move = last_move  # what move resulted in the board
        self.parent = parent  # parent node -- the previous state

        # children bookkeeping
        self.children = {}  # dict of children resulting from valid moves
        self.legal_moves = list(get_valid_moves(board))
        self._unexpanded_moves = list(get_valid_moves(board))  # moves not evaluated yet
        # self._unexpanded_moves = None  # moves not evaluated yet
        self.expanded_moves = []

        # MCTS statistics
        self.n_plays = 0
        self.n_wins = 0
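
A small, hypothetical construction sketch (assuming MonteCarloNode is importable from the project's mcts.py, and initialize_game_state and BoardPiece from agents.common):

from agents.common import initialize_game_state, BoardPiece

# Root node for a fresh game: no parent and no last move; player 1 is to act.
root = MonteCarloNode(board=initialize_game_state(), to_play=BoardPiece(1))
print(root.legal_moves)            # every column of the empty board is playable
print(root.n_plays, root.n_wins)   # 0 0 -- presumably updated during MCTS back-propagation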
Example #6
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Choose a valid, non-full column randomly and return it as `action`
    :param board:
    :param player:
    :param saved_state:
    :return:
    """
    open_moves = get_valid_moves(board)
    action = np.random.choice(open_moves)
    # TODO: what to do with saved_state?
    return action, saved_state
Example #7
def minimax_value(board: np.ndarray, player: BoardPiece, maxing: bool,
                  depth: int) -> float:
    """

    :param board:
    :param player:
    :param maxing:
    :param depth:
    :return:
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)
    value = 0

    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)
    elif maxing is True:
        value = -np.inf
        for _, move in enumerate(valid_moves):
            # print('Maxing')
            # print('move:', move)
            MMv = minimax_value(board=apply_player_action(board,
                                                          move,
                                                          player,
                                                          copy=True),
                                player=player,
                                maxing=False,
                                depth=depth - 1)
            # print('MM value:', MMv)
            value = max(value, MMv)
    else:
        value = np.inf
        for _, move in enumerate(valid_moves):
            # print('Minimizing')
            # print('move:', move)
            # on minimizing plies the opponent makes the move
            MMv = minimax_value(board=apply_player_action(board,
                                                          move,
                                                          other_player,
                                                          copy=True),
                                player=player,
                                maxing=True,
                                depth=depth - 1)
            # print('MM value:', MMv)
            value = min(value, MMv)
    return value
Example #8
File: mcts.py Project: tah0/conn4
    def simulate(self, node: MonteCarloNode) -> Union[BoardPiece, GameState]:
        """
        Simulate a game from a given node -- outcome is either player or GameState.IS_DRAW

        :param node:
        :return:
        """
        current_rollout_state = node.board.copy()
        curr_player = node.to_play
        while not check_game_over(current_rollout_state):
            possible_moves = get_valid_moves(current_rollout_state)
            if possible_moves.size > 1:
                action = np.random.choice(possible_moves)
            else:
                # only one column left: take it directly rather than the whole array
                action = possible_moves[0]

            current_rollout_state = apply_player_action(current_rollout_state, action, curr_player, copy=True)
            curr_player = BoardPiece(curr_player % 2 + 1)
        return evaluate_end_state(current_rollout_state)
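
A rough end-to-end rollout call, pieced together from the calls that appear in the test in Example #10 below (MonteCarlo(player), make_node, and the hash-based node key are taken from that test; treat the exact key scheme as an assumption rather than a documented API):

from agents.common import initialize_game_state, BoardPiece

player = BoardPiece(1)
initial_state = initialize_game_state()

tree = MonteCarlo(player)
tree.make_node(initial_state, player)
node = tree.nodes[hash(initial_state.tostring()) + hash(player)]

# one random playout from the root: the winning piece, or GameState.IS_DRAW
outcome = tree.simulate(node)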
Example #9
def alpha_beta_value(board: np.ndarray, player: BoardPiece, maxing: bool,
                     depth: int, alpha: float, beta: float) -> float:
    """
    Minimax value of `board` for `player`, depth-limited with alpha-beta pruning.
    Parameters mirror `minimax_value` above, plus the `alpha`/`beta` pruning bounds.
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)
    elif maxing is True:
        value = -np.inf
        for _, move in enumerate(valid_moves):
            ABv = alpha_beta_value(board=apply_player_action(board,
                                                             move,
                                                             player,
                                                             copy=True),
                                   player=player,
                                   maxing=False,
                                   depth=depth - 1,
                                   alpha=alpha,
                                   beta=beta)
            value = max(value, ABv)
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return value
    else:
        value = np.inf
        for _, move in enumerate(valid_moves):
            # on minimizing plies the opponent makes the move
            ABv = alpha_beta_value(board=apply_player_action(board,
                                                             move,
                                                             other_player,
                                                             copy=True),
                                   player=player,
                                   maxing=True,
                                   depth=depth - 1,
                                   alpha=alpha,
                                   beta=beta)
            value = min(value, ABv)
            beta = min(beta, value)
            if beta <= alpha:
                break
        return value
Example #10
def test_best_play():

    # check that the best play is the child visited most often (max n_plays)
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    tree.run_search(root.board, root.to_play, n_sims=5000)
    # check that best move is the max of n_plays of children
    scores = [root.get_child(a).n_plays for a in root.legal_moves]
    assert tree.best_play(
        root.board, root.to_play)[0] == root.legal_moves[np.argmax(scores)]

    # check that winning moves are selected
    for c in get_valid_moves(initial_state):
        near_win = copy.deepcopy(initial_state)
        near_win[:3, c] = player
        # print(near_win)
        tree = MonteCarlo(player)
        tree.make_node(near_win, player)
        tree.run_search(near_win, player, n_sims=1000)
        # print(tree.get_stats(near_win, player))
        assert tree.best_play(near_win, player)[0] == c
Example #11
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    :param board:
    :param player:
    :param saved_state:
    :return:
    """
    open_moves = get_valid_moves(board)
    print(f'Open moves: {open_moves}')

    new_states = [
        apply_player_action(board, move, player, copy=True)
        for move in open_moves
    ]

    # if a move results in a win, play it
    winning_moves = np.array([
        check_end_state(state, player) for state in new_states
    ]) == GameState.IS_WIN
    if np.any(winning_moves):
        actions = open_moves[np.argwhere(winning_moves)].squeeze()
        if actions.size > 1:
            action = np.random.choice(actions)
        else:
            action = actions
        print(f'playing action {action} for a win')
        return action, saved_state

    # if a move results in blocking an opponent's win, play it
    other_player = BoardPiece(player % 2 + 1)

    new_states_other = [
        apply_player_action(board, move, other_player, copy=True)
        for move in open_moves
    ]
    blocking_moves = np.array([
        check_end_state(state, other_player) for state in new_states_other
    ]) == GameState.IS_WIN
    if np.any(blocking_moves):
        actions = open_moves[np.argwhere(blocking_moves)].squeeze()
        if actions.size > 1:
            action = np.random.choice(actions)
        else:
            action = actions
        print(f'playing action {action} for a block')
        return action, saved_state

    # otherwise, score each successor state with the depth-limited alpha-beta search
    # (the heuristic is applied at the leaves); after `player` moves it is the
    # opponent's turn, so each child is evaluated as a minimizing node

    # scores = [minimax_value(apply_player_action(board, move, player, copy=True), player, False, MAX_DEPTH) for move in open_moves]
    scores = [
        alpha_beta_value(apply_player_action(board, move, player, copy=True),
                         player,
                         False,
                         MAX_DEPTH,
                         alpha=-np.inf,
                         beta=np.inf) for move in open_moves
    ]

    # randomly select among best moves
    if np.sum(scores == np.max(scores)) > 1:
        best_moves = open_moves[np.argwhere(
            scores == np.max(scores))].squeeze()
        action = np.random.choice(best_moves)
    else:
        action = open_moves[np.argmax(scores)].squeeze()
    print(f'Heuristic values: {scores}')
    print(f'playing action {action} with heuristic value {np.max(scores)}')
    return action, saved_state
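
For completeness, a single-call usage sketch of the agent above (it assumes MAX_DEPTH, alpha_beta_value, and the agents.common helpers are already defined or imported in the surrounding module):

from agents.common import initialize_game_state, BoardPiece

board = initialize_game_state()
action, saved_state = generate_move_minimax(board, BoardPiece(1), None)
# `action` is a playable column index; `saved_state` is passed back unchanged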
Example #12
def test_init_node():
    assert np.all(initial_node.legal_moves == get_valid_moves(initial_state))
    assert np.all(initial_node.legal_moves == initial_node.unexpanded_moves)
    assert np.all(initial_node.board == initial_state)