# Exemplo n.º 1
# 0
    def __init__(self, board: Bitmap, mask: Bitmap, board_shp: Tuple,
                 node_col: PlayerAction, max_player: bool):
        """Create a tree node for the given game state.

        Parameters
            board = bitmap representing positions of current player
            mask = bitmap representing positions of both players
            board_shp = tuple giving the shape of the board (rows, columns)
            node_col = the column played to create this node (game state)
            max_player = indicates whether current player is the max player
        """

        # Record the game state this node represents
        self.board = board
        self.mask = mask
        self.shape: Tuple = board_shp
        self.node_col: int = node_col
        self.max_player: bool = max_player
        self.state = check_end_state(self.board, self.mask, self.shape)

        # Children are generated from the valid actions; shuffling here
        # randomizes the order in which child nodes get created/explored.
        self.actions: List[int] = valid_actions(self.mask, board_shp)
        np.random.shuffle(self.actions)
        self.children: List[Connect4Node] = []

        # UCB1 statistics (presumably visit/score accumulators — confirm
        # against the selection code elsewhere in the class)
        self.si: float = 0
        self.wi: float = 0
# Exemplo n.º 2
# 0
def test_node_initialization():
    """MCTS must play the only remaining empty column of a near-full board."""
    # Build a board with exactly one empty slot; regenerate until the
    # resulting position is not already a win for either player.
    player = cm.PLAYER1
    arr_bd, bit_bd, mask_bd, player = generate_full_board(player, 1)
    bd_shp = arr_bd.shape
    while cm.check_end_state(bit_bd, mask_bd, bd_shp) == cm.GameState.IS_WIN:
        arr_bd, bit_bd, mask_bd, player = generate_full_board(player, 1)
    print(cm.pretty_print_board(arr_bd))

    # The agent's move must match the single remaining valid column
    chosen = agmcts.generate_move_mcts(arr_bd, player, None)[0]
    open_col = cm.valid_actions(mask_bd, bd_shp)[0]
    print('MCTS plays in column {}'.format(chosen))
    assert chosen == open_col
# Exemplo n.º 3
# 0
    def sim_game(self):
        """ Simulates one iteration of a game from the current game state

        This function applies random actions until the game reaches a terminal
        state, either a win or a draw. It then returns the value associated
        with this state, which is propagated back up the tree to the root,
        updating the stats along the way.

        Returns
            True if max_player wins
            False if min_player wins
            -1 if the result is a draw
        """

        # Randomly choose a valid action until the game ends.
        # Local copies: self.board/self.mask are never mutated here.
        sim_board, sim_mask = self.board, self.mask
        game_state = check_end_state(sim_board, sim_mask, self.shape)
        curr_max_p = self.max_player
        while game_state == GameState.STILL_PLAYING:
            # Randomly select an action
            action = np.random.choice(valid_actions(sim_mask, self.shape))
            # Apply the action to the board
            sim_board, sim_mask = apply_action_cp(sim_board, sim_mask, action,
                                                  self.shape)
            # Update the max_player boolean.
            # NOTE(review): curr_max_p is flipped BEFORE the terminal check,
            # so at loop exit it reflects the player to move, not the player
            # who just moved — whether that matches the win attribution below
            # depends on apply_action_cp's perspective swap; confirm against
            # that helper.
            curr_max_p = not curr_max_p
            # Check the game state after the new action is applied
            game_state = check_end_state(sim_board, sim_mask, self.shape)

        if game_state == GameState.IS_WIN:
            # TODO: possibly change how the score calculation works
            #  (i.e. return integers here instead of booleans)
            if curr_max_p:
                return True
            else:
                return False
        elif game_state == GameState.IS_DRAW:
            return -1
        else:
            # NOTE(review): unreachable for the documented GameState values;
            # if reached, this prints and implicitly returns None, which
            # violates the documented return contract — consider raising.
            print('Error in Simulation')
# Exemplo n.º 4
# 0
def alpha_beta(board: Bitmap, mask: Bitmap, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore,
               board_shp: Tuple) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively call alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, calculate and
    return the heuristic score. Scores farther down the tree are penalized.

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: boolean indicating whether the depth at which alpha_beta
                       is called from is a maximizing or minimizing player
    :param depth: the current depth in the game tree
    :param alpha: the currently best score for the maximizing player along the
                  path to root
    :param beta: the currently best score for the minimizing player along the
                  path to root
    :param board_shp: the shape of the game board

    :return: the best action and the associated score
    """

    # If the node is at the max depth or a terminal node calculate the score
    max_depth = 7
    win_score = 150
    # board ^ mask isolates the OTHER player's pieces (board holds the current
    # player's, mask both), i.e. this checks whether the previous mover won.
    state_p = check_end_state(board ^ mask, mask, board_shp)
    if state_p == GameState.IS_WIN:
        # A win by the previous mover is a loss for the player to move here
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif state_p == GameState.IS_DRAW:
        return 0, None
    elif depth == max_depth:
        return heuristic_solver_bits(board, mask, board_shp[0],
                                     max_player), None

    # For each potential action, call alpha_beta
    pot_actions = valid_actions(mask, board_shp)
    if max_player:
        # Maximizing player: sentinel well below any reachable score
        score = -100000
        action = -1
        for col in pot_actions:
            # Apply the current action
            min_board, new_mask = apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, temp = alpha_beta(min_board, new_mask, False, depth + 1,
                                         alpha, beta, board_shp)
            # Penalize deeper results so nearer wins are preferred
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            # (prune is tested BEFORE the window update — order matters)
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        # Minimizing player: mirror of the branch above
        score = 100000
        action = -1
        for col in pot_actions:
            # Apply the current action
            max_board, new_mask = apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, temp = alpha_beta(max_board, new_mask, True, depth + 1,
                                         alpha, beta, board_shp)
            # Penalize deeper results (sign mirrored for the minimizer)
            new_score += depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
# Exemplo n.º 5
# 0
def alpha_beta_oracle(board: cm.Bitmap, mask: cm.Bitmap, max_player: bool,
                      alpha: GameScore, beta: GameScore, board_shp: Tuple,
                      depth: int) -> Tuple[GameScore, Optional[int]]:
    """ Function used to find guaranteed future wins, based on optimal play

    A guaranteed win for the max_player will return a score modified by the
    depth at which the win should occur. The number of moves in which the
    player should win is returned, along with the score. Guaranteed losses
    are accounted for in a similar way.

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: whether this level is the maximizing player
    :param alpha: best score so far for the maximizer on the path to root
    :param beta: best score so far for the minimizer on the path to root
    :param board_shp: the shape of the game board
    :param depth: the current depth in the game tree (0 at the root)

    :return: (score, moves-to-win at the root, else None)
    """

    max_depth = 8
    win_score = 100
    # board ^ mask isolates the previous mover's pieces; a win here means
    # the player who just moved has won.
    state_p = cm.check_end_state(board ^ mask, mask, board_shp)
    if state_p == cm.GameState.IS_WIN:
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif depth == max_depth:
        # Search horizon reached without a forced result
        return GameScore(0), None
    # NOTE(review): unlike alpha_beta, IS_DRAW is not handled here; on a full
    # board pot_actions is empty and score stays at its +/-100000 sentinel —
    # confirm whether the oracle is only ever called on non-terminal boards.

    # For each potential action, call alpha_beta
    pot_actions = cm.valid_actions(mask, board_shp)
    if max_player:
        score = -100000
        for col in pot_actions:
            # Apply the current action
            min_board, new_mask = cm.apply_action_cp(board, mask, col,
                                                     board_shp)
            # Call alpha-beta
            new_score, _ = alpha_beta_oracle(min_board, new_mask, False, alpha,
                                             beta, board_shp, depth + 1)
            # Penalize deeper wins so the quickest forced win scores highest
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
            # Check whether we can prune the rest of the branch
            if score >= beta:
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        # If this is the root node, return the optimal number of moves,
        # recovered by inverting the per-ply depth penalty applied above
        if depth == 0:
            if score > 0:
                # Forced win: plies to win (own moves only, hence 2*... + 1)
                return GameScore(score), 2 * (win_score - score) + 1
            else:
                # Forced loss: plies until the opponent's win
                return GameScore(score), 2 * (win_score + score)
        else:
            return GameScore(score), None
    else:
        score = 100000
        for col in pot_actions:
            # Apply the current action (pot_actions only contains playable
            # columns, so no full-column case can occur here)
            max_board, new_mask = cm.apply_action_cp(board, mask, col,
                                                     board_shp)
            # Call alpha-beta
            new_score, _ = alpha_beta_oracle(max_board, new_mask, True, alpha,
                                             beta, board_shp, depth + 1)
            # Depth penalty mirrored for the minimizer
            new_score += depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                break
            # Check whether beta updates the score
            if score < beta:
                beta = score
        return GameScore(score), None