Example #1
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    :param board:
    :param player:
    :param saved_state:
    :return:
    """
    open_moves = get_valid_moves(board)
    print(f'Open moves: {open_moves}')

    new_states = [
        apply_player_action(board, move, player, copy=True)
        for move in open_moves
    ]

    # if a move results in a win, play it
    winning_moves = np.array([
        check_end_state(state, player) for state in new_states
    ]) == GameState.IS_WIN
    if np.any(winning_moves):
        # boolean-mask indexing keeps a 1-D array even for a single match,
        # so np.random.choice works for one or several winning moves
        action = np.random.choice(open_moves[winning_moves])
        print(f'playing action {action} for a win')
        return action, saved_state

    # if a move results in blocking an opponent's win, play it
    other_player = BoardPiece(player % 2 + 1)  # maps piece 1 -> 2 and 2 -> 1

    new_states_other = [
        apply_player_action(board, move, other_player, copy=True)
        for move in open_moves
    ]
    blocking_moves = np.array([
        check_end_state(state, other_player) for state in new_states_other
    ]) == GameState.IS_WIN
    if np.any(blocking_moves):
        action = np.random.choice(open_moves[blocking_moves])
        print(f'playing action {action} for a block')
        return action, saved_state

    # otherwise, use the heuristic function to score possible states

    # plain-minimax alternative, without pruning:
    # scores = [minimax_value(apply_player_action(board, move, player, copy=True),
    #                         player, True, MAX_DEPTH) for move in open_moves]
    scores = [
        alpha_beta_value(apply_player_action(board, move, player, copy=True),
                         player,
                         True,
                         MAX_DEPTH,
                         alpha=-np.inf,
                         beta=np.inf) for move in open_moves
    ]

    # randomly select among the best-scoring moves
    scores = np.asarray(scores)
    best_moves = open_moves[scores == scores.max()]
    action = np.random.choice(best_moves)
    print(f'Heuristic values: {scores}')
    print(f'playing action {action} with heuristic value {np.max(scores)}')
    return action, saved_state
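
For context, a minimal driver sketch showing how this generator might be invoked; the zero-initialized 6x7 board and the BoardPiece(1) agent are assumptions standing in for this codebase's real initialization helpers.

import numpy as np

board = np.zeros((6, 7), dtype=np.int8)  # hypothetical empty board; dtype assumed to match BoardPiece
action, saved_state = generate_move_minimax(board, BoardPiece(1), None)
print(f'Agent opens with column {action}')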
Example #2
def minimax(board: np.ndarray, depth: int, alpha: float, beta: float,
            player: BoardPiece, maximizing_player: bool) -> Tuple[Optional[int], float]:
    """
    Return the column to play and the min/max score found for that subtree.

    :param board: current state of the board
    :param depth: remaining depth of the search tree
    :param alpha: best score the maximizing player can already guarantee
    :param beta: best score the minimizing player can already guarantee
    :param player: piece of the agent we are maximizing for
    :param maximizing_player: True if the current ply maximizes for player
    :return: the chosen column and its min or max score
    """

    #check which player is the agent so that we don't max/min for wrong player
    if player == PLAYER1:
        opponent_player = PLAYER2
    else:
        opponent_player = PLAYER1

    #check which columns are currently open
    open_cols = np.asarray(check_open_columns(board))

    #check if we're at a leaf/terminal node (before the depth check, so a
    #decided position is never scored by the heuristic)
    if connected_four(board, player):  #agent won
        return None, 100000
    if connected_four(board, opponent_player):  #opponent won
        return None, -100000
    if open_cols.size == 0:  #no moves left, must be a draw
        return None, 0

    #depth exhausted: score the position with the heuristic
    if depth == 0:
        score = heuristic(board, player)
        return None, score

    if maximizing_player:  #get max score for agent
        score = -math.inf
        for column in open_cols:
            #simulate the move: apply_player_action with copy=True returns the
            #original board unchanged plus a copy with the move applied
            board, board_copy = apply_player_action(board, column, player,
                                                    True)
            # recursive call to minimax with depth-1 with board_copy so board isn't modified
            next_score = minimax(board_copy, depth - 1, alpha, beta, player,
                                 False)[1]  #only get the score
            #if the score is better save score and column
            if next_score > score:
                score = next_score
                action_column = column
            #evaluate alpha for early stopping
            alpha = max(alpha, score)
            if alpha >= beta:  #don't evaluate more options down this path of tree
                break
        return action_column, score

    else:
        score = math.inf
        for column in open_cols:
            board, action_board = apply_player_action(board, column,
                                                      opponent_player, True)
            next_score = minimax(action_board, depth - 1, alpha, beta, player,
                                 True)[1]
            if next_score < score:
                score = next_score
                action_column = column
            #here we want to minimize since we're the opponent player
            beta = min(beta, score)
            if alpha >= beta:
                break
        return action_column, score
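
A sketch of a root call, assuming PLAYER1 is the agent's piece constant (it is referenced inside the function) and that an empty 6x7 int8 board is a valid game state; the starting window is the usual (-inf, +inf).

import math
import numpy as np

board = np.zeros((6, 7), dtype=np.int8)  # stand-in for the real initial game state
column, score = minimax(board, depth=4, alpha=-math.inf, beta=math.inf,
                        player=PLAYER1, maximizing_player=True)
print(f'Best column: {column} (score {score})')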
Example #3
def alpha_beta(board: Bitmap, mask: Bitmap, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore, board_shp: Tuple
               ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively call alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, calculate and
    return the heuristic score. Scores farther down the tree are penalized.

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: boolean indicating whether the depth at which alpha_beta
                       is called from is a maximizing or minimizing player
    :param depth: the current depth in the game tree
    :param alpha: the currently best score for the maximizing player along the
                  path to root
    :param beta: the currently best score for the minimizing player along the
                  path to root
    :param board_shp: the shape of the game board

    :return: the score of the position and the best action
    """

    # If the node is at the max depth or a terminal node calculate the score
    max_depth = 7
    win_score = 150
    state_p = check_end_state(board, mask, board_shp)
    if state_p == GameState.IS_WIN:
        if max_player:
            return GameScore(win_score), None
        else:
            return GameScore(-win_score), None
    elif state_p == GameState.IS_DRAW:
        return GameScore(0), None
    elif depth == max_depth:
        return heuristic_solver_bits(board, mask, board_shp[0], max_player), None

    # For each potential action, call alpha_beta
    if max_player:
        score = -100000
        for col in range(board_shp[1]):
            # Apply the current action, continue if column is full
            try:
                min_board, new_mask = apply_player_action_cp(board, mask,
                                                             col, board_shp[0])
            except IndexError:
                continue
            # Call alpha-beta
            new_score, temp = alpha_beta(min_board, new_mask, False, depth + 1,
                                         alpha, beta, board_shp)
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether the score raises alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = 100000
        for col in range(board_shp[1]):
            # Apply the current action, continue if column is full
            try:
                max_board, new_mask = apply_player_action_cp(board, mask,
                                                             col, board_shp[0])
            except IndexError:
                continue
            # Call alpha-beta
            new_score, temp = alpha_beta(max_board, new_mask, True, depth + 1,
                                         alpha, beta, board_shp)
            new_score += depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether the score lowers beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
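
Under the usual Connect-Four bitboard convention, board holds only the stones of the side being evaluated and mask holds the stones of both sides, so the opponent's stones are board ^ mask. A sketch of a root call from an empty position; treating Bitmap as an int-like alias and reusing the +/-100000 sentinels as the starting window are assumptions.

empty_board = empty_mask = Bitmap(0)  # assumed int-like alias; no stones placed yet
score, action = alpha_beta(empty_board, empty_mask, True, 0,
                           GameScore(-100000), GameScore(100000), (6, 7))
print(f'Opening column for the maximizer: {action} (score {score})')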
Example #4
def minimax(board: np.ndarray, alpha: float, beta: float, players: List[BoardPiece], depth: int, MaxPlayer: bool) \
        -> Tuple[Any, Optional[PlayerAction]]:
    """
    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param alpha: the best value that maximizer can guarantee in the current state or before in the maximizer turn
    :param beta: the best value that minimizer can guarantee in the current state or before it in the minimizer turn
    :param players: List of players with maximizer first
    :param depth: remaining search depth (plies still to be evaluated)
    :param MaxPlayer: True if it is the maximizer's turn
    :return: Best value for maximizer or minimizer and the corresponding action
    """
    # Check the end state of the game after the last player's move
    end_state = check_end_state(board, players[0] if not MaxPlayer else players[1])
    # Return very positive/negative value if the move of the last player won the game
    if end_state == GameState.IS_WIN:
        if MaxPlayer:
            return -10**10, None
        else:
            return 10**10, None
    if end_state == GameState.IS_DRAW:
        return 0, None
    # Only evaluate the board if the game is still going on and the bottom of the tree is reached
    if end_state == GameState.STILL_PLAYING and depth == 0:
        # Evaluate how good the current board is for the maximizing player
        return eval_board(board, players), None

    if MaxPlayer:
        best_value = -np.inf
        player = players[0]
    else:
        best_value = np.inf
        player = players[1]

    # Get all the possible actions (columns that are not already full)
    free_columns = np.unique(np.where(board == NO_PLAYER)[1])
    # Shuffle the actions so that, when several actions tie on value, the
    # >=/<= comparisons below end up keeping a random one of them
    np.random.shuffle(free_columns)
    for action in free_columns:
        # Apply the action and go one step deeper into the tree
        board_new = apply_player_action(board.copy(), PlayerAction(action), player)
        value, _ = minimax(board_new, alpha, beta, players, depth - 1, not MaxPlayer)
        # If the action results in a board that is better than all the previously
        # checked actions for the current player, save it and its evaluation
        if MaxPlayer and value >= best_value:
            best_value = value
            best_action = action
            alpha = max(alpha, best_value)
            if beta <= alpha:
                break
        elif not MaxPlayer and value <= best_value:
            best_value = value
            best_action = action
            beta = min(beta, best_value)
            if beta <= alpha:
                break

    return best_value, best_action
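
A root-call sketch; the BoardPiece(1)/BoardPiece(2) constants and NO_PLAYER == 0 are assumptions, and the function only requires that players[0] be the maximizer's piece.

import numpy as np

board = np.zeros((6, 7), dtype=np.int8)  # hypothetical empty board; NO_PLAYER == 0 assumed
value, action = minimax(board, -np.inf, np.inf, [BoardPiece(1), BoardPiece(2)],
                        depth=4, MaxPlayer=True)
print(f'Maximizer plays column {action} (value {value})')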
Example #5
def minimax(board: np.ndarray, player: BoardPiece, score_dict: np.ndarray,
            depth: int, alpha: float, beta: float,
            maxplayer: bool) -> Tuple[PlayerAction, float]:
    """
    Minimax algorithm with alpha-beta pruning
    :param board:
        np.ndarray: current state of the board, filled with Player pieces
    :param player:
        BoardPiece: player piece to evaluate the best move for (the maximizing player)
    :param score_dict:
        np.ndarray: score points to award to the different patterns... see board_score
    :param depth:
        int: depth of the tree search
    :param alpha:
        float: best score the maximizing player can already guarantee
    :param beta:
        float: best score the minimizing player can already guarantee
    :param maxplayer:
        bool: True if the maximizing player is to move
    :return:
        (PlayerAction, float): best possible action and its score
    """
    # Get possible moves
    # Player possible actions
    poss_actions = (np.arange(board.shape[1],
                              dtype=PlayerAction)[board[-1, :] == NO_PLAYER])
    poss_actions = poss_actions[np.argsort(
        np.abs(poss_actions - 3))]  # center-first bias (column 3 of a 7-wide board)
    pieces = np.array([PLAYER1, PLAYER2])

    # Final or end state node reached
    current_state = cc.check_end_state(board=board, player=player)
    if (depth == 0) or (current_state != cc.GameState.STILL_PLAYING):
        if (current_state == cc.GameState.IS_WIN) and not maxplayer:
            return None, 10000 + depth
        if (current_state == cc.GameState.IS_WIN) and maxplayer:
            return None, -(10000 + depth)
        if current_state == cc.GameState.IS_DRAW:
            return None, 0
        else:
            return None, board_score(board=board,
                                     player=player,
                                     score_dict=score_dict)

    if maxplayer:
        # Initialize score
        max_score = -np.inf

        for moves in poss_actions:
            # How would this move change my score?
            move_board = cc.apply_player_action(board=board,
                                                action=moves,
                                                player=player,
                                                copy=True)
            score = minimax(board=move_board,
                            player=player,
                            score_dict=score_dict,
                            depth=depth - 1,
                            alpha=alpha,
                            beta=beta,
                            maxplayer=False)[1]

            if score > max_score:
                max_score = score
                action = moves
            alpha = max(alpha, score)
            if beta <= alpha:
                break
        return action, max_score
    else:
        # Initialize opponent score
        min_score = np.inf
        opponent = pieces[pieces != player][0]

        for moves in poss_actions:
            # How would this move change my score?
            move_board = cc.apply_player_action(board=board,
                                                action=moves,
                                                player=opponent,
                                                copy=True)
            score = -minimax(board=move_board,
                             player=opponent,
                             score_dict=score_dict,
                             depth=depth - 1,
                             alpha=alpha,
                             beta=beta,
                             maxplayer=True)[1]

            if score < min_score:
                min_score = score
                action = moves
            beta = min(beta, score)
            if beta <= alpha:  # prune the remaining siblings
                break
        return action, min_score
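
A root-call sketch; PLAYER1 is referenced inside the function, but the empty board and a score_dict matching board_score's expectations are assumptions.

import numpy as np

board = np.zeros((6, 7), dtype=np.int8)  # hypothetical empty board; NO_PLAYER == 0 assumed
action, score = minimax(board, PLAYER1, score_dict,  # score_dict assumed defined elsewhere
                        depth=4, alpha=-np.inf, beta=np.inf, maxplayer=True)
print(f'Best action: column {action} (score {score})')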
Example #6
    def update_gamestate(self):
        """Refresh the cached game state after the most recent move."""
        self.gamestate = cm.check_end_state(self.board, self.player)
        return self
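
Because the method returns self, calls can be chained; a usage sketch, assuming game is an instance of the surrounding class with board and player attributes and that cm exposes the usual GameState enum.

if game.update_gamestate().gamestate == cm.GameState.IS_WIN:
    print(f'Player {game.player} wins!')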