def _pick_random_action(candidates: np.ndarray) -> PlayerAction:
    """Return one entry of the non-empty 1-D array ``candidates`` as a PlayerAction.

    A single candidate is returned directly; several candidates are sampled
    uniformly with ``np.random.choice``.
    """
    if candidates.size > 1:
        return PlayerAction(np.random.choice(candidates))
    return PlayerAction(candidates[0])


def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Choose a move for ``player`` in three stages: play an immediate win if one
    exists, otherwise block an immediate win of the other player, otherwise
    play the move with the best ``alpha_beta_value`` score. Ties at any stage
    are broken uniformly at random.

    :param board: current board; candidate moves are applied with ``copy=True``
    :param player: piece of the agent that is about to move
    :param saved_state: passed through and returned unchanged
    :return: the chosen action (always a ``PlayerAction`` scalar) and
        ``saved_state``
    """
    # NOTE(review): open_moves must be a NumPy array because it is indexed
    # with boolean masks below - confirm against get_valid_moves.
    open_moves = get_valid_moves(board)
    print(f'Open moves: {open_moves}')

    # if a move results in a win, play it
    winning_moves = np.array(
        [
            check_end_state(
                apply_player_action(board, move, player, copy=True), player
            ) == GameState.IS_WIN
            for move in open_moves
        ],
        dtype=bool,
    )
    if np.any(winning_moves):
        action = _pick_random_action(open_moves[winning_moves])
        print(f'playing action {action} for a win')
        return action, saved_state

    # if a move results in blocking an opponent's win, play it
    other_player = BoardPiece(player % 2 + 1)
    blocking_moves = np.array(
        [
            check_end_state(
                apply_player_action(board, move, other_player, copy=True),
                other_player,
            ) == GameState.IS_WIN
            for move in open_moves
        ],
        dtype=bool,
    )
    if np.any(blocking_moves):
        action = _pick_random_action(open_moves[blocking_moves])
        print(f'playing action {action} for a block')
        return action, saved_state

    # otherwise, use the heuristic function to score possible states
    scores = [
        alpha_beta_value(
            apply_player_action(board, move, player, copy=True),
            player, True, MAX_DEPTH, alpha=-np.inf, beta=np.inf
        )
        for move in open_moves
    ]

    # randomly select among best moves; the comparison is done on an explicit
    # array instead of relying on list-vs-NumPy-scalar broadcasting
    score_array = np.asarray(scores)
    action = _pick_random_action(open_moves[score_array == score_array.max()])
    print(f'Heuristic values: {scores}')
    print(f'playing action {action} with heuristic value {np.max(scores)}')
    return action, saved_state
def minimax(board: np.ndarray, depth: int, alpha: int, beta: int,
            player: BoardPiece, maximizing_player: bool) -> Tuple[int, int]:
    '''
    Minimax search with alpha-beta pruning, scored from the point of view of
    ``player`` (the agent).

    :param board: current state of board
    :param depth: remaining depth of the search tree; 0 means "evaluate now"
    :param alpha: best score the maximizing side can already guarantee
    :param beta: best score the minimizing side can already guarantee
    :param player: the agent's piece; scores are always relative to it
    :param maximizing_player: True if it is the agent's turn at this node
    :return: ``(column, score)``; ``column`` is None at leaves (terminal
        position or depth 0) and when no column is open
    '''
    # work out which piece is the opponent so that we never max/min for the
    # wrong player
    opponent_player = PLAYER2 if player == PLAYER1 else PLAYER1

    # Terminal positions are scored BEFORE the depth cut-off so that a decided
    # game on the search horizon gets its exact value instead of a heuristic.
    # Both sides are tested directly with connected_four, so a four-in-a-row
    # by the opponent is recognised even if check_end_state(board, player)
    # only reports on `player` (NOTE(review): confirm its semantics).
    if connected_four(board, player):  # agent won
        return None, 100000
    if connected_four(board, opponent_player):  # opponent won
        return None, -100000
    if check_end_state(board, player) != GameState.STILL_PLAYING:
        # nobody has four in a row, so this must be a draw
        return None, 0

    if depth == 0:
        return None, heuristic(board, player)

    # columns that can still take a piece; only needed for inner nodes
    open_cols = np.asarray(check_open_columns(board))

    # stays None if there is nothing to play, avoiding an unbound local
    action_column = None

    if maximizing_player:
        # get max score for agent
        score = -math.inf
        for column in open_cols:
            # Simulate the move. With copy=True apply_player_action returns a
            # pair here; per the original comments the first item is the
            # untouched board and the second the board after the move.
            board, board_copy = apply_player_action(board, column, player, True)
            next_score = minimax(board_copy, depth - 1, alpha, beta,
                                 player, False)[1]
            if next_score > score:
                score = next_score
                action_column = column
            # alpha cut-off: the minimizer above will never allow this branch
            alpha = max(alpha, score)
            if alpha >= beta:
                break
        return action_column, score

    # opponent's turn: minimise the agent's score
    score = math.inf
    for column in open_cols:
        board, action_board = apply_player_action(board, column,
                                                  opponent_player, True)
        next_score = minimax(action_board, depth - 1, alpha, beta,
                             player, True)[1]
        if next_score < score:
            score = next_score
            action_column = column
        # beta cut-off: the maximizer above already has a better option
        beta = min(beta, score)
        if alpha >= beta:
            break
    return action_column, score
def alpha_beta(board: Bitmap, mask: Bitmap, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore, board_shp: Tuple
               ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Depth-limited alpha-beta search over bitmap boards.

    The recursion stops at a win, a draw, or when ``depth`` reaches the fixed
    limit of 7, where ``heuristic_solver_bits`` supplies the score. On the way
    back up every child score is shifted by the current depth (subtracted at
    maximizing nodes, added at minimizing nodes).

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: True when this node belongs to the maximizing player
    :param depth: the current depth in the game tree (grows by one per call)
    :param alpha: best score so far for the maximizing player on the path to
        the root
    :param beta: best score so far for the minimizing player on the path to
        the root
    :param board_shp: shape of the game board; index 0 is forwarded to the
        move/heuristic helpers and index 1 bounds the column loop
    :return: ``(score, action)``; ``action`` is None at leaves
    """
    depth_limit = 7
    win_value = 150

    # Leaves: finished game or depth limit reached.
    end_state = check_end_state(board, mask, board_shp)
    if end_state == GameState.IS_WIN:
        return GameScore(win_value if max_player else -win_value), None
    if end_state == GameState.IS_DRAW:
        return 0, None
    if depth == depth_limit:
        return heuristic_solver_bits(board, mask, board_shp[0], max_player), None

    if max_player:
        best = -100000
        for col in range(board_shp[1]):
            # An IndexError from the move helper means the column cannot be
            # played (full, per the original comment), so skip it.
            try:
                child_board, child_mask = apply_player_action_cp(
                    board, mask, col, board_shp[0])
            except IndexError:
                continue

            child_score, _ = alpha_beta(child_board, child_mask, False,
                                        depth + 1, alpha, beta, board_shp)
            child_score -= depth

            if child_score > best:
                best, best_col = child_score, col
            # beta cut-off: the minimizer above will not allow this line
            if best >= beta:
                break
            alpha = max(alpha, best)
        return GameScore(best), PlayerAction(best_col)

    best = 100000
    for col in range(board_shp[1]):
        try:
            child_board, child_mask = apply_player_action_cp(
                board, mask, col, board_shp[0])
        except IndexError:
            continue

        child_score, _ = alpha_beta(child_board, child_mask, True,
                                    depth + 1, alpha, beta, board_shp)
        child_score += depth

        if child_score < best:
            best, best_col = child_score, col
        # alpha cut-off: the maximizer above already has something better
        if best <= alpha:
            break
        beta = min(beta, best)
    return GameScore(best), PlayerAction(best_col)
def minimax(board: np.ndarray, alpha: int, beta: int, players: List[BoardPiece], depth: int, MaxPlayer: bool) \
        -> Tuple[any, Union[PlayerAction, None]]:
    """
    Plain minimax over the board, valued from the maximizer's point of view.

    NOTE(review): ``alpha`` and ``beta`` are only forwarded to the recursive
    call; no pruning is performed with them in this function.

    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param alpha: forwarded unchanged (intended: best value the maximizer can
        guarantee so far)
    :param beta: forwarded unchanged (intended: best value the minimizer can
        guarantee so far)
    :param players: List of players with maximizer first
    :param depth: Number of further plies to look ahead
    :param MaxPlayer: True if it is the maximizer's turn
    :return: Best value for the side to move and the corresponding action
        (None at leaves)
    """
    # The side that moved last is the one NOT on turn now.
    last_mover = players[1] if MaxPlayer else players[0]
    end_state = check_end_state(board, last_mover)

    # A win by the last mover is very bad for the side to move.
    if end_state == GameState.IS_WIN:
        return (-10**10 if MaxPlayer else 10**10), None
    if end_state == GameState.IS_DRAW:
        return 0, None
    # Bottom of the tree with the game still running: static evaluation.
    if end_state == GameState.STILL_PLAYING and depth == 0:
        return eval_board(board, players), None

    if MaxPlayer:
        best_value, player = -np.inf, players[0]
    else:
        best_value, player = np.inf, players[1]

    # Columns that still contain at least one empty cell.
    free_columns = np.unique(np.where(board == NO_PLAYER)[1])

    for action in free_columns:
        # Play the move on a copy and descend one ply.
        board_new = apply_player_action(board.copy(), PlayerAction(action), player)
        value, _ = minimax(board_new, alpha, beta, players, depth - 1, not MaxPlayer)

        # Non-strict comparison: among equally good moves the last one wins.
        if MaxPlayer:
            is_better = value >= best_value
        else:
            is_better = value <= best_value
        if is_better:
            best_value, best_action = value, action

    return best_value, best_action
def minimax(board: np.ndarray, player: BoardPiece, score_dict: np.ndarray, depth: int, alpha: float, beta: float,
            maxplayer: bool) -> (PlayerAction, float):
    """
    Minimax algorithm with alpha-beta pruning.

    All scores are from the point of view of ``player``, which stays the
    maximizing player through the whole recursion: maximizing nodes place
    ``player``'s piece, minimizing nodes place the opponent's piece, and the
    two strictly alternate.

    :param board: np.ndarray: current state of the board, filled with Player pieces
    :param player: BoardPiece: player piece to evaluate for best move (maximizing player)
    :param score_dict: np.ndarray: score points for the different patterns,
        forwarded to ``board_score``
    :param depth: int: remaining depth of tree search
    :param alpha: float: best score the maximizing player can guarantee so far
    :param beta: float: best score the minimizing player can guarantee so far
    :param maxplayer: bool: True if the maximizing player is to move
    :return: (PlayerAction, float): best possible action and its score; the
        action is None at leaves and when no column is playable
    """
    opponent = PLAYER2 if player == PLAYER1 else PLAYER1

    # The piece that moved last is the one that is NOT on turn now; only that
    # side can have just completed a win.
    # NOTE(review): assumes cc.check_end_state(board, player) reports IS_WIN
    # for the given player only - confirm against cc.
    last_mover = opponent if maxplayer else player
    current_state = cc.check_end_state(board=board, player=last_mover)

    if current_state == cc.GameState.IS_WIN:
        # `depth` is the remaining depth, so adding it makes faster wins score
        # higher and faster losses score lower.
        win_value = 10000 + depth
        return None, (-win_value if maxplayer else win_value)
    if current_state == cc.GameState.IS_DRAW:
        return None, 0
    if depth == 0:
        return None, board_score(board=board, player=player, score_dict=score_dict)

    # Playable columns: those whose entry in board[-1, :] is still empty,
    # ordered by distance from the middle column (centre search bias).
    poss_actions = np.arange(board.shape[1], dtype=PlayerAction)[board[-1, :] == NO_PLAYER]
    centre = board.shape[1] // 2
    poss_actions = poss_actions[np.argsort(np.abs(poss_actions - centre))]

    # Stays None when there is nothing to play, avoiding an unbound local.
    action = None

    if maxplayer:
        max_score = -np.inf
        for move in poss_actions:
            move_board = cc.apply_player_action(board=board, action=move, player=player, copy=True)
            score = minimax(board=move_board, player=player, score_dict=score_dict, depth=depth - 1,
                            alpha=alpha, beta=beta, maxplayer=False)[1]
            if score > max_score:
                max_score, action = score, move
            alpha = max(alpha, max_score)
            if beta <= alpha:
                # the minimizer above already has a better alternative
                break
        return action, max_score

    min_score = np.inf
    for move in poss_actions:
        move_board = cc.apply_player_action(board=board, action=move, player=opponent, copy=True)
        # Same `player`, no sign flip: the score stays in `player`'s frame.
        score = minimax(board=move_board, player=player, score_dict=score_dict, depth=depth - 1,
                        alpha=alpha, beta=beta, maxplayer=True)[1]
        if score < min_score:
            min_score, action = score, move
        beta = min(beta, min_score)
        if beta <= alpha:
            # the maximizer above already has a better alternative
            break
    return action, min_score
def update_gamestate(self):
    """Recompute ``self.gamestate`` via ``cm.check_end_state`` and return ``self``.

    The state is evaluated for ``self.board`` and ``self.player``; returning
    the instance allows calls to be chained.
    """
    current_state = cm.check_end_state(self.board, self.player)
    self.gamestate = current_state
    return self