def negamax_alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
                       alpha: float, beta: float) -> float:
    """
    Search the game tree using negamax with alpha-beta pruning.

    :param board: current board state
    :param player: current player
    :param depth: maximum depth to search in the game tree
    :param alpha: alpha value (lower bound) for pruning
    :param beta: beta value (upper bound) for pruning
    :return: value of the board from the perspective of `player`
    """
    # If we've reached maximum depth or an end state, evaluate the board.
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # Otherwise loop over child nodes, negating values returned for the opponent.
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value,
            -negamax_alpha_beta(
                apply_player_action(board, move, player, copy=True),
                other_player, depth - 1, -beta, -alpha))
        alpha = max(alpha, value)
        if alpha >= beta:
            break

    return value
def negamax(
        board: np.ndarray,
        player: BoardPiece,
        depth: int,
) -> float:
    """
    Search the game tree using plain negamax.

    This is "colorless" negamax -- it assumes the heuristic value is from the
    perspective of the player it's called on.

    :param board: current board state
    :param player: current player
    :param depth: maximum depth to search in the game tree
    :return: value of the board from the perspective of `player`
    """
    # If we've reached maximum depth or an end state, evaluate the board.
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # Otherwise loop over child nodes, negating values returned for the opponent.
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value,
            -negamax(apply_player_action(board, move, player, copy=True),
                     other_player, depth - 1))

    return value
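# --- Hypothetical usage sketch (not part of the original module) --------------
# A minimal root-level driver for the negamax functions above: try every valid
# column, score the resulting child position for the opponent, negate that
# value, and keep the best column. `pick_move_negamax` is an illustrative name;
# it assumes get_valid_moves, apply_player_action and BoardPiece are available
# (e.g. from agents.common) exactly as they are used above.
def pick_move_negamax(board: np.ndarray, player: BoardPiece, depth: int = 4):
    other_player = BoardPiece(player % 2 + 1)
    best_move, best_value = None, -np.inf
    for move in get_valid_moves(board):
        # Child positions are evaluated from the opponent's perspective,
        # so negate the returned value.
        value = -negamax_alpha_beta(
            apply_player_action(board, move, player, copy=True),
            other_player, depth - 1, -np.inf, np.inf)
        if value > best_value:
            best_move, best_value = move, value
    return best_move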
def test_check_game_over():
    from agents.common import check_game_over
    from agents.common import initialize_game_state

    dummy_board = initialize_game_state()

    # an empty board is not a terminal state
    assert check_game_over(dummy_board) is False

    # check a horizontal win
    horizontal_win_player1 = dummy_board.copy()
    horizontal_win_player1[0, 0:4] = PLAYER1
    assert check_game_over(horizontal_win_player1) is True

    # check a vertical win
    vertical_win_player1 = dummy_board.copy()
    vertical_win_player1[0:4, 0] = PLAYER1
    assert check_game_over(vertical_win_player1) is True

    # check a diagonal win
    diagonal_win_player1 = dummy_board.copy()
    for i in range(4):
        diagonal_win_player1[i, i] = PLAYER1
    assert check_game_over(diagonal_win_player1) is True
def minimax_value(board: np.ndarray, player: BoardPiece, maxing: bool, depth: int) -> float:
    """
    Search the game tree using plain minimax.

    The value is always computed from the perspective of `player`; the `maxing`
    flag tracks whose turn it is at the current node.

    :param board: current board state
    :param player: player whose perspective the board is evaluated from
    :param maxing: True if the current node is a maximizing node
    :param depth: maximum depth to search in the game tree
    :return: value of the board from the perspective of `player`
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    # If we've reached maximum depth or an end state, evaluate the board.
    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)

    if maxing:
        # It is `player`'s turn: place their piece and minimize next.
        value = -np.inf
        for move in valid_moves:
            value = max(
                value,
                minimax_value(board=apply_player_action(board, move, player, copy=True),
                              player=player, maxing=False, depth=depth - 1))
    else:
        # It is the opponent's turn: place their piece and maximize next.
        value = np.inf
        for move in valid_moves:
            value = min(
                value,
                minimax_value(board=apply_player_action(board, move, other_player, copy=True),
                              player=player, maxing=True, depth=depth - 1))

    return value
def simulate(self, node: MonteCarloNode) -> Union[BoardPiece, GameState]:
    """
    Simulate (roll out) a game from a given node using uniformly random moves.
    The outcome is either the winning player or GameState.IS_DRAW.

    :param node: node to start the rollout from
    :return: winning player, or GameState.IS_DRAW if the game ends in a draw
    """
    current_rollout_state = node.board.copy()
    curr_player = node.to_play

    while not check_game_over(current_rollout_state):
        possible_moves = get_valid_moves(current_rollout_state)
        if possible_moves.size > 1:
            action = np.random.choice(list(possible_moves))
        else:
            action = possible_moves[0]
        current_rollout_state = apply_player_action(current_rollout_state, action,
                                                    curr_player, copy=True)
        curr_player = BoardPiece(curr_player % 2 + 1)

    return evaluate_end_state(current_rollout_state)
def run_search(self, state: np.ndarray, to_play: BoardPiece, n_sims=3000):
    """
    Run the MCTS loop: repeatedly select an unexpanded, non-terminal node,
    expand it, simulate a game from it, and backpropagate the result.

    :param state: current board state
    :param to_play: which player's turn it is
    :param n_sims: number of search iterations to run
    """
    self.make_node(state, to_play)

    for _ in range(n_sims):
        node = self.select(state, to_play)
        # Only expand non-terminal nodes; terminal nodes are simulated as-is.
        if not check_game_over(node.board):
            node = self.expand(node)
        winner = self.simulate(node)
        self.backpropagate(node, winner)
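# --- Hypothetical usage sketch (not part of the original class) ---------------
# One plausible way to drive the search from a move generator: run the
# simulations for the current position, then play the action of the best root
# child. `MonteCarloTreeSearch` as a constructible class and the `best_move`
# helper are assumptions here -- neither is shown above.
def generate_move_mcts(board: np.ndarray, player: BoardPiece):
    tree = MonteCarloTreeSearch()           # assumed: class owning run_search/simulate
    tree.run_search(board, player, n_sims=3000)
    return tree.best_move(board, player)    # assumed: returns the most-visited child's action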
def alpha_beta_value(board: np.ndarray, player: BoardPiece, maxing: bool, depth: int,
                     alpha: float, beta: float) -> float:
    """
    Search the game tree using minimax with alpha-beta pruning.

    The value is always computed from the perspective of `player`; the `maxing`
    flag tracks whose turn it is at the current node.

    :param board: current board state
    :param player: player whose perspective the board is evaluated from
    :param maxing: True if the current node is a maximizing node
    :param depth: maximum depth to search in the game tree
    :param alpha: alpha value (lower bound) for pruning
    :param beta: beta value (upper bound) for pruning
    :return: value of the board from the perspective of `player`
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    # If we've reached maximum depth or an end state, evaluate the board.
    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)

    if maxing:
        # It is `player`'s turn: place their piece and minimize next.
        value = -np.inf
        for move in valid_moves:
            value = max(
                value,
                alpha_beta_value(board=apply_player_action(board, move, player, copy=True),
                                 player=player, maxing=False, depth=depth - 1,
                                 alpha=alpha, beta=beta))
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return value
    else:
        # It is the opponent's turn: place their piece and maximize next.
        value = np.inf
        for move in valid_moves:
            value = min(
                value,
                alpha_beta_value(board=apply_player_action(board, move, other_player, copy=True),
                                 player=player, maxing=True, depth=depth - 1,
                                 alpha=alpha, beta=beta))
            beta = min(beta, value)
            if beta <= alpha:
                break
        return value
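# --- Hypothetical usage sketch (not part of the original module) --------------
# A minimal root-level driver for the minimax/alpha-beta functions above: apply
# each of the root player's moves, score the resulting position as a minimizing
# node (the opponent moves next), and keep the best-scoring column.
# `pick_move_alpha_beta` is an illustrative name; it assumes get_valid_moves and
# apply_player_action are available exactly as they are used above.
def pick_move_alpha_beta(board: np.ndarray, player: BoardPiece, depth: int = 4):
    best_move, best_value = None, -np.inf
    alpha, beta = -np.inf, np.inf
    for move in get_valid_moves(board):
        value = alpha_beta_value(board=apply_player_action(board, move, player, copy=True),
                                 player=player, maxing=False, depth=depth - 1,
                                 alpha=alpha, beta=beta)
        if value > best_value:
            best_move, best_value = move, value
        alpha = max(alpha, value)
    return best_move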
def is_game_over(self):
    """
    Check whether this node is a terminal game state (a win or a draw).

    :return: bool
    """
    return check_game_over(self.board)