def test1():
    """Debug driver: print the minimax player's view of a hand-built board.

    Builds a default-size board for two ``MinimaxPlayer`` agents, overwrites
    entries of the private ``_board_state`` list directly, and then prints the
    board, the legal moves, the replies available after each move, the
    heuristic score of each successor and finally the move chosen by
    ``player1``.
    """
    player1 = MinimaxPlayer()
    player2 = MinimaxPlayer()
    game = Board(player1, player2)

    # Mark a fixed set of cells with 1 by writing into the raw state list.
    for cell in (9, 11, 12, 14, 15, 16, 17, 20, 24, 25, 29, 30, 33, 38, 39, 44):
        game._board_state[cell] = 1
    # The last two entries are set separately.
    # NOTE(review): presumably these hold the two players' positions -- confirm
    # against the Board implementation.
    game._board_state[-1] = 17
    game._board_state[-2] = 9

    print(game.to_string())
    candidates = game.get_legal_moves()
    print(candidates)

    # First pass: the replies that become available after each candidate.
    for candidate in candidates:
        replies = game.forecast_move(candidate).get_legal_moves()
        print(str(candidate) + " -->" + str(replies))

    # Second pass: the heuristic value of each successor for player 1.
    for candidate in candidates:
        value = player1.score(game.forecast_move(candidate), player1)
        print(str(candidate) + " --> " + str(value))

    # NOTE(review): 6 is passed where other get_move signatures in this code
    # base document a `time_left` callable -- confirm this is intended.
    print(player1.get_move(game, 6))
def test2():
    """Debug driver: print the alpha-beta player's view of a fixed 9x9 board.

    Replaces the board's private ``_board_state`` list with a hard-coded
    84-element snapshot, prints for every legal move the follow-up moves
    together with the heuristic score of the successor, and finally prints
    the move selected by ``player1``.
    """
    player1 = AlphaBetaPlayer()
    player2 = AlphaBetaPlayer()
    game = Board(player1, player2, 9, 9)

    # Raw state snapshot, wrapped nine entries per line purely for
    # readability; the three trailing entries are kept on their own line.
    game._board_state = [
        0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 1, 1, 0, 0, 0,
        0, 1, 0, 0, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 0, 0,
        0, 0, 1, 1, 1, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 31, 22,
    ]

    for candidate in game.get_legal_moves():
        replies = game.forecast_move(candidate).get_legal_moves()
        value = player1.score(game.forecast_move(candidate), player1)
        print(str(candidate) + " -->" + str(replies) + str(value))

    # NOTE(review): 6 is passed where other get_move signatures in this code
    # base document a `time_left` callable -- confirm this is intended.
    print(player1.get_move(game, 6))
def get_move(self, game: Board, time_left) -> Tuple[int, int]:
    """Pick the legal move whose successor state scores highest.

    Every legal move is applied to a forecast copy of the board and the
    resulting state is rated with ``self.score``; no deeper search is done.

    Parameters
    ----------
    game : `isolation.Board`
        The current game state.

    time_left : callable
        Accepted for interface compatibility; this method never calls it.

    Returns
    ----------
    (int, int)
        The best-scoring legal move, or (-1, -1) when there are no legal
        moves. When several moves share the top score, the one that compares
        greatest as a tuple is returned.
    """
    options = game.get_legal_moves()
    if options:
        # Pair each move with its score so that max() orders by score first
        # and by the move itself on ties.
        ranked = []
        for option in options:
            ranked.append((self.score(game.forecast_move(option), self), option))
        return max(ranked)[1]
    return (-1, -1)
def test_hash_different(self):
    """Two different successor boards must not produce the same hash value."""
    board = Board(AlphaBetaPlayer(), RandomPlayer())

    # Let each side make one randomly chosen opening move.
    for _ in range(2):
        board.apply_move(random.choice(board.get_legal_moves()))

    # Branch on the first two legal moves of the resulting position.
    first_branch = board.forecast_move(board.get_legal_moves()[0])
    second_branch = board.forecast_move(board.get_legal_moves()[1])

    self.assertNotEqual(first_branch.__hash__(), second_branch.__hash__())
def minimax(self, game: Board, depth: int, maximizing_player: bool = True) -> Tuple[float, Move]:
    """Implement the minimax search algorithm as described in the lectures.

    Parameters
    ----------
    game : isolation.Board
        An instance of the Isolation game `Board` class representing the
        current game state

    depth : int
        Depth is an integer representing the maximum number of plies to
        search in the game tree before aborting

    maximizing_player : bool
        Flag indicating whether the current search depth corresponds to a
        maximizing layer (True) or a minimizing layer (False)

    Returns
    -------
    float
        The score for the current search branch

    tuple(int, int)
        The best move for the current branch; (-1, -1) for no legal moves

    Raises
    ------
    Timeout
        When ``self.time_left()`` drops below ``self.timer_threshold``.

    Notes
    -----
        (1) You MUST use the `self.score()` method for board evaluation
            to pass the project unit tests; you cannot call any other
            evaluation function directly.
        (2) Candidates are compared as ``(score, move)`` tuples, so equal
            scores are resolved by comparing the move tuples themselves.
    """
    if self.time_left() < self.timer_threshold:
        raise Timeout()

    best_move = (-1, -1)

    # Compare the depth by value. The previous identity test (`is 0`) only
    # worked through an interpreter implementation detail and triggers a
    # SyntaxWarning on Python 3.8+.
    if depth == 0:
        return self.score(game, self), best_move

    # Start from the worst possible value for the side to move so that any
    # real score replaces it.
    best_score = float("-inf") if maximizing_player else float("inf")
    comparison = max if maximizing_player else min

    for move in game.get_legal_moves():
        score, _ = self.minimax(game.forecast_move(move), depth - 1, not maximizing_player)
        best_score, best_move = comparison((best_score, best_move), (score, move))
    return best_score, best_move
def min_play(g: Board, d: int) -> Tuple[float, Tuple[int, int]]:
    """Minimizing layer of a depth-limited minimax search.

    Parameters
    ----------
    g : Board
        State in which the minimizing side is to move.
    d : int
        Remaining depth; at ``d == 1`` successors are rated with
        ``self.score`` instead of being expanded through ``max_play``.

    Returns
    -------
    (float, (int, int))
        The lowest ``(score, move)`` pair found, or ``(inf, (-1, -1))`` when
        there are no legal moves.

    Raises
    ------
    Timeout
        When a timer is set and less than ``self.TIMER_THRESHOLD`` remains.
    """
    lowest = (float("inf"), (-1, -1))
    for candidate in g.get_legal_moves():
        # The clock is polled before every expansion.
        if time_left and time_left() < self.TIMER_THRESHOLD:
            raise Timeout()

        successor = g.forecast_move(candidate)
        if d == 1:
            value = self.score(successor, self)
        else:
            value, _ = max_play(successor, d - 1)

        # Tuples order by score first, then by the move itself on ties.
        lowest = min([lowest, (value, candidate)])
    return lowest
def play(Q1, Q2, size=7, time_limit=1000, print_moves=False, seed=None):
    """Play one game between ``Q1`` and ``Q2`` after two random opening moves.

    Parameters
    ----------
    Q1, Q2
        The two player objects handed to ``Board``.
    size : int
        Used for both board dimensions.
    time_limit
        Forwarded to ``Board.play_isolation``.
    print_moves : bool
        Forwarded to ``Board.play_isolation``.
    seed
        When not None, seeds the ``random`` module before the opening moves
        are drawn.

    Returns
    -------
    tuple
        ``(winner, move_history, termination)`` as produced by
        ``play_isolation``.
    """
    import random

    if seed is not None:
        random.seed(seed)

    game = Board(Q1, Q2, size, size)

    # Each side gets one randomly selected opening move before real play.
    for _ in range(2):
        opening_moves = game.get_active_moves()
        random.shuffle(opening_moves)
        # Only the first element of forecast_move's result is kept as the
        # new game state.
        game = game.forecast_move(opening_moves[0])[0]

    winner, move_history, termination = game.play_isolation(
        time_limit=time_limit,
        print_moves=print_moves,
    )
    print("\n", winner, " has won. Reason: ", termination)
    return winner, move_history, termination
def min_play(g: Board, d: int, a: float, b: float) -> Tuple[float, Tuple[int, int]]:
    """Minimizing layer of a depth-limited alpha-beta search.

    Parameters
    ----------
    g : Board
        State in which the minimizing side is to move.
    d : int
        Remaining depth; at ``d == 1`` successors are rated with
        ``self.score`` instead of being expanded through ``max_play``.
    a : float
        Alpha bound; the loop stops once the best score is at or below it.
    b : float
        Beta bound; tightened with the best score before recursing.

    Returns
    -------
    (float, (int, int))
        The lowest ``(score, move)`` pair found, or ``(inf, (-1, -1))`` when
        no move was examined.

    Raises
    ------
    Timeout
        When a timer is set and less than ``self.TIMER_THRESHOLD`` remains.
    """
    lowest, choice = float("inf"), (-1, -1)

    # moves_by_rank yields pairs; only the move component is used here.
    for _, candidate in moves_by_rank(g, reverse=False):
        if time_left and time_left() < self.TIMER_THRESHOLD:
            raise Timeout()

        # Alpha cutoff: the maximizer already has an option at least this good.
        if lowest <= a:
            break

        successor = g.forecast_move(candidate)
        if d == 1:
            value = self.score(successor, self)
        else:
            value, _ = max_play(successor, d - 1, a, min(lowest, b))

        # Tuples order by score first, then by the move itself on ties.
        lowest, choice = min([(lowest, choice), (value, candidate)])

    return lowest, choice
def test_play(self):
    """Smoke test: set up a board, inspect it, forecast a move, play it out."""
    from isolation import Board

    # Default-size board with an alpha-beta agent against a random agent.
    player1 = game_agent.AlphaBetaPlayer()
    player2 = sample_players.RandomPlayer()
    game = Board(player1, player2)

    # Opening placements: player 1 on (4, 3), then player 2 on (3, 4).
    # apply_move() mutates the board it is called on.
    for opening in ((4, 3), (3, 4)):
        game.apply_move(opening)
    print(game.to_string())

    # After one move each it must be player 1's turn again.
    assert (player1 == game.active_player)

    # Show which moves the active player may choose from.
    print("Player: ", game.active_player, " with current moves: ")
    print(game.get_legal_moves())

    # forecast_move() works on a copy, so the original board must differ
    # from the forecast one afterwards.
    forecast = game.forecast_move((1, 1))
    assert (forecast.to_string() != game.to_string())
    print("\nOld state:\n{}".format(game.to_string()))
    print("\nNew state:\n{}".format(forecast.to_string()))

    # Let the agents finish the game and report how it ended.
    winner, history, outcome = game.play()
    print("\nWinner: {}\nOutcome: {}".format(winner, outcome))
    print(game.to_string())
    print("Move history:\n{!s}".format(history))
# Demo script fragment: player1 is created before this point (not visible
# here); player2 is a GreedyPlayer and both are seated at a default Board.
player2 = GreedyPlayer()
game = Board(player1, player2)

# Place player 1 on the board at (2, 3), then place player 2 at (0, 5), and
# display the resulting board state. Note that .apply_move() changes the
# calling object in-place.
game.apply_move((2, 3))
game.apply_move((0, 5))
print(game.to_string())

# Players take turns moving on the board, so player1 should be next to move.
assert (player1 == game.active_player)

# Get a list of the legal moves available to the active player.
print(game.get_legal_moves())

# Get a successor of the current state by making a copy of the board and
# applying a move. Notice that this does NOT change the calling object
# (unlike .apply_move()), which is what the assertion below checks.
new_game = game.forecast_move((1, 1))
assert (new_game.to_string() != game.to_string())
print("\nOld state:\n{}".format(game.to_string()))
print("\nNew state:\n{}".format(new_game.to_string()))

# Play the remainder of the game automatically -- outcome can be "illegal
# move", "timeout", or "forfeit".
winner, history, outcome = game.play()
print("\nWinner: {}\nOutcome: {}".format(winner, outcome))
print(game.to_string())
print("Move history:\n{!s}".format(history))
def alphabeta(self, game: Board, depth: int, alpha: float = float("-inf"), beta: float = float("inf"),
              maximizing_player: bool = True) -> Tuple[float, Move]:
    """Implement minimax search with alpha-beta pruning as described in the
    lectures.

    Parameters
    ----------
    game : isolation.Board
        An instance of the Isolation game `Board` class representing the
        current game state

    depth : int
        Depth is an integer representing the maximum number of plies to
        search in the game tree before aborting

    alpha : float
        Alpha limits the lower bound of search on minimizing layers

    beta : float
        Beta limits the upper bound of search on maximizing layers

    maximizing_player : bool
        Flag indicating whether the current search depth corresponds to a
        maximizing layer (True) or a minimizing layer (False)

    Returns
    -------
    float
        The score for the current search branch. When no move improves on
        the incoming bound, that bound (alpha on maximizing layers, beta on
        minimizing layers) is returned.

    tuple(int, int)
        The best move for the current branch; (-1, -1) when the depth limit
        is reached or there are no legal moves

    Raises
    ------
    Timeout
        When ``self.time_left()`` drops below ``self.timer_threshold``.

    Notes
    -----
        (1) You MUST use the `self.score()` method for board evaluation
            to pass the project unit tests; you cannot call any other
            evaluation function directly.
    """
    if self.time_left() < self.timer_threshold:
        raise Timeout()

    # Compare the depth by value. The previous identity test (`is 0`) only
    # worked through an interpreter implementation detail and triggers a
    # SyntaxWarning on Python 3.8+.
    if depth == 0:
        return self.score(game, self), (-1, -1)

    legal_moves = game.get_legal_moves()

    # Fall back to the first legal move so that a position in which no move
    # beats the incoming bound (e.g. every move scores -inf at the root)
    # still yields a playable move instead of the (-1, -1) forfeit marker.
    best_move = legal_moves[0] if legal_moves else (-1, -1)
    best_score = alpha if maximizing_player else beta

    for move in legal_moves:
        future_game = game.forecast_move(move)
        score, _ = self.alphabeta(future_game, depth - 1, alpha, beta, not maximizing_player)

        if maximizing_player:
            if score > best_score:
                best_score, best_move = score, move
            # Beta cutoff: the minimizer above will never allow this branch.
            if best_score >= beta:
                return best_score, best_move
            alpha = max(alpha, best_score)
        else:
            if score < best_score:
                best_score, best_move = score, move
            # Alpha cutoff: the maximizer above already has something better.
            if best_score <= alpha:
                return best_score, best_move
            beta = min(beta, best_score)

    return best_score, best_move
# Demo script fragment: player1 is created before this point (not visible
# here); player2 is a GreedyPlayer and both are seated at a default Board.
player2 = GreedyPlayer()
game = Board(player1, player2)

# Place player 1 on the board at (2, 3), then place player 2 at (0, 5), and
# display the resulting board state. Note that .apply_move() changes the
# calling object in-place.
game.apply_move((2, 3))
game.apply_move((0, 5))
print(game.to_string())

# Players take turns moving on the board, so player1 should be next to move.
assert(player1 == game.active_player)

# Get a list of the legal moves available to the active player.
print(game.get_legal_moves())

# Get a successor of the current state by making a copy of the board and
# applying a move. Notice that this does NOT change the calling object
# (unlike .apply_move()), which is what the assertion below checks.
new_game = game.forecast_move((1, 1))
assert(new_game.to_string() != game.to_string())
print("\nOld state:\n{}".format(game.to_string()))
print("\nNew state:\n{}".format(new_game.to_string()))

# Play the remainder of the game automatically -- outcome can be "illegal
# move", "timeout", or "forfeit".
winner, history, outcome = game.play()
print("\nWinner: {}\nOutcome: {}".format(winner, outcome))
print(game.to_string())
print("Move history:\n{!s}".format(history))
# players take turns moving on the board, so player1 should be next to move assert (player1 == game.active_player) # get a list of the legal moves available to the active player print('legal moves', game.get_legal_moves()) action = player1.get_move(game, game.get_legal_moves(), timeit.default_timer()) #print 'ACTION', action #new_game = game.forecast_move(action) #assert(new_game.to_string() != game.to_string()) #print("\nOld state:\n{}".format(game.to_string())) #print("\nNew state:\n{}".format(new_game.to_string())) # get a successor of the current state by making a copy of the board and # applying a move. Notice that this does NOT change the calling object # (unlike .apply_move()). new_game = game.forecast_move((action)) assert (new_game.to_string() != game.to_string()) print("\nOld state:\n{}".format(game.to_string())) print("\nNew state:\n{}".format(new_game.to_string())) #play the remainder of the game automatically -- outcome can be "illegal #move" or "timeout"; it should _always_ be "illegal move" in this example winner, history, outcome = game.play() print("\nWinner: {}\nOutcome: {}".format(winner, outcome)) print(game.to_string()) print("Move history:\n{!s}".format(history))
def alphabeta(
    self, game: Board, depth: int, alpha=float("-inf"), beta=float("inf")
) -> Tuple[Tuple[int, int], float]:
    """Implement depth-limited minimax search with alpha-beta pruning as
    described in the lectures.

    This should be a modified version of ALPHA-BETA-SEARCH in the AIMA text
    https://github.com/aimacode/aima-pseudocode/blob/master/md/Alpha-Beta-Search.md

    The search is written in negamax form: every state is scored from the
    point of view of the player to move, and the value of a child is negated
    (with the window swapped and negated) when it is passed up to its parent.

    **********************************************************************
        You MAY add additional methods to this class, or define helper
              functions to implement the required functionality.
    **********************************************************************

    Parameters
    ----------
    game : isolation.Board
        An instance of the Isolation game `Board` class representing the
        current game state

    depth : int
        Depth is an integer representing the maximum number of plies to
        search in the game tree before aborting

    alpha : float
        Lower bound of the search window, from the point of view of the
        player to move

    beta : float
        Upper bound of the search window, from the point of view of the
        player to move

    Returns
    -------
    (int, int)
        The board coordinates of the best move found in the current
        search; (-1, -1) if the depth limit is reached or there are no
        legal moves

    float
        The score associated with that move, from the point of view of the
        player to move in `game`; never lower than `alpha`

    Raises
    ------
    SearchTimeout
        When ``self.time_left()`` drops below ``self.TIMER_THRESHOLD``.

    Notes
    -----
        (1) You MUST use the `self.score()` method for board evaluation
            to pass the project tests; you cannot call any other evaluation
            function directly.

        (2) If you use any helper functions (e.g., as shown in the AIMA
            pseudocode) then you must copy the timer check into the top of
            each helper function or else your agent will timeout during
            testing.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    moves = game.get_legal_moves()
    if depth == 0 or not moves:
        # Depth limit reached or game over: only the evaluation matters.
        return (-1, -1), self.score(game, game.active_player)

    # Best move and best score found so far. The move starts as the first
    # legal move rather than (-1, -1), so that a position in which no move
    # scores above alpha (e.g. every move evaluates to -inf at the root)
    # still returns a playable move instead of the forfeit marker.
    best_move, best_score = moves[0], alpha

    for move in moves:
        next_game = game.forecast_move(move)
        # Negamax step: the child's value is from the opponent's point of
        # view, so negate it and search with the negated, swapped window.
        score = -self.alphabeta(next_game, depth - 1, -beta, -best_score)[1]
        if best_score < score:
            # We are looking for the move with the highest score.
            best_move, best_score = move, score
            if best_score >= beta:
                # Cutoff: the opponent will not let play reach this branch.
                break
    return best_move, best_score
def minimax(self, game: Board, depth: int) -> Tuple[Tuple[int, int], float]:
    """Implement depth-limited minimax search algorithm as described in
    the lectures.

    This should be a modified version of MINIMAX-DECISION in the AIMA text.
    https://github.com/aimacode/aima-pseudocode/blob/master/md/Minimax-Decision.md

    The search is written in negamax form: every state is scored from the
    point of view of the player to move, and the value of a child is negated
    when it is passed up to its parent.

    **********************************************************************
        You MAY add additional methods to this class, or define helper
              functions to implement the required functionality.
    **********************************************************************

    Parameters
    ----------
    game : isolation.Board
        An instance of the Isolation game `Board` class representing the
        current game state

    depth : int
        Depth is an integer representing the maximum number of plies to
        search in the game tree before aborting

    Returns
    -------
    (int, int)
        The board coordinates of the best move found in the current
        search; (-1, -1) if the depth limit is reached or there are no
        legal moves

    float
        The score associated with that move, from the point of view of the
        player to move in `game`

    Notes
    -----
        (1) You MUST use the `self.score()` method for board evaluation
            to pass the project tests; you cannot call any other evaluation
            function directly.

        (2) If you use any helper functions (e.g., as shown in the AIMA
            pseudocode) then you must copy the timer check into the top of
            each helper function or else your agent will timeout during
            testing.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    moves = game.get_legal_moves()
    if depth == 0 or len(moves) == 0:
        # Maximum search depth reached, or no moves left (game over): only
        # the evaluation of the position matters here.
        return (-1, -1), self.score(game, game.active_player)

    # Rate every legal move by searching the position it leads to and keep
    # the (move, score) pair with the highest score. Moves themselves have no
    # meaningful ordering, so only the score is compared; the strict
    # comparison keeps the earliest move when several share the top score.
    best_pair = None
    for move in moves:
        value = -self.minimax(game.forecast_move(move), depth - 1)[1]
        if best_pair is None or value > best_pair[1]:
            best_pair = (move, value)
    return best_pair