def generate_move_mcts(
    board: Board, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move by running a Monte Carlo tree search (MCTS).

    :param board: 2d array representing current state of the game
    :param player: the piece handed to board_to_bitmap together with the
                   board (presumably the piece this agent plays - confirm
                   against board_to_bitmap)
    :param saved_state: not used yet; returned unchanged
    :return: the agent's selected move and the unchanged saved_state
    """
    # TODO: return chosen action subtree using saved_state, to improve
    #       performance
    shape = board.shape

    # Opening move: on an empty board always take the center column
    board_is_empty = np.all(board == NO_PLAYER)
    if board_is_empty:
        center_column = np.floor(np.median(np.arange(shape[1])))
        return PlayerAction(center_column), saved_state

    # Switch to the bitmap representation the tree nodes work with
    player_bitmap, mask_bitmap = board_to_bitmap(board, player)

    # The root node is created with action -1 and the max-player flag set
    root = Connect4Node(player_bitmap, mask_bitmap, shape, -1, True)

    # Run the search from the root and report the chosen column
    return PlayerAction(mcts(root)), saved_state
def test_agents():
    """Check that every agent in move_agents takes an immediate win and
    blocks an immediate loss."""
    blank = initialize_game_state()
    # With CONNECT_N - 1 pieces lined up, this is the completing column
    # for both the horizontal and the vertical position built below.
    decisive_column = CONNECT_N - 1

    for player in players:
        opponent = PLAYER1 if player == PLAYER2 else PLAYER2

        # owner == player   -> the agent can win immediately
        # owner == opponent -> the agent has to block immediately
        for owner in (player, opponent):
            horizontal = blank.copy()
            vertical = blank.copy()
            for k in range(CONNECT_N - 1):
                horizontal = apply_player_action(
                    horizontal, PlayerAction(k), owner)
                vertical = apply_player_action(
                    vertical, PlayerAction(decisive_column), owner)

            for agent in move_agents:
                for position in (horizontal, vertical):
                    chosen = agent(position, player, None)[0]
                    assert chosen == PlayerAction(decisive_column)

        # Two opponent pieces in columns 1 and 2 of the last row: the agent
        # has to answer directly to the left or to the right of them.
        open_pair = blank.copy()
        open_pair[-1, 1:3] = opponent
        for agent in move_agents:
            chosen = agent(open_pair, player, None)[0]
            assert chosen in (PlayerAction(0), PlayerAction(3))
def generate_move_alpha_beta(
    board: Board, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move with a depth-first minimax search using alpha-beta
    pruning on the bitmap representation of the board.

    :param board: 2d array representing current state of the game
    :param player: the piece handed to board_to_bitmap together with the
                   board (presumably the piece this agent plays - confirm)
    :param saved_state: not used; returned unchanged
    :return: the agent's selected move and the unchanged saved_state
    """
    # Opening move: on an empty board always take the center column
    if np.all(board == NO_PLAYER):
        n_columns = board.shape[1]
        center_column = np.floor(np.median(np.arange(n_columns)))
        return PlayerAction(center_column), saved_state

    # Bitmaps handed to the search
    max_board, mask_board = board_to_bitmap(board, player)

    # Initial search window, then start the search as the maximizing side
    lower_bound, upper_bound = -100000, 100000
    _, best_action = alpha_beta(max_board, mask_board, True, 0,
                                lower_bound, upper_bound, board.shape)
    return PlayerAction(best_action), saved_state
def user_move(board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState]):
    """Prompt on stdin until the user enters a column index on the board.

    Input that cannot be converted to PlayerAction triggers a hint and a new
    prompt; a number outside ``0 .. board.shape[1] - 1`` simply prompts again.
    Returns the chosen column together with the unchanged ``saved_state``.
    """
    n_columns = board.shape[1]
    column = PlayerAction(-1)  # deliberately invalid so the loop runs once
    while column < 0 or column >= n_columns:
        try:
            column = PlayerAction(input("Column? "))
        except ValueError:
            print("Input could not be converted to the dtype PlayerAction, try entering an integer.")
    return column, saved_state
def user_move(board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState]):
    """Ask the user for a column until a valid column index is entered.

    :param board: current board; only its number of columns is used
    :param _player: unused, kept for the common agent signature
    :param saved_state: returned unchanged
    :return: the chosen column and the unchanged saved_state
    """
    action = PlayerAction(-1)  # deliberately invalid so the loop runs once
    while not 0 <= action < board.shape[1]:
        try:
            action = PlayerAction(input("Column? "))
        except (ValueError, OverflowError):
            # Only conversion failures are retried. A bare ``except`` would
            # also swallow KeyboardInterrupt and EOFError, making the prompt
            # impossible to abort and spinning forever on a closed stdin.
            print("Input could not be converted to a column number, try entering an integer.")
    return action, saved_state
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """Pick a random column whose cell in row ``ROWS - 1`` is still empty.

    :param board: current board, indexed as board[row][column]
    :param player: piece of the agent; the choice does not depend on it
    :param saved_state: returned unchanged
    :return: the chosen column and the unchanged saved_state
    :raises ValueError: if no column has a free cell in row ``ROWS - 1``
    """
    # A column is playable while its cell in row ROWS - 1 is still 0
    last_row = board[ROWS - 1]
    valid_columns = [col for col in range(COLUMNS) if last_row[col] == 0]
    if not valid_columns:
        raise ValueError("No valid column left: the board is full")

    # random.sample returns a list, so unwrap its single element before
    # converting; the move is now chosen for either player instead of
    # returning the placeholder -1 for everyone but player 2.
    action = PlayerAction(random.sample(valid_columns, 1)[0])
    return action, saved_state
def ucb1_func(node: Node, c: float) -> "Tuple[Node, PlayerAction]":
    """
    Returns the most urgent child to visit using the Upper Confidence Bound
    (UCB1) and the action that leads to it.

    :param node: Node: current node in which to check for most urgent child;
                 its ``children`` mapping goes from action to child node
    :param c: float: exploration parameter
    :return: tuple of (most urgent child node, action key of that child)
    """
    child_action_key, urgent_child = None, None
    ucb1_max = -np.inf

    for action_key, child in node.children.items():
        # Average reward plus the exploration bonus.
        # NOTE(review): divides by child.n, so every child is expected to
        # have been visited at least once before this is called.
        exploitation = child.r / child.n
        exploration = c * ((2 * log(node.n)) / child.n) ** (1 / 2)
        ucb1 = exploitation + exploration

        # ``>=`` keeps the original tie-break: the last child among equally
        # urgent ones wins.
        if ucb1 >= ucb1_max:
            ucb1_max = ucb1
            urgent_child = child
            child_action_key = action_key

    return urgent_child, PlayerAction(child_action_key)
def generate_move_random(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]):
    """Return a random column that still accepts a piece.

    Columns are tried in a random order on a scratch copy of the board, so
    the board passed in is never modified. Because each column is tried at
    most once, the search terminates even when the board is full.

    Args:
        board: Current state of the board.
        player: Whose turn it is.
        saved_state: Pre-computation from previous steps; passed through.

    Returns:
        action: Column to use.
        saved_state: The unchanged saved_state argument.

    Raises:
        ValueError: If apply_player_action rejects every column.
    """
    scratch = board.copy()
    # Use the board's own width instead of a hard-coded 7 columns.
    for column in np.random.permutation(board.shape[1]):
        action = PlayerAction(column)
        scratch, position = apply_player_action(scratch, action, player, True, True)
        # apply_player_action signals a rejected (full) column with position 0
        if position != 0:
            return action, saved_state
    raise ValueError("No valid column left: the board is full")
def ucb1_func(node: Node, c: float) -> "Tuple[Node, PlayerAction]":
    """
    Returns the most urgent child to visit using the Upper Confidence Bound
    (UCB1) and the action that leads to it.

    :param node: Node: current node in which to check for most urgent child;
                 its ``children`` mapping goes from action to child node
    :param c: float: exploration parameter
    :return: tuple of (most urgent child node, action key of that child)
    """
    child_action_key = None
    ucb1_max = -np.inf

    for action_key, child in node.children.items():
        # Average reward plus the exploration bonus.
        # NOTE(review): divides by child.n, so every child is expected to
        # have been visited at least once before this is called.
        ucb1 = (child.r / child.n) + c * np.sqrt(
            (2 * np.log(node.n)) / child.n)

        # ``>=`` keeps the original tie-break: the last child among equally
        # urgent ones wins.
        if ucb1 >= ucb1_max:
            ucb1_max = ucb1
            child_action_key = action_key

    return node.children[child_action_key], PlayerAction(child_action_key)
def alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
               alpha: float, beta: float,
               maximizingPlayer: bool) -> Tuple[PlayerAction, float]:
    """Minimax search with alpha-beta pruning.

    :param board: position to evaluate
    :param player: the piece that moves next in this position
    :param depth: remaining search depth; 0 evaluates with score_position
    :param alpha: best value the maximizing side can already guarantee
    :param beta: best value the minimizing side can already guarantee
    :param maximizingPlayer: True if ``player`` is the maximizing side
    :return: (column, value). The column is -1 for finished games, where no
             move exists; at depth 0 it is an arbitrary valid column.
    """
    # Columns whose cell in the last row is still empty
    valid_columns = np.where(board[-1, :] == 0)[0]
    opp_player = PLAYER2 if player == PLAYER1 else PLAYER1

    # ``player`` is about to move, so the last piece was always dropped by
    # ``opp_player`` - that is the side whose win has to be checked, at
    # maximizing and minimizing nodes alike.
    game_state = check_end_state(board, opp_player)
    if game_state == GameState.IS_WIN:
        # At a maximizing node the minimizer has just won, and vice versa
        return PlayerAction(-1), (-1000000000000 if maximizingPlayer
                                  else 1000000000000)
    if game_state == GameState.IS_DRAW or valid_columns.size == 0:
        # No column is left to play, so there is nothing to sample from
        return PlayerAction(-1), 0
    if depth == 0:
        # NOTE(review): the position is scored for ``player``, i.e. the side
        # to move at the leaf - confirm this is the intended perspective
        # when the search is started with an odd depth.
        return (PlayerAction(np.random.choice(valid_columns)),
                score_position(board, player))

    # Fallback column; it is replaced as soon as a child improves ``value``
    column = np.random.choice(valid_columns)
    if maximizingPlayer:
        value = -math.inf
        for col in valid_columns:
            new_board = apply_player_action(board, PlayerAction(col), player, True)
            new_score = alpha_beta(new_board, opp_player, depth - 1,
                                   alpha, beta, False)[1]
            if new_score > value:
                value = new_score
                column = col
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return PlayerAction(column), value

    # Minimizing player
    value = math.inf
    for col in valid_columns:
        new_board = apply_player_action(board, PlayerAction(col), player, True)
        new_score = alpha_beta(new_board, opp_player, depth - 1,
                               alpha, beta, True)[1]
        if new_score < value:
            value = new_score
            column = col
        beta = min(beta, value)
        if alpha >= beta:
            break
    return PlayerAction(column), value
def test_connected_four_horizontal(self):
    """connected_four finds four PLAYER1 pieces in a row in columns 0-3 and
    rejects the same row when column 2 belongs to PLAYER2."""

    def play_columns_0_to_3(pieces):
        # Drop one piece per column, left to right, onto a fresh board.
        # Relies on apply_player_action modifying the board in place.
        board = common.initialize_game_state()
        for column, piece in enumerate(pieces):
            common.apply_player_action(board, PlayerAction(column), piece)
        return board

    c4_yes = play_columns_0_to_3([common.PLAYER1] * 4)
    c4_no = play_columns_0_to_3(
        [common.PLAYER1, common.PLAYER1, common.PLAYER2, common.PLAYER1])

    # Once without and once with the last action as a hint
    assert common.connected_four(c4_yes, PLAYER1) == True
    assert common.connected_four(c4_yes, PLAYER1, PlayerAction(3)) == True
    assert common.connected_four(c4_no, PLAYER1) == False
    assert common.connected_four(c4_no, PLAYER1, PlayerAction(3)) == False
def test_apply_player_action():
    """apply_player_action returns a numpy array when asked to copy."""
    from agents.common import apply_player_action, PlayerAction

    empty_board = np.zeros((6, 7), dtype=BoardPiece)
    # Fourth positional argument is the copy flag
    result = apply_player_action(empty_board, PlayerAction(2), BoardPiece(2), True)

    assert isinstance(result, np.ndarray)
def test_apply_player_action():
    """A PLAYER1 piece dropped into column 0 of an empty board lands at
    [0, 0], both in the returned copy and when applied in place."""
    from agents.common import apply_player_action
    from agents.common import initialize_game_state

    expected = initialize_game_state()
    expected[0, 0] = PLAYER1

    board = initialize_game_state()
    # First with copy=True (only the result is compared), ...
    copied_result = apply_player_action(board, PlayerAction(0), PLAYER1, copy=True)
    # ... then without copying, which has to modify ``board`` itself.
    apply_player_action(board, PlayerAction(0), PLAYER1)

    assert (copied_result == expected).all()
    assert (board == expected).all()
def user_move(board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState], args):
    """
    Ask the user for a column until a playable one is entered.

    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param _player: Player ID of the user
    :param saved_state: not used in this implementation of the user move generation
    :param args: Optional parameter (unused)
    :return: Column the user wants to drop his piece into, and a fresh SavedState
    """
    action = PlayerAction(-1)
    move_worked = None

    # Repeat until a column inside the board has been accepted by
    # apply_player_action.
    while not 0 <= action < board.shape[1] or move_worked is None:
        move_worked = None
        try:
            action = PlayerAction(input("Column? "))
        except (ValueError, OverflowError):
            # Only conversion problems are retried; KeyboardInterrupt and
            # EOFError propagate so the prompt can be aborted.
            print("Input could not be converted to a column number, try entering an integer.")
            continue

        # Check the range first so that a negative number is never applied
        # to the board through negative indexing.
        if not 0 <= action < board.shape[1]:
            print("That column is not on the board.")
            continue

        try:
            # NOTE(review): the move is applied to ``board`` itself here -
            # confirm the caller does not apply the returned action again.
            move_worked = apply_player_action(board, action, _player)
        except Exception:
            # apply_player_action rejects invalid moves (e.g. a full column)
            # by raising; ask again in that case.
            print("That move is not possible, choose another column.")
    return action, SavedState()
def test_apply_player_action():
    """After PLAYER1 plays column 3 the board keeps its shape and that
    column contains a PLAYER1 piece."""
    from agents.common import apply_player_action

    action = PlayerAction(3)
    player = PLAYER1
    # Work on a copy so the shared fixture b1 is not modified for other tests
    board = b1.copy()

    board_after_action = apply_player_action(board, action, player)

    assert board_after_action.shape == board.shape
    # Compare the cells with the piece itself; ``.any() == PLAYER1`` only
    # compared a bool with the piece value.
    assert (board_after_action == player).any()
    assert (board_after_action[:, action] == player).any()
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """Choose a column with a depth-4 minimax search over the full
    (-inf, +inf) alpha-beta window, started as the maximizing side.

    Returns the first element of minimax's result as the action, together
    with the unchanged ``saved_state``.
    """
    search_depth = 4
    search_result = minimax(board, search_depth, -math.inf, math.inf, player, True)
    return PlayerAction(search_result[0]), saved_state
def generate_move_random(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """Choose a valid, non-full column randomly and return it as `action`.

    A column counts as non-full while its cell in the last row of the board
    is 0. The last row is addressed as ``-1`` rather than ``5`` so boards
    with a different number of rows work as well.

    :param board: current board
    :param player: unused, kept for the common agent signature
    :param saved_state: returned unchanged
    :return: the chosen column and the unchanged saved_state
    :raises IndexError: from random.choice when no column is free
    """
    top_row = board[-1, :]
    open_columns = [idx for idx, cell in enumerate(top_row) if cell == 0]
    action = PlayerAction(random.choice(open_columns))
    return action, saved_state
def test_random():
    """With a single empty cell left (row 0, column 3) the random agent has
    to answer with column 3."""
    from agents.agents_random.random import generate_move_random

    nearly_full = np.array([
        [1, 2, 2, 0, 1, 2, 2],
        [2, 1, 1, 2, 1, 2, 2],
        [2, 2, 1, 1, 1, 2, 2],
        [2, 1, 2, 2, 2, 1, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [1, 1, 2, 1, 2, 1, 2],
    ])

    action, _ = generate_move_random(nearly_full, BoardPiece(1), saved_state=0)

    assert isinstance(action, PlayerAction)
    assert action == PlayerAction(3)  # the only column with a free cell
def generate_move_random(
    board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState] = None
) -> Tuple[PlayerAction, SavedState]:
    """Choose a valid, non-full column randomly and return it as `action`.

    A column counts as non-full while its cell in the last row is 0.

    :param board: current board
    :param _player: unused, kept for the common agent signature
    :param saved_state: returned unchanged
    :return: the chosen column (a scalar PlayerAction) and saved_state
    :raises ValueError: from np.random.choice when no column is free
    """
    valid_columns = np.where(board[-1, :] == 0)[0]
    # Draw a single scalar; requesting ``size=1`` would yield a one-element
    # array and PlayerAction would then wrap an array instead of a number.
    action = PlayerAction(np.random.choice(valid_columns))
    return action, saved_state
def generate_move_random(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState], args=None) \
        -> Tuple[PlayerAction, SavedState]:
    """
    Pick uniformly among the columns that still contain an empty cell.

    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID of random agent (unused)
    :param saved_state: Not used in this implementation of the random move generation
    :param args: Optional parameter (unused)
    :return: Randomly chosen column for the move, and a fresh SavedState
    """
    # Column index of every empty cell; a column shows up once per free cell
    empty_cell_columns = np.where(board == NO_PLAYER)[1]
    # Collapse the duplicates so each open column is equally likely
    open_columns = np.unique(empty_cell_columns)
    chosen = np.random.choice(open_columns)
    return PlayerAction(chosen), SavedState()
def test_apply_player_action_fail():
    """Test that an error is raised if an action in a full or non-existent
    column is applied"""
    expected_message = "Tried to apply an action in a non existent or full column"

    # Insertion into an already full column: fill the board completely with
    # one player and try every column.
    full_board = initialize_game_state()
    full_board[:] = PLAYER1
    for column in range(full_board.shape[1]):
        # The call is not wrapped in ``assert``: if it returned a board
        # instead of raising, evaluating the array's truth value would raise
        # a ValueError and satisfy pytest.raises(Exception) by accident.
        with pytest.raises(Exception) as e:
            apply_player_action(full_board, PlayerAction(column), PLAYER1)
        # Checked after the ``with`` block; statements placed behind the
        # raising call inside the block would never run.
        assert str(e.value) == expected_message

    # A column that does not exist on the board
    with pytest.raises(Exception) as e:
        apply_player_action(initialize_game_state(), PlayerAction(100), PLAYER1)
    assert str(e.value) == expected_message
def generate_move_minimax(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState],
                          depth: int = 4) -> Tuple[PlayerAction, SavedState]:
    """
    Choose a column with a minimax search using alpha-beta pruning.

    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID
    :param saved_state: Not used in this implementation of the minimax move generation
    :param depth: How many moves the search looks ahead
    :return: Column chosen by the search, and a fresh SavedState
    """
    # Opening move: an empty board is always answered with column 3
    board_is_empty = not board.any()
    if board_is_empty:
        return PlayerAction(3), SavedState()

    # Turn order for the search: this player first, the opponent second
    others = [PLAYER1, PLAYER2]
    others.remove(player)
    turn_order = [player, *others]

    # Full (-inf, +inf) window, searching ``depth`` moves ahead as the
    # maximizing side; minimax returns the action as its second element.
    _, best_action = minimax(board, -np.inf, np.inf, turn_order, depth, True)
    return PlayerAction(best_action), SavedState()
def generate_move_MCTS(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState],
                       max_time: float = 5) -> Tuple[PlayerAction, SavedState]:
    """
    Choose a column by running MCTS with a time budget.

    :param board: State of board, 6 x 7 with either 0 or player ID [1, 2]
    :param player: Player ID
    :param saved_state: Not used in this implementation of the move generation
    :param max_time: Time in seconds given to the MCTS agent to find the next action
    :return: Column chosen by MCTS, and a fresh SavedState
    """
    # The whole search, including the time limit, is delegated to MCTS
    return PlayerAction(MCTS(board, player, max_time)), SavedState()
def generate_move_minimax(
    board: np.ndarray, _player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, SavedState]:
    """Choose a column with a depth-4 alpha-beta search, started as the
    maximizing side with the full (-inf, +inf) window.

    Returns the first element of alpha_beta's result as the action, together
    with the unchanged ``saved_state``.
    """
    search_depth = 4
    search_result = alpha_beta(board, _player, search_depth,
                               -math.inf, math.inf, True)
    return PlayerAction(search_result[0]), saved_state
def buildGameStateFromID(self):
    """Replay the moves encoded in self.positionID on self.board.

    Each character of positionID is one column (e.g. "4256": first move in
    column 4, second move in column 2, ...). The players alternate, starting
    with self.player. At the first move that is not possible, self.status is
    set to False and the remaining moves are skipped.
    """
    mover = self.player
    for digit in self.positionID:
        action = PlayerAction(int(digit))
        if not move_is_possible(self.board, action):
            # The ID describes a position that cannot be reached
            self.status = False
            return
        apply_player_action(self.board, action, mover)
        mover = other_player(mover)
def test_apply_player_action():
    """apply_player_action drops the piece on top of the column, hands back
    the previous board when asked to copy, and reports 0 for a full column."""
    from agents.common import apply_player_action, initialize_game_state

    board = initialize_game_state()
    # Row 5 already holds pieces in columns 0-5
    board[5, 0] = PLAYER2
    board[5, 1] = PLAYER1
    board[5, 2] = PLAYER2
    board[5, 3] = PLAYER1
    board[5, 4] = PLAYER1
    board[5, 5] = PLAYER1
    copy_board = board.copy()

    old_board, position = apply_player_action(board, PlayerAction(3), PLAYER1, True, True)
    # Compare cell by cell; ``a.all() == b.all()`` only compared two bools
    assert (old_board == copy_board).all()
    assert position == (4, 3)
    assert board[position] == PLAYER1

    board[:, 0] = PLAYER1
    position2 = apply_player_action(board, PlayerAction(0), PLAYER1, False, True)
    assert position2 == 0  # Return 0 if full column.
def generate_move_MCTS(board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]) \
        -> object:
    """Choose a column with MCTS.

    Stores the agent's piece and the opposing piece in the module-level
    globals PLAYER and OPPONENT before calling MCTS(board). Returns the
    chosen column and a fresh SavedState.
    """
    global PLAYER
    global OPPONENT

    PLAYER = player
    OPPONENT = PLAYER2 if PLAYER == PLAYER1 else PLAYER1

    return PlayerAction(MCTS(board)), SavedState()
def traverse(self):
    """
    Run one MCTS iteration below this node and return its result

    Terminal nodes report their result directly: a won state returns True
    when this node's max_player flag is set and False otherwise, a drawn
    state returns -1.

    For any other node: while some of its actions have no child yet, the
    next action in self.actions is expanded into a new child, a game is
    simulated from that child and the child's stats are updated. Once all
    actions are expanded, the child picked by ucb1_select is traversed
    recursively instead. In both cases this node's stats are updated with
    the result before it is returned to the caller.
    """
    # Terminal states end the traversal without touching any stats
    if self.state == GameState.IS_WIN:
        return True if self.max_player else False
    if self.state == GameState.IS_DRAW:
        return -1

    n_expanded = len(self.children)
    if n_expanded < len(self.actions):
        # Expansion: children are created in the order of self.actions, so
        # the number of existing children indexes the next action
        action = PlayerAction(self.actions[n_expanded])
        child_board, child_mask = apply_action_cp(self.board, self.mask,
                                                  action, self.shape)
        # The child gets the flipped max_player flag
        child = Connect4Node(child_board, child_mask, self.shape, action,
                             not self.max_player)
        self.add_child(child)

        # Simulation from the new child, recorded on the child first
        max_win = child.sim_game()
        child.update_stats(max_win)
    else:
        # Selection: descend into the child chosen by ucb1_select
        max_win = self.children[self.ucb1_select()].traverse()

    # Backpropagation: every node on the path records the result on return
    self.update_stats(max_win)
    return max_win
def test_apply_player_action_success():
    """Test for successful application of actions"""
    # Every cell has to be reachable for every player: fill the board column
    # by column and check the result after each single drop.
    for player in players:
        board = initialize_game_state()
        n_rows, n_cols = board.shape[0], len(board.T)

        for col in range(n_cols):
            for height in range(n_rows):
                board = apply_player_action(board=board,
                                            action=PlayerAction(col),
                                            player=player)

                assert isinstance(board, np.ndarray)
                assert board.dtype == np.int8
                assert board.shape == (6, 7)
                assert np.any(board != 0)
                # The piece lands ``height`` cells above the last row
                assert board[n_rows - 1 - height, col] == player
def generate_move_minimax(
    board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Choose a column with a minimax search called with depth argument 4.

    :param board: np.ndarray
        Current state of the board, shape (ROWS, COLUMNS) and data type
        (dtype) BoardPiece
    :param player: BoardPiece
        Current player playing the game
    :param saved_state:
        Saved state of the game; returned unchanged
    :return:
        action: PlayerAction
            The column to be played
        saved_state:
            The saved state of the game
    """
    # NOTE(review): +inf is passed before -inf here - confirm against the
    # parameter order of minimax that the two bounds are not swapped.
    search_result = minimax(4, board, player, math.inf, -math.inf, True)
    best_column = search_result[0]
    return PlayerAction(int(best_column)), saved_state