def test_apply_player_action():
    c_board = cm.initialize_game_state()
    copied_board, board = cm.apply_player_action(c_board, 3, cm.PLAYER1)
    test_board = np.zeros((6, 7), dtype=cm.BoardPiece)
    test_board[0, 3] = cm.PLAYER1
    assert np.array_equal(test_board, board)
def human_vs_agent(
        generate_move_1: GenMove,
        generate_move_2: GenMove = user_move,
        player_1: str = "Player 1",
        player_2: str = "Player 2",
        args_1: tuple = (),
        args_2: tuple = (),
        init_1: Callable = lambda board, player: None,
        init_2: Callable = lambda board, player: None,
):
    players = (PLAYER1, PLAYER2)
    # Play two games, swapping which player moves first
    for play_first in (1, -1):
        for init, player in zip((init_1, init_2)[::play_first], players):
            init(initialize_game_state(), player)

        saved_state = {PLAYER1: None, PLAYER2: None}
        board = initialize_game_state()
        gen_moves = (generate_move_1, generate_move_2)[::play_first]
        player_names = (player_1, player_2)[::play_first]
        gen_args = (args_1, args_2)[::play_first]

        playing = True
        end_state = GameState.STILL_PLAYING
        while playing:
            for player, player_name, gen_move, args in zip(
                players, player_names, gen_moves, gen_args,
            ):
                t0 = time.time()
                print(pretty_print_board(board))
                action, saved_state[player] = gen_move(
                    board.copy(), player, saved_state[player], *args
                )
                print(f"{player_name}'s action is {action}")
                print(f"Move time: {time.time() - t0:.3f}s")
                # apply_player_action returns (new_board, original_board)
                # when called with copy=True
                board, r_board = apply_player_action(board, action, player, True)
                end_state = check_end_state(board, player)
                if end_state != GameState.STILL_PLAYING:
                    print(pretty_print_board(board))
                    if end_state == GameState.IS_DRAW:
                        print("Game ended in draw")
                    else:
                        print(f'{player_name} won playing '
                              f'{"X" if player == PLAYER1 else "O"}')
                    playing = False
                    break
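# A minimal usage sketch for human_vs_agent: user_move (already the
# default for generate_move_2) can also be passed as the first move
# generator to pit two console players against each other; any
# GenMove-compatible agent function could be substituted for either side.
if __name__ == "__main__":
    human_vs_agent(user_move)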
def simulate_game(node: mcts_node.mcts_node) -> tuple:
    """
    Simulate a random playout from the current node's state

    :param node: The MCTS node from which the simulation starts
    :return: tuple (win, current_player) indicating whether a player
        connected four during the simulation and who moved last
    """
    board = node.state.copy()
    win = False
    current_player = node.player
    while np.any(cn.get_free_columns(board)) and not win:
        # Alternate players: the node's player has already moved,
        # so the opponent moves first in the simulation
        if current_player == cn.PLAYER2:
            current_player = cn.PLAYER1
        else:
            current_player = cn.PLAYER2
        # Choose a random action among the free columns
        action = np.random.choice(cn.get_free_columns(board))
        # Apply the action
        board, _ = cn.apply_player_action(board, action, current_player)
        # Check if the game is won
        win = cn.connected_four(board, current_player)
    return win, current_player
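# A minimal sketch of how the (win, current_player) tuple returned by
# simulate_game could feed the backpropagation step of MCTS. The
# attribute names `visits`, `wins` and `parent` are assumptions for
# illustration, not necessarily those of the actual mcts_node class.
def backpropagate(node, win: bool, winner) -> None:
    """Propagate a simulation result from the expanded node to the root."""
    while node is not None:
        node.visits += 1
        # Credit nodes owned by the winning player; a draw
        # (win is False) adds a visit but no win
        if win and node.player == winner:
            node.wins += 1
        node = node.parent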
def expand_node(self, action: common.PlayerAction):
    """
    Expand the node by applying the given action

    :param action: Action to apply in order to expand the node
    :return: Child node created by the expansion (the board of the
        child node corresponds to the board of the parent after the
        action was applied)
    """
    # The child position is reached by the opponent of this node's
    # player making a move, so the child belongs to the opponent
    opponent = common.PLAYER2 if self.player == common.PLAYER1 else common.PLAYER1
    # Apply the action to a copy of the board of the parent node
    new_board, original_board = common.apply_player_action(
        self.state.copy(), action, opponent)
    # Create a new child node with that action and board
    child = mcts_node(move=action, parent=self, state=new_board,
                      player=opponent)
    # Append the created child to the children of the parent
    self.children.append(child)
    # Remove the action from the untried actions of the parent
    self.open_moves = np.setdiff1d(self.open_moves, action)
    return child
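# A sketch of how expand_node could slot into the selection/expansion
# phase of one MCTS iteration, assuming the node exposes `open_moves`
# (untried actions, as above) plus `is_terminal()` and `best_child()`
# helpers; those two helper names are assumptions for illustration.
def tree_policy(root):
    """Descend the tree, expanding the first node with untried moves."""
    node = root
    while not node.is_terminal():
        if len(node.open_moves) > 0:
            # Expand one untried action and run the simulation from there
            action = np.random.choice(node.open_moves)
            return node.expand_node(action)
        # Otherwise follow the best child (e.g. by UCB1) downwards
        node = node.best_child()
    return node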
def minimax(board: np.ndarray, depth: int, alpha: int, beta: int,
            maximizing_player: bool) -> tuple:
    """
    Applies the minimax algorithm with alpha-beta pruning to the board
    to give a suggested column and its maximising/minimising score

    :parameter board: the playing board of type np.ndarray
    :parameter depth: the depth to which the board is evaluated
    :parameter alpha: the minimum score the maximising player is assured of
    :parameter beta: the maximum score the minimising player is assured of
    :parameter maximizing_player: boolean flag indicating whether the
        algorithm should maximise or minimise
    :return: The column and the computed maximum/minimum score
    """
    # If the node is terminal, return a win/loss/draw value;
    # if depth is 0, return the heuristic value of the node
    if cn.connected_four(board, AGENT):
        return None, 10000
    if cn.connected_four(board, HUMAN):
        return None, -10000
    # Check if there is a draw
    if cn.check_end_state(board, AGENT) == cn.GameState.IS_DRAW:
        return None, 0
    # If depth is 0, fall back to the heuristic scoring
    if depth == 0:
        return None, heuristic_scoring(board, AGENT)

    # Get a list of columns open for placement
    col_list = cn.get_free_columns(board)

    if maximizing_player:
        value = -math.inf
        column = np.random.choice(col_list)
        # Child nodes are the boards reached by playing each free column
        for col in col_list:
            b_copy, ori_board = cn.apply_player_action(board, col, AGENT, True)
            # Recurse for the minimising player and keep only the score
            new_score = minimax(b_copy, depth - 1, alpha, beta, False)[1]
            # Debugging output at the root of the search
            if depth == GLOBAL_DEPTH:
                print('For col {}, the score is {}'.format(col, new_score))
            # Keep the column with the highest score seen so far
            if new_score > value:
                value = new_score
                column = col
            # Alpha pruning in the maximising node
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return column, value
    # Minimising branch
    else:
        value = math.inf
        column = np.random.choice(col_list)
        for col in col_list:
            b_copy, ori_board = cn.apply_player_action(board, col, HUMAN, True)
            new_score = minimax(b_copy, depth - 1, alpha, beta, True)[1]
            # Keep the column with the lowest score seen so far
            if new_score < value:
                value = new_score
                column = col
            # Beta pruning check
            beta = min(beta, value)
            if alpha >= beta:
                break
        return column, value
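# heuristic_scoring is called above but not shown here; what follows is
# a minimal sketch of one common Connect Four heuristic, assuming the
# 6x7 board layout used in the tests. The quadratic window weights are
# illustrative assumptions, not the values used by the original agent.
def heuristic_scoring(board: np.ndarray, player) -> int:
    """Score the board by examining every horizontal, vertical and
    diagonal window of four cells."""
    opponent = HUMAN if player == AGENT else AGENT
    rows, cols = board.shape
    # Collect all 4-cell windows on the board
    windows = []
    for r in range(rows):
        for c in range(cols):
            if c + 4 <= cols:
                windows.append(board[r, c:c + 4])
            if r + 4 <= rows:
                windows.append(board[r:r + 4, c])
            if r + 4 <= rows and c + 4 <= cols:
                windows.append(np.array([board[r + i, c + i] for i in range(4)]))
                windows.append(np.array([board[r + 3 - i, c + i] for i in range(4)]))
    score = 0
    for window in windows:
        own = np.count_nonzero(window == player)
        theirs = np.count_nonzero(window == opponent)
        if theirs == 0:
            score += own * own  # reward windows the player can still complete
        elif own == 0:
            score -= theirs * theirs  # penalise the opponent's open windows
    return score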
def test_apply_player_action_copy():
    board = cm.initialize_game_state()
    cp, board = cm.apply_player_action(board, 6, cm.PLAYER1, True)
    # The modified copy must differ from the untouched original
    assert not np.all(cp == board)
    assert cp[0, 6] == 1
def test_apply_player_action_exception():
    board = cm.initialize_game_state()
    # Occupy the top cell of column 6 so the column is full
    board[5, 6] = cm.PLAYER1
    with pytest.raises(Exception,
                       match='Cannot place player in that particular position'):
        cm.apply_player_action(board, 6, cm.PLAYER2)