from typing import Tuple


def run_simulation(start_node: MCTSNode, root_player: BoardPiece,
                   print_final=False) -> Tuple[np.ndarray, GameState]:
    """
    The 3rd part of the algorithm. This function plays out a complete game
    with random moves, from the start node's board until one player wins or
    the game is drawn. This is one simulation in the MCTS algorithm.

    :param start_node: the expanded node from which the simulation starts
    :param root_player: the player to move at the root of the tree; the
        simulation result is evaluated from this player's perspective
    :param print_final: flag variable for printing the final board of the game
    :return: the final board state (np.ndarray) and the game end state (GameState)
    """
    current_board = start_node.board.copy()
    current_player = start_node.player
    while check_end_state(current_board, current_player) == GameState.STILL_PLAYING:
        # pick a uniformly random move among the still-playable columns
        possible_actions = possible_moves(current_board)
        action = possible_actions[np.random.randint(len(possible_actions))]
        current_board = apply_player_action(current_board, np.int8(action),
                                            current_player)
        current_player = find_opponent(current_player)
    game_result = check_end_state(current_board, root_player)
    if print_final:
        print(pretty_print_board(current_board))
    return current_board, game_result
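
# `possible_moves` is referenced above but not defined in this section.
# A minimal sketch, assuming NO_PLAYER marks empty cells and pieces stack
# from row 0 (bottom) upwards, so the last row is the top of each column:
def possible_moves(board: np.ndarray) -> np.ndarray:
    # a column is playable as long as its top cell is still empty
    return np.array([col for col in range(board.shape[1])
                     if board[-1, col] == NO_PLAYER])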

def minimax_algorithm(board: np.ndarray, root_player: BoardPiece,
                      current_player: BoardPiece, depth: int = 4,
                      alpha=NEGATIVE_INF, beta=POSITIVE_INF) -> float:
    """
    The recursive minimax algorithm with alpha-beta pruning and dynamic depth.

    :param board: the current board
    :param root_player: the player who makes the move on the root board
    :param current_player: the player making the move on the current board
    :param depth: the current depth
    :param alpha: alpha factor in alpha-beta pruning
    :param beta: beta factor in alpha-beta pruning
    :return: the minimax score of the board from root_player's perspective
    """
    if depth == 0 or check_end_state(board, current_player) != GameState.STILL_PLAYING:
        # compute_score_2 replaced the simpler compute_score heuristic
        return compute_score_2(board, root_player)
    children = generate_child_boards(board, current_player)
    if current_player == root_player:
        # maximizing player: root_player picks the move with the highest score
        max_score = NEGATIVE_INF
        for child in children:
            score = minimax_algorithm(child, root_player,
                                      find_opponent(current_player),
                                      depth - 1, alpha, beta)
            max_score = max(max_score, score)
            alpha = max(alpha, score)
            if beta <= alpha:  # prune: the opponent never allows this branch
                break
        return max_score
    else:
        # minimizing player: the opponent picks the move with the lowest score
        min_score = POSITIVE_INF
        for child in children:
            score = minimax_algorithm(child, root_player,
                                      find_opponent(current_player),
                                      depth - 1, alpha, beta)
            min_score = min(min_score, score)
            beta = min(beta, score)
            if beta <= alpha:
                break
        return min_score
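
# `generate_child_boards` is not shown in this section. A minimal sketch
# consistent with its use above, reusing the (assumed) possible_moves helper:
def generate_child_boards(board: np.ndarray, player: BoardPiece) -> list:
    # one child board per playable column, with `player`'s piece dropped in
    return [apply_player_action(board, np.int8(col), player, copy=True)
            for col in possible_moves(board)]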

def mcts_algorithm(board: np.ndarray, root_player: BoardPiece, trials=100,
                   profiling=False) -> list:
    """
    The Monte Carlo Tree Search algorithm. Starting from a given board, when
    the root_player has to make a move, it runs "trials" simulations in order
    to find which next move is the best. While doing so, it constructs a tree
    (a data structure composed of MCTSNode objects, connected by .parent and
    .children references). MCTS has 4 phases: selection, expansion, simulation
    and back propagation.

    :param board: the game state for which the next action has to be decided
    :param root_player: the player that should make the next action
    :param trials: number of simulations the algorithm performs for
        constructing the MC tree before selecting a move
    :param profiling: flag variable for printing the time spent in each phase
    :return: the MC tree as a list
    """
    root_node = MCTSNode(board, root_player)
    mcts_tree = [root_node]
    t = np.zeros((5, trials))
    for i in range(trials):
        t[0, i] = time.time()
        selected_node = do_selection(root_node)
        t[1, i] = time.time()
        expanded_node = do_expansion(selected_node)
        mcts_tree.append(expanded_node)
        t[2, i] = time.time()
        final_board, simulation_result = run_simulation(expanded_node,
                                                        root_player,
                                                        print_final=False)
        t[3, i] = time.time()
        # wins are credited to the nodes owned by the player who lost the
        # simulation: the move into such a node was made by the winner, so a
        # high win count there makes the parent prefer that move
        if simulation_result == GameState.IS_LOST:
            gain_wins_player = root_player
        else:
            gain_wins_player = find_opponent(root_player)
        back_propagate_statistics(expanded_node, gain_wins_player)
        t[4, i] = time.time()
    if profiling:
        print("Selection: %.3f" % (t[1, :] - t[0, :]).sum())
        print("Expansion: %.3f" % (t[2, :] - t[1, :]).sum())
        print("Simulation: %.3f" % (t[3, :] - t[2, :]).sum())
        print("Back propagation: %.3f" % (t[4, :] - t[3, :]).sum())
    return mcts_tree
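
# Neither `do_selection` nor `back_propagate_statistics` is defined in this
# section. The sketches below mirror the selection / back-propagation logic
# of not_used_mcts_algorithm further down; they are assumptions, not the
# exact implementations used here:
def do_selection(root_node: "MCTSNode") -> "MCTSNode":
    # descend via UCB1 while the current node is fully expanded; stop at the
    # first node that still has unexpanded children (terminal positions,
    # where children_index is empty, are not handled in this sketch)
    current_node = root_node
    while current_node.children and \
            len(current_node.children) == len(current_node.children_index):
        ucb_scores = np.array([
            upper_confidence_bound_1(c.wins, c.plays, current_node.plays)
            for c in current_node.children
        ])
        current_node = current_node.children[int(np.argmax(ucb_scores))]
    return current_node


def back_propagate_statistics(node: "MCTSNode",
                              gain_wins_player: BoardPiece) -> None:
    # walk from the expanded node up to the root, updating visit counts;
    # wins go to the nodes owned by gain_wins_player (see mcts_algorithm)
    while node is not None:
        node.plays += 1
        if node.player == gain_wins_player:
            node.wins += 1
        node = node.parent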

def compute_score(board: np.ndarray, player: BoardPiece) -> float:
    """
    This method is a dummy heuristic in minimax. The scores returned are
    100 (for winning), -100 (for losing) and 0 for any other case.

    :param board: the board state for which the score is computed
    :param player: the player for whom the score is computed
    :return: the score
    """
    if connected_four(board, player):
        return 100
    opponent = find_opponent(player)
    if connected_four(board, opponent):
        return -100
    return 0
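
# A quick illustration of the three cases, assuming agents.common provides
# initialize_game_state() returning an empty 6x7 board with row 0 at the
# bottom (an assumption; only the relative scores matter here):
def example_compute_score():
    from agents.common import initialize_game_state
    board = initialize_game_state()
    assert compute_score(board, PLAYER1) == 0      # no four connected yet
    board[0, 0:4] = PLAYER1                        # horizontal four for PLAYER1
    assert compute_score(board, PLAYER1) == 100    # winning for PLAYER1
    assert compute_score(board, PLAYER2) == -100   # same board, losing for PLAYER2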

def do_expansion(current_node: MCTSNode) -> MCTSNode:
    """
    The 2nd part of the algorithm: once a leaf was found, a new node is
    created (expanded).

    :param current_node: the found leaf node
    :return: a newly created node, added to the MCTS tree. From this node the
        simulation will start.
    """
    # children_index lists the playable columns of this node; the entry after
    # the already-expanded children is the next column to try
    next_child_index = current_node.children_index[len(current_node.children)]
    expanded_board = apply_player_action(current_node.board, next_child_index,
                                         current_node.player, copy=True)
    expanded_node = MCTSNode(expanded_board, find_opponent(current_node.player),
                             current_node)
    current_node.children.append(expanded_node)
    return expanded_node
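
# `MCTSNode` itself is defined elsewhere. A minimal sketch of the attributes
# this section relies on; that `children_index` holds the playable columns is
# inferred from do_expansion above, the exact construction is an assumption:
class MCTSNode:
    def __init__(self, board: np.ndarray, player: BoardPiece,
                 parent: "MCTSNode" = None):
        self.board = board        # game state at this node
        self.player = player      # the player to move at this node
        self.parent = parent      # None for the root node
        self.children = []        # expanded children, in expansion order
        # playable columns; do_expansion consumes one entry per expansion
        self.children_index = list(possible_moves(board))
        self.wins = 0             # back-propagated win count
        self.plays = 0            # back-propagated visit count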

def not_used_mcts_algorithm(board: np.ndarray, root_player: BoardPiece,
                            trials=20) -> list:
    root_node = MCTSNode(board, root_player)
    mcts_tree = [root_node]
    for i in range(trials):
        current_node = root_node
        current_player = root_player
        # selection and expansion
        extended = False
        while not extended:
            if len(current_node.children) != 7:
                # this corresponds to expansion
                new_child_found = False
                child_board = None
                while not new_child_found:
                    action_made = False
                    while not action_made:
                        action = np.random.randint(0, 7)
                        child_board = apply_player_action(current_node.board,
                                                          np.int8(action),
                                                          current_player,
                                                          copy=True)
                        # a full column leaves the board unchanged
                        if not np.all(child_board == current_node.board):
                            action_made = True
                    if not current_node.children:
                        # we cannot iterate over an empty list;
                        # this is the true leaf case
                        new_child_found = True
                    else:
                        repeated_child = False
                        for c in current_node.children:
                            if np.all(c.board == child_board):
                                repeated_child = True
                        if not repeated_child:
                            new_child_found = True
                child_node = MCTSNode(child_board, find_opponent(current_player))
                child_node.parent = current_node
                current_node.children.append(child_node)
                mcts_tree.append(child_node)
                extended = True
            else:
                # UCB1; this corresponds to selection
                ucb_scores = np.array([
                    upper_confidence_bound_1(c.wins, c.plays, c.parent.plays)
                    for c in current_node.children
                ])
                selected_node_index = np.argmax(ucb_scores)
                current_node = current_node.children[selected_node_index]
                current_player = find_opponent(current_player)
        # simulation
        final_board, simulation_result = run_simulation(child_node, root_player,
                                                        print_final=False)
        # back propagation:
        # go upwards from the child node to the root via parent
        bp_node = child_node
        while bp_node.parent is not None:
            bp_node.plays += 1
            # update the wins for the losing nodes: the move into such a node
            # was made by the opponent of the loser, so it is a good move for
            # that opponent
            if check_end_state(final_board, bp_node.player) == GameState.IS_LOST:
                bp_node.wins += 1
            bp_node = bp_node.parent
    return mcts_tree
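
# `upper_confidence_bound_1` is used above but not shown in this section.
# A sketch of the standard UCB1 formula, with the conventional exploration
# constant sqrt(2) as an assumption:
def upper_confidence_bound_1(wins: int, plays: int, parent_plays: int,
                             c: float = np.sqrt(2)) -> float:
    # unvisited children score +inf, so every child is tried at least once
    if plays == 0:
        return POSITIVE_INF
    return wins / plays + c * np.sqrt(np.log(parent_plays) / plays)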

def test_find_opponent():
    from agents.common import find_opponent
    assert PLAYER2 == find_opponent(PLAYER1)
    assert PLAYER1 == find_opponent(PLAYER2)
    assert PLAYER2 == find_opponent(NO_PLAYER)  # PLAYER2 is the default opponent