示例#1
0
def run_simulation(start_node: MCTSNode,
                   root_player: BoardPiece,
                   print_final=False) -> (np.ndarray, GameState):
    """
    Simulation phase (3rd part) of the MCTS algorithm.
    Plays uniformly random moves, alternating players, on a copy of the start node's board
    until check_end_state no longer reports GameState.STILL_PLAYING.
    :param start_node: the expanded node from which the simulation starts; its board is not modified
    :param root_player: the player for whom the final game state is evaluated
    :param print_final: when True, the final board is printed via pretty_print_board
    :return: the final board (np.ndarray) and check_end_state of that board for root_player (GameState)
    """
    board = start_node.board.copy()
    mover = start_node.player

    while True:
        if check_end_state(board, mover) != GameState.STILL_PLAYING:
            break
        # Draw one of the currently possible moves uniformly at random.
        legal_moves = possible_moves(board)
        pick = np.random.randint(len(legal_moves))
        board = apply_player_action(board, np.int8(legal_moves[pick]), mover)
        mover = find_opponent(mover)

    # The outcome is always reported for root_player, whoever moved last.
    outcome = check_end_state(board, root_player)
    if print_final:
        print(pretty_print_board(board))
    return board, outcome
示例#2
0
def minimax_algorithm(board: np.ndarray,
                      root_player: BoardPiece,
                      current_player: BoardPiece,
                      depth: int = 4,
                      alpha=NEGATIVE_INF,
                      beta=POSITIVE_INF) -> float:
    """
    Recursive, depth-limited minimax search with alpha-beta pruning.
    :param board: the current board
    :param root_player: the player who makes the move on the root board (the maximizing side)
    :param current_player: the player making the move on the current board
    :param depth: remaining search depth; the board is scored directly once it reaches 0
    :param alpha: best score found so far for the maximizing side
    :param beta: best score found so far for the minimizing side
    :return: the minimax value of the board; leaves are scored with compute_score_2 for root_player
    """
    search_finished = depth == 0 or check_end_state(
        board, current_player) != GameState.STILL_PLAYING
    if search_finished:
        return compute_score_2(board, root_player)

    children = generate_child_boards(board, current_player)
    maximizing = current_player == root_player
    best = NEGATIVE_INF if maximizing else POSITIVE_INF

    for child in children:
        value = minimax_algorithm(child, root_player,
                                  find_opponent(current_player), depth - 1,
                                  alpha, beta)
        if maximizing:
            best = np.maximum(best, value)
            alpha = np.maximum(alpha, value)
        else:
            best = np.minimum(best, value)
            beta = np.minimum(beta, value)
        # Remaining siblings cannot change the result any more: prune them.
        if beta <= alpha:
            break
    return best
示例#3
0
def mcts_algorithm(board: np.ndarray,
                   root_player: BoardPiece,
                   trials=100,
                   profiling=False) -> list:
    """
        Monte Carlo Tree Search.
        Builds a tree of MCTSNode objects rooted at the given board by repeating, "trials" times, the four
        MCTS phases: selection, expansion, simulation and back propagation.

        :param board: the game state for which the next action has to be decided
        :param root_player: the player that should do the next action
        :param trials: number of selection/expansion/simulation/back-propagation rounds to run
        :param profiling: when True, print the total time spent in each of the four phases
        :return: the MC tree as a list: the root node followed by every expanded node, in creation order
    """
    root = MCTSNode(board, root_player)
    tree = [root]

    # Row k holds, for every trial, the time stamp taken at phase boundary k.
    stamps = np.zeros((5, trials))
    for trial in range(trials):
        stamps[0, trial] = time.time()
        leaf = do_selection(root)
        stamps[1, trial] = time.time()

        new_node = do_expansion(leaf)
        tree.append(new_node)
        stamps[2, trial] = time.time()

        _final_board, outcome = run_simulation(new_node,
                                               root_player,
                                               print_final=False)
        stamps[3, trial] = time.time()

        # IS_LOST credits root_player; every other outcome credits the opponent.
        credited_player = (root_player if outcome == GameState.IS_LOST else
                           find_opponent(root_player))
        back_propagate_statistics(new_node, credited_player)
        stamps[4, trial] = time.time()

    if profiling:
        phase_formats = (
            "Selection: %.3f",
            "Expansion: %.3f",
            "Simulation: %.3f",
            "Back propagation: %.3f",
        )
        for phase, fmt in enumerate(phase_formats):
            print(fmt % (stamps[phase + 1, :] - stamps[phase, :]).sum())

    return tree
示例#4
0
def compute_score(board: np.ndarray, player: BoardPiece) -> float:
    """
    Dummy heuristic for minimax.
    Returns 100 when the player has four connected, -100 when the opponent has, and 0 otherwise.
    The player's own win is checked first.
    :param board: the board state to score
    :param player: the player from whose point of view the score is computed
    :return: the score: 100, -100 or 0
    """
    if connected_four(board, player):
        return 100
    return -100 if connected_four(board, find_opponent(player)) else 0
示例#5
0
def do_expansion(current_node: MCTSNode):
    """
    Expansion phase (2nd part) of the MCTS algorithm: attach one new child to the selected node.
    The action for the new child is read from current_node.children_index at the position equal to
    the number of children the node already has.
    :param current_node: the node chosen by the selection phase
    :return: the newly created child node, already appended to current_node.children.
             The simulation starts from this node.
    """
    already_expanded = len(current_node.children)
    action = current_node.children_index[already_expanded]

    # copy=True is passed so the child gets its own board.
    child_board = apply_player_action(current_node.board,
                                      action,
                                      current_node.player,
                                      copy=True)

    # The opponent is the one to move on the child's board.
    child = MCTSNode(child_board, find_opponent(current_node.player),
                     current_node)
    current_node.children.append(child)
    return child
示例#6
0
def not_used_mcts_algorithm(board: np.ndarray,
                            root_player: BoardPiece,
                            trials=20) -> list:
    """
    Monolithic variant of the MCTS loop with all four phases inlined.

    Each trial walks down from the root, following the child with the highest UCB1 score, until it
    reaches a node that does not have 7 children yet. There it adds one new child by trying random
    columns, runs a random simulation from that child and updates plays/wins on the way back up.

    :param board: the game state the tree is built for
    :param root_player: the player that should do the next action on the root board
    :param trials: number of selection/expansion/simulation/back-propagation rounds to run
    :return: the MC tree as a list: the root node followed by every created node, in creation order
    """
    root_node = MCTSNode(board, root_player)
    mcts_tree = [root_node]

    for _ in range(trials):
        current_node = root_node
        current_player = root_player

        # selection and expansion
        extended = False
        while not extended:
            # NOTE(review): 7 children are hard-coded here; a node whose board offers fewer than
            # 7 distinct moves can never reach that count, so the search for a new child below
            # may not terminate for such nodes - confirm before reviving this function.
            if len(current_node.children) != 7:  # this corresponds to expansion
                new_child_found = False
                child_board = None
                while not new_child_found:
                    action_made = False
                    while not action_made:
                        action = np.random.randint(0, 7)
                        child_board = apply_player_action(current_node.board,
                                                          np.int8(action),
                                                          current_player,
                                                          copy=True)
                        # Accept the action only if it changed the parent's board.
                        # (The comparison must be against the board, not the node object.)
                        if not np.all(child_board == current_node.board):
                            action_made = True

                    # The child is new when no existing child holds the same board;
                    # for a node without children this is trivially true.
                    new_child_found = not any(
                        np.all(c.board == child_board)
                        for c in current_node.children)

                child_node = MCTSNode(child_board,
                                      find_opponent(current_player))
                child_node.parent = current_node
                current_node.children.append(child_node)
                mcts_tree.append(child_node)
                extended = True

            else:  # UCB1 # this corresponds to selection
                ucb_scores = np.array([
                    upper_confidence_bound_1(c.wins, c.plays, c.parent.plays)
                    for c in current_node.children
                ])
                selected_node_index = np.argmax(ucb_scores)
                current_node = current_node.children[selected_node_index]
                current_player = find_opponent(current_player)

        # simulation
        # run_simulation requires the root player; only the final board is used below.
        final_board, _ = run_simulation(child_node,
                                        root_player,
                                        print_final=False)

        # back propagation
        # go upwards from the child node towards the root via parent;
        # the loop stops at the root, so the root node itself is not updated
        bp_node = child_node
        while bp_node.parent is not None:
            bp_node.plays += 1
            # update the wins for the losing nodes
            # because they are actually useful for their children - that have the opponent player of the loser
            if check_end_state(final_board,
                               bp_node.player) == GameState.IS_LOST:
                bp_node.wins += 1
            bp_node = bp_node.parent

    return mcts_tree
示例#7
0
def test_find_opponent():
    """find_opponent swaps PLAYER1 and PLAYER2 and maps NO_PLAYER to PLAYER2."""
    from agents.common import find_opponent

    expected_opponents = (
        (PLAYER1, PLAYER2),
        (PLAYER2, PLAYER1),
        (NO_PLAYER, PLAYER2),  # PLAYER2 is the default opponent
    )
    for piece, opponent in expected_opponents:
        assert opponent == find_opponent(piece)