Example #1
import random

def monte_carlo_sample(board_state, side):
    """Sample a single random rollout from the current board_state and side.
    Moves are applied to the board until a terminal state is reached, then
    the result and the first move made on the way there are returned.

    Args:
        board_state (3x3 tuple of int): state of the board
        side (int): side currently to play. +1 for the plus player, -1 for
            the minus player

    Returns:
        (result(int), move(int, int)): the result of this rollout, +1 for a
            win for the plus player, -1 for a win for the minus player, 0 for
            a draw, along with the first move made from this position (None
            if the position is already terminal)
    """
    result = has_winner(board_state)
    if result != 0:
        return result, None
    moves = list(available_moves(board_state))
    if not moves:
        return 0, None

    # select a random move
    move = random.choice(moves)
    # recurse with the opposing side to move; only the final result matters,
    # the deeper moves are discarded
    result, _ = monte_carlo_sample(apply_move(board_state, move, side), -side)
    return result, move
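
For illustration, a hedged usage sketch: a single rollout is pure random play, so many samples are averaged per first move to estimate opening strength (assumes the board helpers used above are in scope; the 1000-sample count is arbitrary).

import collections

empty_board = ((0, 0, 0), (0, 0, 0), (0, 0, 0))
totals = collections.defaultdict(float)
counts = collections.defaultdict(int)
for _ in range(1000):
    result, first_move = monte_carlo_sample(empty_board, 1)
    totals[first_move] += result
    counts[first_move] += 1
# first move with the best average result for the plus player
best_move = max(counts, key=lambda m: totals[m] / counts[m])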
Example #2
import sys

def min_max_alpha_beta(board_state,
                       side,
                       max_depth,
                       evaluation_func=evaluate,
                       alpha=-sys.float_info.max,
                       beta=sys.float_info.max):
    """Runs the min-max algorithm with alpha-beta pruning on a given
    board_state for a given side, to a given depth, in order to find the
    best move.

    Args:
        board_state (3x3 tuple of int): The board state we are evaluating
        side (int): either +1 or -1
        max_depth (int): how deep we want our tree to go before we use the
            evaluation_func to determine how good the position is
        evaluation_func (board_state -> int): function used to evaluate the
            position for the plus player
        alpha (float): used when this is called recursively, normally ignore
        beta (float): used when this is called recursively, normally ignore

    Returns:
        (best_score(int), best_score_move((int, int))): the min-max score of
            the best move found and the move itself
    """
    best_score_move = None
    moves = list(available_moves(board_state))
    if not moves:
        return 0, None

    for move in moves:
        new_board_state = apply_move(board_state, move, side)
        winner = has_winner(new_board_state)
        if winner != 0:
            return winner * 10000, move
        else:
            if max_depth <= 1:
                score = evaluation_func(new_board_state)
            else:
                score, _ = min_max_alpha_beta(new_board_state, -side,
                                              max_depth - 1, evaluation_func,
                                              alpha, beta)

        if side > 0:
            if score > alpha:
                alpha = score
                best_score_move = move
        else:
            if score < beta:
                beta = score
                best_score_move = move
        if alpha >= beta:
            break

    return alpha if side > 0 else beta, best_score_move
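
A quick usage sketch (hedged: assumes the 3x3 tuple-of-tuples board encoding and the module's default evaluate helper; the position is made up for illustration):

board = ((1, 0, 0),
         (0, -1, 0),
         (0, 0, 0))
best_score, best_move = min_max_alpha_beta(board, side=1, max_depth=5)
print(best_score, best_move)  # score from the plus player's perspective, and the move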
Example #3
def min_max(board_state, side, max_depth, evaluation_func=evaluate):
    """Runs the min-max algorithm on a given board_state for a given side, to
    a given depth, in order to find the best move.

    Args:
        board_state (3x3 tuple of int): The board state we are evaluating
        side (int): either +1 or -1
        max_depth (int): how deep we want our tree to go before we use the
            evaluation_func to determine how good the position is
        evaluation_func (board_state -> int): function used to evaluate the
            position for the plus player

    Returns:
        (best_score(int), best_score_move((int, int))): the min-max score of
            the best move found and the move itself
    """
    best_score = None
    best_score_move = None

    moves = list(available_moves(board_state))
    if not moves:
        # this is a draw
        return 0, None

    for move in moves:
        new_board_state = apply_move(board_state, move, side)
        winner = has_winner(new_board_state)
        if winner != 0:
            return winner * 10000, move
        else:
            if max_depth <= 1:
                score = evaluation_func(new_board_state)
            else:
                score, _ = min_max(new_board_state, -side, max_depth - 1,
                                   evaluation_func)
            if side > 0:
                if best_score is None or score > best_score:
                    best_score = score
                    best_score_move = move
            else:
                if best_score is None or score < best_score:
                    best_score = score
                    best_score_move = move
    return best_score, best_score_move
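
Both min-max variants bind evaluate as a default argument, so it must exist before these definitions; it is not part of this excerpt. A hypothetical stand-in (an assumption, not the source's implementation) that scores each row, column, and diagonal still open for one side:

def evaluate(board_state):
    # collect all 8 winnable lines: 3 rows, 3 columns, 2 diagonals
    lines = [list(row) for row in board_state]
    lines += [[board_state[r][c] for r in range(3)] for c in range(3)]
    lines.append([board_state[i][i] for i in range(3)])
    lines.append([board_state[i][2 - i] for i in range(3)])
    score = 0
    for line in lines:
        if -1 not in line:
            score += sum(line)  # line still winnable by the plus player
        if 1 not in line:
            score += sum(line)  # negative sum: winnable by the minus player
    return score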
import collections
import math
import random

def monte_carlo_tree_search_uct(board_state, side, number_of_samples):
    """Evaluate the best move from the current board_state for the given side
    using Monte Carlo sampling with upper confidence bounds for trees (UCT).

    Args:
        board_state (3x3 tuple of int): state of the board
        side (int): side currently to play. +1 for the plus player, -1 for the minus player
        number_of_samples (int): number of rollouts to run from the current position; the higher the number, the
            better the estimate of the position

    Returns:
        (result(int), move(int, int)): the average result for the best move from this position and what that move was
    """
    state_results = collections.defaultdict(float)
    state_samples = collections.defaultdict(float)

    for _ in range(number_of_samples):
        current_side = side
        current_board_state = board_state
        first_unvisited_node = True
        rollout_path = []
        result = 0

        while result == 0:
            move_states = {
                move: apply_move(current_board_state, move, current_side)
                for move in available_moves(current_board_state)
            }

            if not move_states:
                result = 0
                break

            if all(state in state_samples for state in move_states.values()):
                log_total_samples = math.log(
                    sum(state_samples[s] for s in move_states.values()))
                # every child has been visited at least once: pick the move
                # whose resulting state maximizes the upper confidence bound
                move = max(
                    move_states,
                    key=lambda m: _upper_confidence_bounds(
                        state_results[move_states[m]],
                        state_samples[move_states[m]], log_total_samples))
            else:
                move = random.choice(list(move_states.keys()))

            current_board_state = move_states[move]

            if first_unvisited_node:
                rollout_path.append((current_board_state, current_side))
                if current_board_state not in state_samples:
                    first_unvisited_node = False

            current_side = -current_side

            result = has_winner(current_board_state)

        for path_board_state, path_side in rollout_path:
            state_samples[path_board_state] += 1.
            # score the rollout from the perspective of the side that moved
            # into this state, normalized from [-1, 1] to [0, 1]
            state_results[path_board_state] += (result * path_side) / 2. + .5

    move_states = {
        move: apply_move(board_state, move, side)
        for move in available_moves(board_state)
    }

    # choose the child state with the best average result
    move = max(move_states,
               key=lambda m: state_results[move_states[m]] /
               state_samples[move_states[m]])

    return (state_results[move_states[move]] /
            state_samples[move_states[move]]), move
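
The _upper_confidence_bounds helper is referenced but not defined in this excerpt. A UCB1-style sketch (the exploration constant is an assumption):

import math

def _upper_confidence_bounds(payout, samples_for_this_state, log_total_samples):
    # average payout plus an exploration bonus that shrinks as this state
    # is visited more often (UCB1)
    return (payout / samples_for_this_state
            + math.sqrt(2 * log_total_samples / samples_for_this_state))

With that in place, a call like monte_carlo_tree_search_uct(board_state, 1, 10000) returns the estimated value (in [0, 1]) of the best move for the plus player, along with that move.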