示例#1
0
def minimax(board: np.ndarray, depth: int, alpha: int, beta: int,
            player: BoardPiece, maximizing_player: bool) -> Tuple[int, int]:
    """
    Depth-limited minimax search with alpha-beta pruning.

    :param board: position to search from
    :param depth: plies left to search; at 0 the board is scored by heuristic()
    :param alpha: best score the maximizing side has secured so far
    :param beta: best score the minimizing side has secured so far
    :param player: the agent's piece; every score is from this side's view
    :param maximizing_player: True on the agent's turn, False on the opponent's
    :return: (column, score); column is None at depth 0 and in finished games
    """
    opponent = PLAYER2 if player == PLAYER1 else PLAYER1

    # Candidate moves are gathered before the depth and end-of-game checks.
    candidate_columns = find_moves(board)

    if depth == 0:
        return None, heuristic(board, player)

    if check_end_state(board, player) != GameState.STILL_PLAYING:
        if connected_four(board, player):
            return None, 10000000  # agent has won
        if connected_four(board, opponent):
            return None, -10000000  # opponent has won
        return None, 0  # nobody won, so it is a draw

    if maximizing_player:
        best_score = -math.inf
        for column in candidate_columns:
            board, child_board = apply_player_action(board, column, player,
                                                     True)
            child_score = minimax(child_board, depth - 1, alpha, beta, player,
                                  False)[1]
            if child_score > best_score:
                best_score, action_column = child_score, column
            alpha = max(alpha, best_score)
            if alpha >= beta:
                break  # prune: the remaining columns cannot change the result
        return action_column, best_score

    # Opponent's turn: look for the reply that is worst for the agent.
    best_score = math.inf
    for column in candidate_columns:
        board, child_board = apply_player_action(board, column, opponent,
                                                 True)
        child_score = minimax(child_board, depth - 1, alpha, beta, player,
                              True)[1]
        if child_score < best_score:
            best_score, action_column = child_score, column
        beta = min(beta, best_score)
        if alpha >= beta:
            break  # prune: the remaining columns cannot change the result
    return action_column, best_score
示例#2
0
def test_connected_four():
    """An untouched board has no winner; four 1s in row 0 win for player 1 only."""
    fresh_board = initialize_game_state()
    random_player = np.int8(np.random.choice([1, 2]))
    no_win = connected_four(board=fresh_board, player=random_player)

    # Row 0, columns 0-3 belong to player 1.
    row_win_board = np.zeros((6, 7))
    row_win_board[0, :4] = np.ones(4, dtype=np.int8)

    first_player_wins = connected_four(board=row_win_board, player=np.int8(1))
    second_player_wins = connected_four(board=row_win_board, player=np.int8(2))

    assert no_win is False
    assert first_player_wins is True
    assert second_player_wins is False
示例#3
0
def minimax(
    board: np.ndarray,
    player: BoardPiece,
    saved_state: Optional[SavedState] = None
) -> Tuple[int, Optional[SavedState]]:
    """
    Choose a column with a two-ply look-ahead.

    For every playable column the agent simulates its own move. If that move
    wins, the column is returned immediately. Otherwise every opponent reply
    is simulated; a column is considered safe when no reply gives the opponent
    a connected four. The result is drawn at random from the safe columns, or
    from all playable columns when every move allows an opponent win.

    Only playable columns are ever candidates, so a full column can no longer
    be returned.

    Keyword arguments:
        board: the board that the player is playing and trying to win
        player: current player
        saved_state: Optional Saved State, passed through unchanged

    Returns:
        Tuple: consisting of the column of the chosen move, and the Optional
        Saved State

    Raises:
        ValueError: if the board has no playable column
    """
    other_player = opponent(player)

    legal_columns = list(available_columns(board))
    if not legal_columns:
        raise ValueError("minimax called on a board with no playable column")

    safe_columns = []
    for col in legal_columns:
        board_after_move = apply_player_action(board, col, player, True)
        if connected_four(board_after_move, player):
            return col, saved_state  # immediate win, nothing else to consider

        # A move is unsafe when at least one opponent reply wins the game.
        opponent_can_win = any(
            connected_four(
                apply_player_action(board_after_move, reply, other_player,
                                    True), other_player)
            for reply in available_columns(board_after_move))
        if not opponent_can_win:
            safe_columns.append(col)

    # If every move loses, still return a legal move instead of failing.
    candidates = safe_columns or legal_columns
    action = np.random.choice(np.array(candidates))

    return action, saved_state
示例#4
0
def check_winner(board: np.ndarray):
    """
    Score the outcome of `board`.

    :param board: board to inspect
    :return: 10 if PLAYER1 has connected four, -10 if PLAYER2 has (PLAYER2
        takes precedence when both do), 0 when nobody has won and no valid
        move is left, None when nobody has won and moves remain
    """
    outcome = None
    if connected_four(board, PLAYER1):
        outcome = 10
    if connected_four(board, PLAYER2):
        outcome = -10

    remaining_moves = len(get_valid_moves(board))

    if outcome is not None:
        return outcome
    # Nobody has won: a full board is a draw, otherwise the game goes on.
    return 0 if remaining_moves == 0 else None
示例#5
0
def compute_score(board: np.ndarray, player: BoardPiece) -> float:
    """
    Minimal win/loss heuristic used by minimax.

    :param board: the board state to score
    :param player: the player from whose point of view the score is computed
    :return: 100 if `player` has connected four, -100 if the opponent has,
        0 in every other case
    """
    if connected_four(board, player):
        return 100

    # The opponent is only looked up when the player has not already won.
    return -100 if connected_four(board, find_opponent(player)) else 0
示例#6
0
def recursive(board: np.ndarray, player: BoardPiece, depth: int):
    """
    Unfinished recursive look-ahead that tries each available column for `player`.

    NOTE(review): the guard below tests the enum members themselves, not the
    state of `board`. Unless GameState overrides truthiness it is always true
    and the function returns None immediately -- confirm the intended check.

    :param board: board to search from
    :param player: piece that moves at this level
    :param depth: decremented on every recursive call but never compared
        against anything in this function
    :return: the first column whose move gives `player` a connected four;
        every other path ends without returning a value (None)
    """
    columns = [0, 1, 2, 3, 4, 5, 6]
    max = 0  # never read; shadows the builtin max() inside this function
    min = 0  # never read; shadows the builtin min() inside this function

    if GameState.IS_DRAW or GameState.IS_WIN:
        return
    else:
        # The side that moves after `player`.
        if player == PLAYER1:
            other_player = PLAYER2
        else:
            other_player = PLAYER1

        for i in avail_cols(board):
            # Reset for every column, so entries never survive an iteration.
            danger_col = []

            board_i = apply_player_action(board, i, player, True)
            if connected_four(board_i, player) == True:
                danger_col.append(i)
                return i
            else:
                # The result of the recursive call is discarded.
                recursive(board_i, other_player, depth - 1)

        # danger_col is only appended to right before `return i`, so it is
        # empty whenever this line is reached (and unbound if the loop body
        # never ran).
        if len(danger_col) != 0:
            columns.remove(
                i
            )  # don't use columns i in random.choice if they will lead to an other_player win
        cols = np.array(columns)
        # NOTE(review): `action` is chosen but never returned -- presumably a
        # `return action` is missing; confirm.
        action = np.random.choice(cols)
示例#7
0
def test_MCTS():
    """Walk through the MCTS phases: selection, expansion, rollout check, update, full search."""
    # --- selection -------------------------------------------------------
    root_state = initialize_game_state()
    child_state = initialize_game_state()
    child_state[0, 0] = PLAYER1
    root = Node(state=root_state)
    child = Node(state=child_state, parent=root)
    root.untriedMoves = [0, 3, 4]
    root.children = [child]
    picked = Node.selection(root)
    assert picked == root

    # --- expansion: one untried move is consumed, a different node results -
    root.untriedMoves = [0, 3, 4]
    expanded = Node.expand(root)
    assert len(root.untriedMoves) == 2
    assert expanded != root

    # --- rollout -----------------------------------------------------------
    # NOTE(review): this test itself places no piece on root_state, yet a
    # PLAYER1 win is expected on it -- confirm this assertion is intended.
    root = Node(state=root_state, player=PLAYER1)
    assert connected_four(root.state, PLAYER1)

    # --- backpropagation ---------------------------------------------------
    Node.update(root, result=[-1, 1])
    assert root.visits == 1

    # --- full search -------------------------------------------------------
    chosen_column = MCTS(root_state)
    assert chosen_column
示例#8
0
    def test_connected_four_horizontal(self):
        """Four PLAYER1 pieces in columns 0-3 win; a PLAYER2 piece in column 2 prevents it."""
        winning_row = common.initialize_game_state()
        for column in range(4):
            common.apply_player_action(winning_row, PlayerAction(column),
                                       common.PLAYER1)

        # Same layout, except that column 2 is taken by PLAYER2.
        blocked_row = common.initialize_game_state()
        pieces = (common.PLAYER1, common.PLAYER1, common.PLAYER2,
                  common.PLAYER1)
        for column, piece in enumerate(pieces):
            common.apply_player_action(blocked_row, PlayerAction(column), piece)

        # Each board is checked with and without the last action supplied.
        assert common.connected_four(winning_row, PLAYER1) == True
        assert common.connected_four(winning_row, PLAYER1,
                                     PlayerAction(3)) == True
        assert common.connected_four(blocked_row, PLAYER1) == False
        assert common.connected_four(blocked_row, PLAYER1,
                                     PlayerAction(3)) == False
示例#9
0
    def testConnectedFour(self):
        """connected_four on a single piece, a full column, its transpose and a broken diagonal."""

        from agents.common import connected_four

        # One piece of `player` at (0, 0); keep a snapshot before adding more.
        grid = np.zeros((6, 7))
        grid[0, 0] = player
        snapshot = grid.copy()

        self.assertFalse(connected_four(grid, player))
        self.assertFalse(connected_four(grid, PLAYER2))

        # Fill column 1 completely with `player`.
        grid[:, 1] = player

        self.assertTrue(connected_four(grid, player))
        self.assertFalse(connected_four(snapshot, player))
        self.assertTrue(connected_four(grid.T, player))

        # PLAYER2 diagonal in the lower-right 4x4 block, then row 5 is
        # overwritten with `player` pieces around a gap in column 3 (the
        # diagonal's corner cell at (5, 6) becomes `player`).
        snapshot[2:6, 3:7] = np.eye(4) * PLAYER2
        snapshot[5, :] = np.array([1, 1, 1, 0, 1, 1, 1]) * player

        self.assertFalse(connected_four(snapshot, player))
        # NOTE(review): identical to the assertion above; presumably a
        # different player or board was meant here -- confirm.
        self.assertFalse(connected_four(snapshot, player))
示例#10
0
def test_connect_four():
    """Check connected_four for both players on the boards b1..b4."""
    from agents.common import connected_four

    # (board, player, whether that player is expected to have connected four)
    expectations = (
        (b1, PLAYER1, False),
        (b1, PLAYER2, False),
        (b2, PLAYER1, False),
        (b2, PLAYER2, True),
        (b3, PLAYER1, True),
        (b3, PLAYER2, False),
        (b4, PLAYER1, True),
        (b4, PLAYER2, False),
    )
    for board, piece, should_win in expectations:
        result = connected_four(board, piece)
        if should_win:
            assert result
        else:
            assert not result
示例#11
0
def test_connected_four():
    """connected_four detects a row, a column, a diagonal and its vertical mirror."""
    from agents.common import connected_four

    # Row: PLAYER1 holds columns 0-3 of row 5, PLAYER2 three cells of row 4.
    grid = np.zeros((6, 7))
    grid[5, 0:4] = PLAYER1
    grid[4, 0:3] = PLAYER2
    assert connected_four(grid, PLAYER1)

    # Column: PLAYER2 holds rows 2-5 of column 4, PLAYER1 three cells next to it.
    grid = np.zeros((6, 7))
    grid[2:6, 4] = PLAYER2
    grid[3:6, 5] = PLAYER1
    assert connected_four(grid, PLAYER2)

    # Diagonal: PLAYER1 runs from (5, 0) to (2, 3); PLAYER2 holds (5, 4)-(5, 6).
    grid = np.zeros((6, 7))
    for step in range(4):
        grid[5 - step, step] = PLAYER1
    grid[5, 4:7] = PLAYER2
    assert connected_four(grid, PLAYER1)

    # Flipping the board upside down yields the opposite diagonal.
    assert connected_four(np.flipud(grid), PLAYER1)
示例#12
0
def test_connected_four(board=board_to_test,
                        player=play,
                        last_action=play_act):
    """
    Check that connected_four returns a plain bool for the given move.

    :param board: Playing board (np.ndarray)
    :param player: The player putting the piece (BoardPiece)
    :param last_action: The column where the piece is placed (PlayerAction)
    """
    from agents.common import connected_four

    assert isinstance(connected_four(board, player, last_action), bool)
示例#13
0
文件: test_common.py 项目: tah0/conn4
def test_connected_four():
    """connected_four: no win on a fresh board; row, column and diagonal wins for PLAYER1."""
    from agents.common import connected_four
    from agents.common import initialize_game_state

    fresh_board = initialize_game_state()

    # fresh board: nobody has won
    assert connected_four(fresh_board, PLAYER1) is False

    # row 0, columns 0-3
    row_win = fresh_board.copy()
    row_win[0, :4] = PLAYER1
    assert connected_four(row_win, PLAYER1) is True

    # column 0, rows 0-3
    column_win = fresh_board.copy()
    column_win[:4, 0] = PLAYER1
    assert connected_four(column_win, PLAYER1) is True

    # main diagonal (0, 0) .. (3, 3)
    diagonal_win = fresh_board.copy()
    diagonal_win[[0, 1, 2, 3], [0, 1, 2, 3]] = PLAYER1
    assert connected_four(diagonal_win, PLAYER1) is True
示例#14
0
def test_connected_four():
    """connected_four on four hand-written boards: vertical, horizontal and both diagonals."""
    from agents.common import connected_four

    def as_board(rows):
        # Wrap every cell in BoardPiece before building the array.
        return np.array([[BoardPiece(cell) for cell in row] for row in rows])

    # 5th column has connected 4 for PLAYER1 - testing vertical win
    vertical_board = as_board([
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 2, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 1],
    ])

    assert connected_four(vertical_board, PLAYER1) == True
    assert connected_four(vertical_board, PLAYER2) == False

    # row 0, first 4 units are PLAYER2 - testing horizontal win
    horizontal_board = as_board([
        [2, 2, 2, 2, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 2, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 1, 2, 1],
    ])

    assert connected_four(horizontal_board, PLAYER2) == True
    assert connected_four(horizontal_board, PLAYER1) == False

    # test \ diagonal - Player 2 wins
    backslash_board = as_board([
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 2, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 2, 2, 1],
    ])

    assert connected_four(backslash_board, PLAYER2) == True

    # test / diagonal - Player 2 wins
    slash_board = as_board([
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 2, 2, 2, 1],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 1, 1, 2, 2],
        [2, 0, 2, 0, 1, 0, 1],
        [1, 2, 1, 0, 2, 2, 1],
    ])

    assert connected_four(slash_board, PLAYER2) == True
示例#15
0
    def simulation(self, node: Node) -> int:
        """
        Play random moves from ``node`` until the game is decided.

        The side opposite to ``node.player`` moves first and the two sides
        alternate afterwards. The playout stops when the board is full, when
        no column is open, or as soon as the side that just moved has
        connected four.

        :param node: node whose board and player seed the playout
        :return: ``self.result`` of the final board for ``node.player``
        """
        playout_board = deepcopy(node.board)
        original_player = node.player
        mover = original_player

        while True:
            if check_board_full(playout_board):
                break
            if not len(check_open_columns(playout_board)) > 0:
                break

            open_columns = check_open_columns(playout_board)

            # Flip sides before moving, so the opposite player opens the playout.
            if mover == PLAYER1:
                mover = PLAYER2
            else:
                mover = PLAYER1

            pick = random.choice(range(len(open_columns)))
            playout_board = apply_player_action(playout_board,
                                                open_columns[pick],
                                                player=mover)

            # No need to keep playing once the last move decided the game.
            if connected_four(playout_board, mover):
                break

        return self.result(playout_board, original_player)
示例#16
0
def get_player_actions(
    board: np.ndarray,
    player: BoardPiece,
    _last_action: Optional[PlayerAction] = None
) -> list:  #could move this to common
    '''
    Returns the columns in which a piece can still be placed.

    An empty list is returned when the position is terminal:
      * `_last_action` is given and connected_four reports a win for `player`
        with that move, or
      * every cell of the board is non-zero (draw).

    :param board: current board; a column is playable while it holds fewer
        non-zero cells than the board has rows
    :param player: the piece that made `_last_action`
    :param _last_action: column of the most recent move; when None the win
        check is skipped entirely
    :return: list of playable column indices in ascending order
    '''
    # Identity comparison: `!= None` would be evaluated element-wise if an
    # array-like action were passed in.
    if _last_action is not None and connected_four(board, player,
                                                   _last_action):
        return []  #if game is won

    n_rows, n_cols = board.shape[0], board.shape[1]

    if np.count_nonzero(board) == n_rows * n_cols:
        return []  #if game is draw

    return [
        col for col in range(n_cols)
        if np.count_nonzero(board[:, col]) < n_rows
    ]
示例#17
0
def test_connected_four():
    """connected_four with an explicit last action on several hand-built boards."""
    from agents.common import connected_four

    player_check = 2
    player_wrong = 1

    # Three rows of player 1 pieces with a player 2 row of four on top.
    board1 = np.zeros((6, 7), dtype=np.int8)
    board1[0:3, :] = np.array([0, 1, 1, 1, 1, 1, 0])
    board1[3, :] = np.array([0, 2, 2, 2, 2, 0, 0])
    player_action1 = np.int8(4)

    # Four player 2 pieces stacked in column 4.
    board2 = np.zeros((6, 7), dtype=np.int8)
    board2[:, 4] = np.array([2, 2, 2, 2, 0, 0])

    board3 = np.array([[0, 0, 2, 1, 1, 1], [0, 0, 0, 2, 2, 1],
                       [0, 0, 0, 0, 2, 1], [0, 0, 0, 0, 0, 2],
                       [0, 0, 0, 0, 0, 0]])
    player_action3 = np.array([5])

    board4 = np.array([[0, 2, 2, 1, 2, 1], [0, 1, 1, 2, 2, 1],
                       [0, 1, 2, 0, 0, 0], [0, 2, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0, 0]])
    player_action4 = np.array([1])

    # (board, player, last action, expected result)
    cases = [
        (board1, player_check, player_action1, True),
        (board2, player_check, player_action1, True),
        (board3, player_check, player_action3, True),
        (board4, player_check, player_action4, True),
        (board4, player_wrong, player_action4, False),
        (board4, player_check, player_action1, False),
    ]

    # Run every call first, then compare, so all boards are always evaluated.
    outcomes = [
        connected_four(board, piece, action)
        for board, piece, action, _ in cases
    ]

    assert isinstance(outcomes[0], bool)
    for outcome, (_, _, _, expected) in zip(outcomes, cases):
        assert outcome == expected
示例#18
0
def test_connected_four():
    """
    Exhaustive shift test for connected_four():
        - winning conditions are picked up
            - horizontal
            - vertical
            - diagonal l
            - diagonal r
        - win is possible for both players
        - no win is picked up also for both players

    Implementation:
    > For each player, build one board with a horizontal four-in-a-row and one
      with a diagonal four-in-a-row, both sprinkled with opponent pieces and
      empty cells as noise. Each board is rolled through every column and row
      offset. The transposed horizontal board covers the vertical case and
      the left-right flipped diagonal board covers the other diagonal.
    > Rolling wraps around the board edge. While the four pieces stay
      contiguous a win must be reported; once the run is split by the wrap
      (leaving at most three in a row) no win may be reported. Both outcomes
      are asserted for every single offset.
    """
    n = NO_PLAYER

    # Loop between players: p owns the winning pattern, d is the distraction
    for p, d in zip([PLAYER1, PLAYER2], [PLAYER2, PLAYER1]):
        # Test board for horizontal and vertical + distractions
        board1 = np.array([[p, p, p, p, n, n, n], [d, n, d, n, d, n, d],
                           [n, n, n, n, n, n, n], [n, d, n, d, n, d, n],
                           [n, n, n, n, n, n, n], [d, n, d, n, d, n, d]])

        # Test board for right diagonal and left diagonal + distractions
        board2 = np.array([[p, d, n, d, n, d, n], [n, p, n, n, n, n, n],
                           [d, n, p, n, d, n, d], [n, n, n, p, n, n, n],
                           [n, d, n, d, n, d, n], [n, n, n, n, n, n, n]])

        n_rows, n_cols = board1.shape

        # i shifts the pattern along the columns, j along the rows
        for (i, j), _ in np.ndenumerate(board1):
            rolled1 = np.roll(np.roll(board1, i, axis=1), j, axis=0)
            rolled2 = np.roll(np.roll(board2, i, axis=1), j, axis=0)

            # Horizontal
            h = cc.connected_four(board=rolled1, player=p)

            # Vertical
            v = cc.connected_four(board=rolled1.T, player=p)

            # Diagonal L
            dl = cc.connected_four(board=rolled2, player=p)

            # Diagonal R
            dr = cc.connected_four(board=np.fliplr(rolled2), player=p)

            # The pattern starts at column/row 0 and is four cells long, so it
            # survives a shift only while start + 4 still fits on the board.
            columns_intact = (i % n_cols) + 4 <= n_cols
            rows_intact = (j % n_rows) + 4 <= n_rows

            # A row shift only moves the horizontal run to another row, so
            # just the column shift can break it (same for the transpose).
            assert bool(h) == columns_intact
            assert bool(v) == columns_intact

            # A diagonal run breaks when it wraps in either direction.
            assert bool(dl) == (columns_intact and rows_intact)
            assert bool(dr) == (columns_intact and rows_intact)
示例#19
0
def _scan_direction(board: np.ndarray, row: int, col: int, d_row: int,
                    d_col: int, player: BoardPiece) -> tuple:
    """Walk up to three cells from (row, col) along (d_row, d_col).

    Returns ``(own, reach)``: the number of `player` pieces met and the number
    of usable cells (own pieces plus empty cells) before the walk is stopped
    by the board edge or by a piece of the other player.
    """
    n_rows, n_cols = board.shape[0], board.shape[1]
    own = 0
    reach = 0
    for step in range(1, 4):
        r = row + step * d_row
        c = col + step * d_col
        if not (0 <= r < n_rows and 0 <= c < n_cols):
            break  # ran off the board
        cell = board[r, c]
        if cell == player:
            own += 1
            reach += 1
        elif cell == NO_PLAYER:
            reach += 1
        else:
            break  # blocked by the other player
    return own, reach


def evaluate_heuristic(board: np.ndarray, action: PlayerAction,
                       player: BoardPiece) -> int:
    """
    Calculates a score for playing `action` on `board`.

    The score is 99 if the move wins for `player` and -99 if the same move
    would win for the other player. Otherwise the four lines through the
    landing cell (row, column, both diagonals) are examined on the board as
    it is before the move: a line that can still hold four pieces adds 1
    (2 when all three cells on both sides are usable) plus the number of
    `player` pieces within three cells along it.

    Parameters
    ----------
    board : np.ndarray
            Board that the move is performed on
    action: PlayerAction
            Column of the move that is performed
    player: BoardPiece
            Player who performs the move

    Return
    ------
    Aggregated score over the four lines through the landing cell, or
    99 / -99 for a winning / losing move

    Raises
    ------
    ValueError
            If column `action` has no empty cell

    """
    # check if player can win with this action
    board_copy = apply_player_action(board.copy(), action, player, False)
    if connected_four(board_copy, player, None):
        return 99

    # check if other player can win with this action
    rival = other_player(player)
    board_copy2 = apply_player_action(board.copy(), action, rival, False)
    if connected_four(board_copy2, rival, None):
        return -99

    # find lowest open row
    for row in range(board.shape[0]):
        if board[row, action] == NO_PLAYER:
            break
    else:
        raise ValueError("column can't be played")

    # Each line is scanned in its two opposite directions.
    lines = (
        ((0, 1), (0, -1)),  # row
        ((1, 0), (-1, 0)),  # column
        ((1, 1), (-1, -1)),  # right diagonal
        ((-1, 1), (1, -1)),  # left diagonal
    )

    heuristic = 0
    for forward, backward in lines:
        own_fwd, reach_fwd = _scan_direction(board, row, action, forward[0],
                                             forward[1], player)
        own_bwd, reach_bwd = _scan_direction(board, row, action, backward[0],
                                             backward[1], player)
        streak = 1 + reach_fwd + reach_bwd  # the landing cell itself counts

        if streak < 4:
            # this move can never complete four along this line
            continue
        heuristic += 2 if streak == 7 else 1
        heuristic += own_fwd + own_bwd

    return heuristic
示例#20
0
def minimax(board: np.ndarray,
            depth: int,
            maximizingPlayer: bool,
            player: BoardPiece,
            weights: np.ndarray = weights_array):
    """
    Minimax function to obtain the best position for a given player

    Every score is computed from the point of view of `player`. On a
    maximizing level `player` moves and the highest score is kept; on a
    minimizing level the opponent moves and the lowest score is kept. The
    two sides alternate strictly, one move per level.

    :param board: Actual board in the game (np.ndarray)
    :param depth: Depth for simulation
    :param maximizingPlayer: Flag (bool) for maximizing or minimizing
    :param player: Player whose score is being maximized
    :param weights: Array of weights for the scoring function
    :return: (score, move); move is None when the board is scored directly
        (depth 0, finished game or no open column)
    """

    board_terminal = connected_four(board, player=PLAYER1) or connected_four(
        board, player=PLAYER2) or full_board(board)
    # flatnonzero yields scalar indices, so int(c) below is a plain
    # scalar conversion rather than a conversion of a 1-element array.
    columns = np.flatnonzero(board[-1, :] == NO_PLAYER)

    if depth == 0 or board_terminal or columns.size == 0:
        board_score = scoring_function(board=board,
                                       weights=weights,
                                       player=player)
        return int(board_score), None

    if player == PLAYER1:
        opponent = PLAYER2
    else:
        opponent = PLAYER1
    mover = player if maximizingPlayer else opponent

    board_score = None
    best_move = None
    for c in columns:
        c = int(c)
        im_board, _ = apply_player_action(board=board,
                                          action=c,
                                          player=mover,
                                          copy=True)
        # Keep `player` as the reference side and only flip the level, so the
        # two sides alternate and no sign flip is needed on the way back up.
        score, _ = minimax(im_board, depth - 1, not maximizingPlayer, player,
                           weights)
        if best_move is None:
            improved = True  # the first candidate is always accepted
        elif maximizingPlayer:
            improved = score > board_score
        else:
            improved = score < board_score
        if improved:
            board_score = score
            best_move = c
    return int(board_score), int(best_move)
示例#21
0
def minimax(board: np.ndarray, depth: int, alpha: int, beta: int,
            player: BoardPiece, maximizing_player: bool) -> Tuple[int, int]:
    '''
    Alpha-beta pruned minimax over the currently open columns.

    :param board: current state of board
    :param depth: remaining depth of the search tree; at 0 the board is
        scored with heuristic()
    :param alpha: best score secured so far for the maximizing side
    :param beta: best score secured so far for the minimizing side
    :param player: the agent's piece; scores are from this side's view
    :param maximizing_player: True if this level maximizes for player
    :return: (column, score); column is None at depth 0 and for ended games
    '''
    opponent_player = PLAYER2 if player == PLAYER1 else PLAYER1

    # open columns are collected before any early return
    open_cols = np.asarray(check_open_columns(board))

    if depth == 0:
        return None, heuristic(board, player)

    if check_end_state(board, player) != GameState.STILL_PLAYING:
        if connected_four(board, player):
            return None, 100000  # agent won
        if connected_four(board, opponent_player):
            return None, -100000  # opponent won
        return None, 0  # game over without a winner

    if maximizing_player:
        best_score = -math.inf
        for column in open_cols:
            # simulate the agent's move; the search continues on the copy
            board, next_board = apply_player_action(board, column, player,
                                                    True)
            candidate = minimax(next_board, depth - 1, alpha, beta, player,
                                False)[1]
            if candidate > best_score:
                best_score, action_column = candidate, column
            alpha = max(alpha, best_score)
            if alpha >= beta:
                break  # nothing further down this branch can matter
        return action_column, best_score

    # minimizing level: the opponent picks the reply that hurts the agent most
    best_score = math.inf
    for column in open_cols:
        board, next_board = apply_player_action(board, column,
                                                opponent_player, True)
        candidate = minimax(next_board, depth - 1, alpha, beta, player,
                            True)[1]
        if candidate < best_score:
            best_score, action_column = candidate, column
        beta = min(beta, best_score)
        if alpha >= beta:
            break  # nothing further down this branch can matter
    return action_column, best_score
def test_connected_four():
    """Check ``connected_four`` against a series of hand-built positions.

    Each scenario starts from a fresh board, replays a fixed list of
    ``(column, piece)`` drops via ``apply_player_action`` and then compares
    the value returned by ``connected_four`` with the expected outcome.
    """
    from agents.common import initialize_game_state
    from agents.common import apply_player_action
    from agents.common import connected_four

    # Drop sequences shared by several scenarios.
    two_columns = [(2, 2), (2, 1), (3, 2), (2, 1), (3, 2), (2, 1), (3, 2)]
    bottom_row = [(2, 1), (2, 2), (3, 1), (2, 2), (4, 1), (2, 2)]
    diagonal_base = [(0, 1), (0, 2), (0, 1), (1, 2), (1, 1), (2, 2), (5, 1),
                     (2, 2), (2, 1), (3, 2), (4, 1), (3, 2), (4, 1)]

    # Each entry: (drops, arguments passed to connected_four after the board,
    #              whether the return type is asserted to be bool, expected)
    scenarios = [
        # TRUE TESTS
        # vertical
        (two_columns + [(2, 1)], (BoardPiece(1), ), True, True),
        # horizontal
        (bottom_row + [(5, 1)], (1, 5), True, True),
        # left right diagonal
        (diagonal_base + [(3, 2), (3, 1)], (1, 3), True, True),
        # right left diagonal
        (diagonal_base + [(1, 2), (3, 1), (0, 2)], (2, 0), True, True),
        # FALSE TESTS
        # vertical
        (two_columns, (BoardPiece(2), 3), False, False),
        # horizontal
        (bottom_row, (2, 2), True, False),
        # left right diagonal
        (diagonal_base, (BoardPiece(1), 4), False, False),
        # right left diagonal
        (diagonal_base + [(1, 2)], (2, 1), True, False),
        # NO WIN TEST
        (two_columns, (BoardPiece(1), ), True, False),
    ]

    for drops, query_args, check_type, expected in scenarios:
        board = initialize_game_state()
        for column, piece in drops:
            # The trailing False is forwarded exactly as in every call here;
            # presumably it disables copying so the board is edited in place
            # -- TODO confirm against apply_player_action.
            apply_player_action(board, column, BoardPiece(piece), False)
        ret = connected_four(board, *query_args)
        if check_type:
            assert isinstance(ret, bool)
        assert ret == expected
示例#23
0
def test_connected_four():
    """Smoke test: ``connected_four`` returns a ``bool`` for an all-zero board."""
    from agents.common import connected_four
    empty_board = np.zeros((6, 7), dtype=BoardPiece)
    outcome = connected_four(empty_board, BoardPiece(2))
    assert isinstance(outcome, bool)
示例#24
0
def monte_carlo_tree_search(
        board: np.ndarray,
        player: BoardPiece,
        saved_state: Optional[SavedState],
        timeout: np.int8 = 10) -> Tuple[PlayerAction, Optional[SavedState]]:
    '''
    Choose a move with a time-limited Monte Carlo tree search.

    An immediately winning move is returned straight away. Otherwise the
    four MCTS steps are repeated until the time budget is used up:
     1. Selection
     2. Expansion
     3. Simulation
     4. Backpropagation

    :param board: current board; it is copied before any move is simulated
    :param player: piece of the agent (the side whose result is maximised)
    :param saved_state: passed through to the caller unchanged
    :param timeout: search budget in seconds, measured with
        ``time.perf_counter`` (``time.clock`` no longer exists since
        Python 3.8)
    :return: the action of the root child with the highest wins/visits ratio
        (or an immediately winning action) and ``saved_state``
    '''

    min_piece = 3 - player
    max_piece = player

    root = Node(board=board, player=min_piece)

    # check immediate win
    for action in root.action_notExp:
        state = board.copy()
        apply_player_action(state, action, max_piece)
        if connected_four(state, max_piece, action):
            return action, saved_state

    start = time.perf_counter()
    while True:

        node = root
        state = board.copy()

        # selection
        # keep going down the tree based on best UCT values until terminal
        # (no more children) or unexpanded node (no more moves to expand)
        # NOTE(review): `state` is rebuilt from node.board before the
        # simulation, so the moves replayed here never reach the rollout.
        while node.action_notExp == [] and node.childNodes != []:
            node = node.selection()
            apply_player_action(state, node.action, max_piece)

        # expansion
        if node.action_notExp != []:
            action = random.choice(node.action_notExp)
            node = node.expansion(action)

        # simulation
        state = node.board.copy()
        player_roll = node.player
        result = 0

        # NOTE(review): when no expansion happened above, `action` still
        # holds the value of an earlier step -- confirm this is what
        # get_player_actions expects as its third argument.
        # Stop as soon as check_result reports a win (1) or a loss (-0.1).
        while get_player_actions(
                state, 3 - player_roll,
                action) and result != 1 and result != -0.1:

            player_roll = 3 - player_roll
            action = random.choice(
                get_player_actions(state, player_roll, action))

            apply_player_action(state, action, player_roll)

            # result from the point of view of the agent (max_piece)
            result = check_result(state, max_piece, action)

        # backpropagation
        while node is not None:
            node.update(result)
            node = node.parent

        if time.perf_counter() - start > timeout:
            break

    # Ascending sort, then take the last element: the child with the highest
    # win ratio (same tie-breaking as sorting and reversing).
    best_child = sorted(root.childNodes,
                        key=lambda child: child.wins / child.visits)[-1]

    return best_child.action, saved_state
示例#25
0
def _drop_piece(board: np.ndarray, col: int, piece) -> np.ndarray:
    """Return a copy of ``board`` with ``piece`` placed in the lowest empty cell of ``col``."""
    child = np.copy(board)
    # Row ROWS - 1 is the top row (it is the one tested for a full column),
    # so a column fills from row 0 upwards.
    for row in range(ROWS):
        if child[row][col] == 0:
            child[row][col] = piece
            break
    return child


def minimax(depth: int,
            board: np.ndarray,
            player: BoardPiece,
            alpha,
            beta,
            maximizing=True):
    """
    Alpha-beta pruned minimax search.

    Values are always expressed from the point of view of ``BoardPiece(2)``:
    a connected four for piece 2 is ``+inf``, for piece 1 ``-inf``, a full
    board without a winner is ``0`` and non-terminal leaves are scored with
    ``board_heuristic(board, BoardPiece(2))``.

    :param depth: remaining depth of the tree search of type int
    :param board: Contains current state of the board an ndarray, shape (ROWS, COLUMNS) and data type (dtype) BoardPiece
    :param player: Current player playing the game of type BoardPiece; kept
        for interface compatibility and passed down unchanged, the search
        itself does not consult it
    :param alpha: Alpha value for alpha-beta pruning of type float
    :param beta: Beta value for alpha-beta pruning of type float
    :param maximizing: A boolean value to switch between maximising and minimising heuristic_value
    :return: column : the column to be played by the agent of type int
            (``None`` at leaf and terminal nodes)
            value : the heuristic value of the board

    """
    valid_columns = [
        col for col in range(COLUMNS) if board[ROWS - 1][col] == 0
    ]

    # Terminal positions. Wins are detected directly: comparing
    # ``check_end_state(...).name`` (a string) with ``GameState.IS_WIN``
    # (an enum member) could not detect them.
    if connected_four(board, BoardPiece(2)):
        return None, math.inf
    if connected_four(board, BoardPiece(1)):
        return None, -math.inf
    if not valid_columns:
        return None, 0
    if depth == 0:
        return None, board_heuristic(board, BoardPiece(2))

    # Random fallback in case no child improves on the initial +/-inf value.
    column = random.choice(valid_columns)

    if maximizing:
        value = -math.inf
        for col in valid_columns:
            # NOTE(review): the maximising side is assumed to play
            # BoardPiece(2), matching the evaluation above -- confirm.
            child = _drop_piece(board, col, BoardPiece(2))
            value_temp = minimax(depth - 1, child, player, alpha, beta,
                                 False)[1]
            if value_temp > value:
                value = value_temp
                column = col
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return column, value

    value = math.inf
    for col in valid_columns:
        child = _drop_piece(board, col, BoardPiece(1))
        value_temp = minimax(depth - 1, child, player, alpha, beta, True)[1]
        if value_temp < value:
            value = value_temp
            column = col
        beta = min(beta, value)
        if beta <= alpha:
            break
    return column, value
示例#26
0
def MCTS(board: np.ndarray) -> PlayerAction:
    """Run a Monte Carlo tree search from ``board`` and return a column.

    At most 100000 iterations of selection, expansion, random rollout and
    backpropagation are performed; the loop also stops once more than
    ``Timeout`` (module-level value, seconds of ``time.time()``) has elapsed.

    :param board: position to search from; rollouts work on copies of the
        node states
    :return: the move of a root child whose state is already a connected
        four for that child's player, otherwise the move of the root child
        with the highest wins/visits ratio; ``-1`` if the root has no children
    """
    root_node = Node(state=board, player=PLAYER)
    itermax = 100000
    start = time.time()

    for _ in range(itermax):

        node = root_node

        #############
        # selection #
        #############
        # keep going down the tree based on best UCT values until terminal or unexpanded node
        # Emptiness is tested with np.size: np.any() looks at the values, so
        # a move list holding only column 0 would be treated as empty.
        while np.size(node.untriedMoves) == 0 and node.childNodes != []:
            node = node.selection()

        #############
        #  Expand   #
        #############
        if np.size(node.untriedMoves) > 0:
            # Choose a random action from available moves
            action = np.random.choice(node.untriedMoves)
            node = node.expand(action)

        #############
        #  rollout  #
        #############
        rollout_board = node.state.copy()
        win_game_flag = False
        current_player = node.player
        open_columns = find_columns(rollout_board)
        while np.size(open_columns) > 0 and not win_game_flag:
            # the side to move alternates, starting with the other player
            current_player = PLAYER1 if current_player == PLAYER2 else PLAYER2
            action = np.random.choice(open_columns)
            rollout_board, _ = apply_player_action(rollout_board, action,
                                                   current_player)
            win_game_flag = connected_four(rollout_board, current_player)
            open_columns = find_columns(rollout_board)

        #################
        # backpropagate #
        #################
        if win_game_flag:
            # 1: the player won, -1: the player lost against the opponent
            result = 1 if current_player == PLAYER else -1
        else:
            result = 0
        while node is not None:
            node.update(result)
            node = node.parent

        if time.time() - start > Timeout:
            break

    best_score = -10000000.0
    selected_column = -1
    for child in root_node.childNodes:
        if connected_four(child.state, child.player):
            return child.move
        score = child.wins / child.visits
        if score > best_score:
            selected_column = child.move
            best_score = score
    return selected_column