    def test_board_full(self):
        """Check ``rules.board_full`` against empty, partial and full boards.

        NOTE(review): the expected values below assume that 0 marks an empty
        cell and that any non-zero value counts as occupied -- confirm against
        ``rules.EMPTY``. Plain ``assert`` statements are used so the checks
        work whether or not the enclosing class is a ``unittest.TestCase``.
        """
        # No moves played yet
        board = np.asarray([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
        assert not rules.board_full(board)

        # A single zero cell remains (row 1, column 0)
        board = np.asarray([[-1, 1, 1], [0, 1, -2], [1, 1, -1]])
        assert not rules.board_full(board)

        # Every cell holds the same non-zero value
        board = np.asarray([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
        assert rules.board_full(board)

        # Every cell is non-zero, with both sides present
        board = np.asarray([[1, -1, 1], [1, 1, -1], [-1, 1, 1]])
        assert rules.board_full(board)
    def mcts(self, board):
        """Run random Monte Carlo playouts from ``board`` and pick a move.

        Playouts are repeated until either ``self.time_budget`` seconds have
        elapsed or ``self.max_playouts`` playouts have been completed. Every
        state visited during a playout is added to the tree, and the playout
        result is backpropagated to each visited node except the root.

        Args:
            board: the current board state; it is copied (via ``.copy()``)
                before any move is applied, so it is not modified here.

        Returns:
            Whatever ``root_node.best_move()`` returns for the finished tree.
        """
        max_time = time.time() + self.time_budget
        root_node = TreeNode(board)
        playout_count = 0

        while time.time() < max_time and playout_count < self.max_playouts:
            # Start at tree root (current actual state)
            current_node = root_node
            current_player = self.side

            while True:
                # Stop descending once the game is decided or drawn
                winner = rules.winner(current_node.state)
                if winner or rules.board_full(current_node.state):
                    break

                # Pick a random move
                empty_cells = rules.empty_cells(current_node.state)
                move = tuple(random.choice(empty_cells))

                # Create a TreeNode for the move if the tree lacks one
                if move not in current_node.child_nodes:
                    new_board = current_node.state.copy()
                    new_board[move] = current_player
                    current_node.child_nodes[move] = TreeNode(
                        new_board, current_node)
                current_node = current_node.child_nodes[move]

                # Swap players
                current_player = -current_player

            # Terminal state reached: score it from this agent's viewpoint
            if winner == self.side:
                result = 1.0
            elif winner == -self.side:
                result = 0.0
            else:
                result = 0.5

            # Backpropagate up to, but not including, the root
            while current_node is not root_node:
                current_node.visits += 1
                current_node.wins += result
                current_node = current_node.parent

            playout_count += 1

        # Single-argument form prints the same text on Python 2 and 3
        print("Number of MCTS playouts: {0}".format(playout_count))

        # Backpropagation always ends at the root; naming it directly also
        # covers the case where no playout ran at all.
        self.root_node = root_node

        # Return move with highest score
        return root_node.best_move()
    def play(self):
        """
        Plays the game, alternating turns between the players.

        Moves are requested sequentially from each player in turn until there
        is a winner or the board is full. The moves are checked for validity.

        Returns:
            int: the side of the winning player, or None if there was a draw

        Raises:
            ValueError: if a player returns a move that is not valid
        """
        # Local import: this file's import block is not visible from here
        import random

        players = list(self.players())
        if self.shuffle:
            # NOTE(review): the statement guarded by this flag was missing;
            # randomising the turn order is the presumed intent -- confirm.
            random.shuffle(players)

        player_cycle = cycle(players)

        # Request moves from each player until there is a win or draw
        for player in player_cycle:
            # Check for a win or draw
            winning_side = rules.winner(self.board)
            if winning_side is not None:
                winner = self.player(winning_side)
                if self.logger:
                    # The message uses three fields; {1} is presumed to be
                    # the winning side -- TODO confirm.
                    self.logger.info("{2}\nGame over: {0} win ({1})".format(
                        type(winner).__name__, winning_side,
                        rules.board_str(self.board)))
                # Return the side of the winning player
                return winning_side
            elif rules.board_full(self.board):
                # The board is full so the game concluded with a draw
                if self.logger:
                    self.logger.info("{0}\nGame over: Draw".format(
                        rules.board_str(self.board)))
                # Return None for a draw
                return None

            # Request a move from the player (on a copy, so the player
            # cannot alter the real board)
            move = player.move(self.board.copy())

            # Reject invalid moves before touching the board
            if not rules.valid_move(self.board, move):
                if self.logger:
                    self.logger.fatal("Invalid move")
                raise ValueError("Not a valid move: {0}".format(move))

            self.board[move] = player.side
    def mcts(self, board):
        """Run one UCT iteration: select, expand/rollout, backpropagate.

        Starting from ``self.root_node``, fully expanded nodes are descended
        by highest ``self.ucb1_score``. From the first node with untried moves
        a random playout is run, adding every visited state to the tree. The
        result is then backpropagated through every ancestor, root included,
        and ``self.playout_count`` is incremented.

        Args:
            board: not used by this method; the search starts from
                ``self.root_node``.
        """
        # Local import: this file's import block is not visible from here
        import random

        # Start at tree root (current actual state)
        current_node = self.root_node
        current_player = self.side

        # Select: descend while the node is fully expanded and has children
        while current_node.child_nodes and not current_node.untried_moves:
            current_node = max(
                current_node.child_nodes.values(),
                key=lambda child: self.ucb1_score(child, current_player))

            # Swap players
            current_player = -current_player

        # Expand / rollout: random playout from here, since there is no
        # information from this move on
        if current_node.untried_moves:
            while True:
                # Check for terminal state
                winner = rules.winner(current_node.state)
                if winner or rules.board_full(current_node.state):
                    break

                # Remove one untried move, chosen at random
                untried = current_node.untried_moves
                move = tuple(untried.pop(random.randrange(len(untried))))

                # Note that usually only the first new move is added to the
                # tree (i.e. one node per iteration); here every playout
                # state is added.
                new_board = current_node.state.copy()
                new_board[move] = current_player  # apply the move
                current_node.child_nodes[move] = UCTTreeNode(
                    new_board, current_player, current_node)

                # Move down the tree
                current_node = current_node.child_nodes[move]

                # Swap players
                current_player = -current_player

        # Backpropagate: score the terminal state from this agent's viewpoint
        winner = rules.winner(current_node.state)
        if winner == self.side:
            reward = 1
        elif winner == -self.side:
            reward = 0
        else:
            reward = 0.5

        # Walk up through every ancestor; the loop ends when ``parent``
        # yields None (presumably at the root -- TODO confirm).
        while current_node is not None:
            current_node.visits += 1
            current_node.wins += reward
            current_node = current_node.parent

        self.playout_count += 1
    def minimax(self, board, player):
        """
        Recursive method that returns the optimal next moves and their value.

        Each empty cell is tried in turn by writing ``player`` into ``board``,
        recursing for the other side, and then resetting the cell to
        ``rules.EMPTY``; the board is therefore unchanged on return. Scores
        are fixed values: depth is not tracked by this implementation.

        Args:
            board (numpy.ndarray): two dimensional array representing the
                board state
            player (int): the side of the player to move

        Returns:
            result (int): the value of the position for ``self.side`` (1 for
                a win, 0 for a draw or -1 for a loss)
            optimal_moves: the entries of ``rules.empty_cells(board)`` that
                achieve ``result`` when ``player`` is ``self.side``; None for
                terminal positions and for the opponent's turn
        """
        # Base case: the previous move ended the game
        winner = rules.winner(board)
        if winner is not None:
            if winner == self.side:
                # Player won so return score for a win
                return 1, None
            # Opponent won so return score for a loss
            return -1, None
        if rules.board_full(board):
            # Board is full so return score for a draw
            return 0, None

        # NOTE: a shortcut returning a default cell for an empty board was
        # previously disabled here; the full search is always run.
        empty_cells = rules.empty_cells(board)

        # Test each child move recursively and collect the results
        results_list = []
        for cell in empty_cells:
            position = tuple(cell)

            # Make the move, evaluate it for the other side, then undo it
            board[position] = player
            result, _ = self.minimax(board, -player)
            results_list.append(result)
            board[position] = rules.EMPTY

        if player == self.side:
            # Return every move that reaches the best score for the player
            max_score = max(results_list)
            max_inds = [
                i for i, x in enumerate(results_list) if x == max_score
            ]
            optimal_moves = empty_cells[max_inds]
            return max_score, optimal_moves

        # Opponent's turn: assume the reply that is worst for the player;
        # the actual move is not needed by the caller.
        return min(results_list), None