Python HexBoard.getFreeMoves示例

    def __init__(self, node_type, board_state, parent):
        """Build a search-tree node wrapping ``board_state``.

        Args:
            node_type: Label stored on the node as ``type`` (used for
                visualization).
            board_state: Board stored on the node and passed to
                ``HexBoard.getFreeMoves`` to obtain the untried moves.
            parent: Node this one is attached to, or ``None`` for a node
                without a parent.
        """
        # Identity: a random UUID, plus its last three characters as a short
        # label that is easier to read in visualizations and tests.
        unique_id = str(uuid.uuid4())
        self.id = unique_id
        self.name = unique_id[-3:]
        self.type = node_type
        self.board = board_state

        # Tree bookkeeping used by MCTS.
        self.untriedMoves = HexBoard.getFreeMoves(board_state)
        self.children = []
        has_parent = parent is not None
        self.parents = [parent] if has_parent else []
        self.parent_type = parent.type if has_parent else None

        # Search statistics.
        self.searched = False
        self.value = None
        self.visit = 0
        # np.inf marks "no result recorded yet"
        # (presumably replaced by the search code on first use - confirm).
        self.wins = np.inf
        self.loss = np.inf

示例#2

显示文件

文件： MCTS.py 项目： MuradBozik/RL-Assignment-2

    def search(self, root, itermax, delta, isMaximizer):
        """Run Monte Carlo Tree Search from ``root`` and return the best move.

        Every iteration performs the four MCTS phases: selection through
        ``self.UCTSelectChild``, expansion through ``self.Expand``,
        ``self.N`` playouts whose moves are picked with ``choice``, and
        backpropagation of the playout results along the visited path.
        Iterations stop once ``delta`` seconds have elapsed or ``itermax``
        iterations have been executed, whichever happens first.

        Args:
            root: Node the search starts from. Its statistics and subtree
                are updated in place.
            itermax: Maximum number of iterations to run.
            delta: Time budget in seconds.
            isMaximizer: Forwarded unchanged to ``self.UCTSelectChild``.

        Returns:
            A tuple ``(move, root)`` where ``move`` is
            ``HexBoard.getMove(root.board, best.board)`` and ``best`` is the
            child of ``root`` with the highest visit count.

        Raises:
            IndexError: If ``root`` has no children once the search ends
                (for example when the budget allowed no iteration at all).
        """
        end_time = datetime.now() + timedelta(seconds=delta)
        while datetime.now() < end_time and itermax > 0:
            # Work on a copy so the real game is untouched; every iteration
            # restarts from the same position.
            game_state = deepcopy(self.game)
            node = root

            # Board snapshots of the nodes visited in this iteration; used
            # during backpropagation to pick the right parent of each node.
            path = [root.board.tobytes()]

            # Select: descend while the node is fully expanded and
            # non-terminal.
            while (node.untriedMoves == []) and (node.children != []) and (
                    not game_state.isTerminal()):
                node = self.UCTSelectChild(node, isMaximizer)
                m = HexBoard.getMove(game_state.board, node.board)
                game_state = HexBoard.makeMove(m, game_state)
                path.append(node.board.tobytes())

            # Expand: add one child and continue from it.
            if (node.untriedMoves != []) and (not game_state.isTerminal()):
                node = self.Expand(node, game_state)
                path.append(node.board.tobytes())

            # Playouts: tally the results of THIS iteration separately so
            # that only these new results are propagated to the ancestors.
            new_wins = 0
            new_losses = 0
            for _ in range(self.N):
                # Each playout restarts from the same game_state.
                _game = deepcopy(game_state)
                while not _game.isTerminal():
                    move = choice(HexBoard.getFreeMoves(_game.board))
                    _game = HexBoard.makeMove(move, _game)

                # Replace the np.inf "no result yet" marker the first time
                # the node receives a playout result.
                if node.wins == np.inf:
                    node.wins = 0

                if node.loss == np.inf:
                    node.loss = 0

                if _game.checkWin(_game.maximizer):
                    new_wins += 1
                else:
                    new_losses += 1

            node.wins += new_wins
            node.loss += new_losses

            # Backpropagate from the current node up to the root.

            # The last path entry is the current node itself; drop it so the
            # next entry identifies its parent.
            path.pop()

            while node is not None:
                if len(path) > 0:
                    parent = node.getParent(path.pop())
                    if parent.wins == np.inf:
                        parent.wins = 0
                        parent.loss = 0
                    # Add only this iteration's results. Adding the child's
                    # cumulative totals would count earlier playouts again
                    # each time the child is revisited.
                    parent.wins += new_wins
                    parent.loss += new_losses
                else:
                    parent = None

                node.visit += 1
                node = parent
            itermax -= 1

        if len(root.children) == 0:
            raise IndexError(
                "MCTS search finished without expanding any child of root")

        # Stable sort: among equally visited children the last one wins.
        sortedList = sorted(root.children, key=lambda c: c.visit)

        # Return the move that was most visited.
        return HexBoard.getMove(root.board, sortedList[-1].board), root