def __init__(self, node_type, board_state, parent):
    """Build a tree node wrapping ``board_state``.

    Args:
        node_type: label stored on ``self.type`` (used for visualization).
        board_state: board position this node represents; also passed to
            ``HexBoard.getFreeMoves`` to seed the list of untried moves.
        parent: the node this one was created from, or ``None`` when the
            node has no parent.
    """
    # Identity: a unique id plus its last three characters as a short
    # label for visualizations/tests.
    unique_id = str(uuid.uuid4())
    self.id = unique_id
    self.name = unique_id[-3:]
    self.type = node_type

    # Search state used by MCTS.
    self.board = board_state
    self.untriedMoves = HexBoard.getFreeMoves(board_state)
    self.children = []

    # Parent links: a list of parents, plus the type of the creating
    # parent for visualization.
    has_parent = parent is not None
    self.parents = [parent] if has_parent else []
    self.parent_type = parent.type if has_parent else None

    self.searched = False
    self.value = None

    # Statistics. ``visit`` starts at zero; ``wins``/``loss`` start at
    # infinity as a "nothing recorded yet" marker.
    self.visit = 0
    self.wins = self.loss = np.inf
def search(self, root, itermax, delta, isMaximizer):
    """Run Monte-Carlo tree search from ``root`` and return the best move.

    Every iteration performs the four classic phases: selection (through
    ``self.UCTSelectChild``), expansion (through ``self.Expand``),
    ``self.N`` uniformly random playouts, and backpropagation of the
    playout results along the visited path. Iterations stop when either
    the iteration budget or the time budget is used up.

    Args:
        root: node the search starts from. Its statistics (and those of the
            nodes below it) are updated in place.
        itermax: maximum number of iterations to run.
        delta: time budget in seconds.
        isMaximizer: forwarded unchanged to ``self.UCTSelectChild``.

    Returns:
        A ``(move, root)`` tuple where ``move`` is
        ``HexBoard.getMove(root.board, best.board)`` for the most visited
        child ``best`` of ``root`` (the last such child on ties).

    Raises:
        IndexError: if ``root`` has no children once the search loop ends,
            e.g. because no iteration ran or nothing could be expanded.
    """
    end_time = datetime.now() + timedelta(seconds=delta)

    while datetime.now() < end_time and itermax > 0:
        # Work on a copy so self.game is the same at the start of every
        # iteration.
        game_state = deepcopy(self.game)
        node = root
        # Board keys of the nodes visited in this iteration, root first.
        # Backpropagation uses them to ask each node for the parent that
        # was actually traversed.
        path = [root.board.tobytes()]

        # Select: descend while the node is fully expanded and
        # non-terminal.
        # NOTE(review): the same isMaximizer flag is passed at every
        # depth; confirm UCTSelectChild accounts for alternating turns.
        while (
            node.untriedMoves == []
            and node.children != []
            and not game_state.isTerminal()
        ):
            node = self.UCTSelectChild(node, isMaximizer)
            m = HexBoard.getMove(game_state.board, node.board)
            game_state = HexBoard.makeMove(m, game_state)
            path.append(node.board.tobytes())

        # Expand: continue from the child returned by self.Expand.
        if node.untriedMoves != [] and not game_state.isTerminal():
            node = self.Expand(node, game_state)
            path.append(node.board.tobytes())

        # Playouts: tally only the results produced by THIS iteration.
        new_wins = 0
        new_losses = 0
        for _ in range(self.N):
            # Every playout restarts from the same position.
            playout = deepcopy(game_state)
            while not playout.isTerminal():
                move = choice(HexBoard.getFreeMoves(playout.board))
                playout = HexBoard.makeMove(move, playout)
            if playout.checkWin(playout.maximizer):
                new_wins += 1
            else:
                new_losses += 1

        # Backpropagate: credit this iteration's results to every node on
        # the path. Adding a child's cumulative totals to its parent (as
        # was done before) re-counts all earlier iterations each time.
        path.pop()  # drop the current node's own key
        while node is not None:
            # np.inf marks statistics that have never been written.
            if node.wins == np.inf:
                node.wins = 0
            if node.loss == np.inf:
                node.loss = 0
            node.wins += new_wins
            node.loss += new_losses
            node.visit += 1
            node = node.getParent(path.pop()) if path else None

        itermax -= 1

    if not root.children:
        # Same exception type as the former bare ``sortedList[-1]``
        # failure, but with an actionable message.
        raise IndexError(
            "search produced no children for root; cannot pick a move "
            "(no iteration ran or the root could not be expanded)"
        )

    # Most visited child. Iterating in reverse makes ties resolve to the
    # last such child, matching a stable ascending sort followed by [-1].
    best_child = max(reversed(root.children), key=lambda c: c.visit)
    return HexBoard.getMove(root.board, best_child.board), root