def UCT(rootstate, maxiters, verbose=False):
    """Run a Monte Carlo tree search from ``rootstate`` and pick a root action.

    Each of the ``maxiters`` iterations works on a fresh copy of
    ``rootstate`` and performs four phases: selection, expansion, a random
    playout, and backpropagation of the playout outcome up to the root.

    Args:
        rootstate: Game state to search from. It is never mutated here; its
            copies must provide ``copy()``, ``move(action)``,
            ``get_moves()``, ``result()`` and a ``player`` attribute.
        maxiters: Number of search iterations to run.
        verbose: When true, print ``root.TreeToString(0)``; otherwise print
            ``root.ChildrenToString()``.

    Returns:
        A dict with two keys: ``"action"`` is the action of the root child
        with the highest ``wins / visits`` ratio (the last such child in
        ``root.children`` order on ties), and ``"children"`` is whatever
        ``root.get_children()`` returns.

    Raises:
        IndexError: If the root has no children once the search finishes.
    """
    root = Node(board=rootstate)

    for _ in range(maxiters):
        node = root
        board = rootstate.copy()

        # Selection: descend while the node has no untried actions left but
        # does have children, replaying each chosen action on the board.
        # The selection policy itself lives in Node.select().
        while not node.untried_actions and node.children:
            node = node.select()
            board.move(node.action)

        # Expansion: add one child for a randomly chosen untried action,
        # unless the board already reports a result.
        if node.untried_actions and not board.result():
            action = random.choice(node.untried_actions)
            board.move(action)
            # The new node gets its own copy because ``board`` keeps being
            # mutated by the playout below.
            node = node.expand(action, board.copy())

        # Playout: play random moves until no moves remain or the board
        # reports a result. The move list is fetched once per step.
        moves = board.get_moves()
        while moves and not board.result():
            board.move(random.choice(moves))
            moves = board.get_moves()

        # Backpropagation: the board no longer changes, so its result is
        # read once instead of once per ancestor.
        outcome = board.result()
        while node is not None:
            if outcome:
                # +1 when the node's board has the same ``player`` as the
                # final board, -1 otherwise.
                reward = 1 if node.board.player == board.player else -1
            else:
                reward = 0
            node.update(reward)
            node = node.parent

    # Output some information about the tree - can be omitted.
    if verbose:
        print(root.TreeToString(0))
    else:
        print(root.ChildrenToString())

    # sorted() is stable, so taking the last element keeps the original
    # tie-break (the last best child in root.children order).
    ranked = sorted(root.children, key=lambda c: c.wins / c.visits)
    return {"action": ranked[-1].action, "children": root.get_children()}
def UTC(root_state, itermax, verbose=False):
    """Run a Monte Carlo tree search from ``root_state`` and return a move.

    Each of the ``itermax`` iterations works on a fresh clone of
    ``root_state`` and performs four phases: select, expand, rollout, and
    backpropagation of the final result up to the root.

    Args:
        root_state: Game state to search from. It is never mutated here;
            its clones must provide ``Clone()``, ``DoMove(move)``,
            ``GetMoves()`` and ``GetResult(player)``.
        itermax: Number of search iterations to run.
        verbose: When true, print ``rootnode.TreeToString(0)``; otherwise
            print ``rootnode.ChildrenToString()``.

    Returns:
        The ``move`` of the root child with the most ``visits`` (the last
        such child in ``rootnode.childNodes`` order on ties).

    Raises:
        IndexError: If the root has no children once the search finishes.
    """
    rootnode = Node(state=root_state)

    for _ in range(itermax):
        node = rootnode
        state = root_state.Clone()

        # Select: descend while the node is fully expanded (no untried
        # moves) and has children, replaying each chosen move on the state.
        while not node.untried_moves and node.childNodes:
            node = node.UTCSelectChild()
            state.DoMove(node.move)

        # Expand: add one child for a randomly chosen untried move.
        if node.untried_moves:
            m = random.choice(node.untried_moves)
            state.DoMove(m)
            # NOTE(review): the live ``state`` (not a clone) is handed to
            # AddChild and is mutated by the rollout below - confirm that
            # AddChild does not keep a reference to it.
            node = node.AddChild(m, state)

        # Rollout: play random moves until the state is terminal (no moves
        # left). The move list is fetched once per step.
        moves = state.GetMoves()
        while moves:
            state.DoMove(random.choice(moves))
            moves = state.GetMoves()

        # Backprop: walk from the expanded node back to the root, updating
        # each node with the result as seen by ``node.player_moved``.
        while node is not None:
            node.Update(state.GetResult(node.player_moved))
            node = node.parentNode

    if verbose:
        print(rootnode.TreeToString(0))
    else:
        print(rootnode.ChildrenToString())
    print(rootnode.childNodes)

    # sorted() is stable, so taking the last element keeps the original
    # tie-break (the last most-visited child in childNodes order).
    return sorted(rootnode.childNodes, key=lambda c: c.visits)[-1].move