def good_print(self, board, node, color, num_nodes):
    """
    Print a diagnostic tour of the tree's selection policy to stderr.

    Starting at `node`, repeatedly report the current node, list up to
    `num_nodes` of its children ranked by UCT value, and then follow
    node.select() until a leaf is reached. All moves are played on
    board.copy(), never on `board` itself. At the leaf a rollout is run
    with self._evaluate_rollout and its result is printed.

    Arguments:
    board -- the position corresponding to `node`.
    node -- the tree node at which the tour starts.
    color -- the color to play at `node`.
    num_nodes -- maximum number of children to print per visited node.

    Returns:
    None
    """
    cboard = board.copy()
    sys.stderr.write("\nTaking a tour of selection policy in tree! \n\n")
    sys.stderr.write(cboard.get_twoD_board())
    sys.stderr.flush()
    while not node.is_leaf():
        # Label the node from the move stored on the node itself; no local
        # move exists yet on the first pass through the loop.
        if node._move is None:
            pointString = 'Root'
        elif node._move != PASS:
            pointString = board.point_to_string(node._move)
        else:
            pointString = node._move
        sys.stderr.write("\nMove: {} Numebr of children {}, Number of visits: {}\n"
                         .format(pointString, len(node._children), node._n_visits))
        sys.stderr.flush()

        # Rank the children by UCT value, best first.
        max_flag = color == BLACK
        moves_ls = [
            (child_move, uct_val(node, child, self.exploration, max_flag), child)
            for child_move, child in node._children.items()
        ]
        moves_ls.sort(key=lambda entry: entry[1], reverse=True)

        if moves_ls:
            sys.stderr.write("\nPrinting {} of {} childs that have highest UCT value \n\n".format(num_nodes, pointString))
            sys.stderr.flush()
            # Slice rather than index: a node may have fewer than num_nodes
            # children, and a negative count prints nothing.
            for child_move, child_val, child_node in moves_ls[:max(num_nodes, 0)]:
                if child_move != PASS:
                    sys.stderr.write("\nChild point:{} ;UCT Value {}; Number of visits: {}; Number of Black wins: {}\n"
                                     .format(cboard.point_to_string(child_move), child_val,
                                             child_node._n_visits, child_node._black_wins))
                    sys.stderr.flush()
                else:
                    sys.stderr.write("\nChild point:{} ;UCT Value {}; Number of visits: {}; Number of Black wins: {} \n"
                                     .format(child_move, child_val,
                                             child_node._n_visits, child_node._black_wins))
                    sys.stderr.flush()

        # Greedily select next move.
        move, next_node = node.select(self.exploration, max_flag)
        if move == PASS:
            # The board calls below are given None in place of PASS.
            move = None
        assert cboard.check_legal(move, color)
        pointString = cboard.point_to_string(move)
        cboard.move(move, color)
        sys.stderr.write("\nBoard in simulation after chosing child {} in tree. \n\n".format(pointString))
        sys.stderr.write(cboard.get_twoD_board())
        sys.stderr.flush()
        color = GoBoardUtilGo4.opponent(color)
        node = next_node
    assert node.is_leaf()
    # The rollout starts with `color` to move on the scratch board.
    cboard.current_player = color
    leaf_value = self._evaluate_rollout(cboard, color)
    sys.stderr.write("\nWinner of simulation is: {} color, Black is 0 an \n".format(leaf_value))
    sys.stderr.flush()
def update_with_move(self, last_move):
    """
    Advance the root of the search tree by one move.

    If `last_move` is one of the current root's children, that child
    becomes the new root and its subtree is kept; otherwise the tree is
    restarted from a fresh TreeNode(None). The new root's parent link is
    cleared and self.toplay is switched to the opponent.

    Arguments:
    last_move -- the move that was just played; used as the key into the
                 root's children.

    Returns:
    None
    """
    children = self._root._children
    if last_move not in children:
        new_root = TreeNode(None)
    else:
        new_root = children[last_move]
    self._root = new_root
    # Detach the new root from the old tree above it.
    self._root._parent = None
    self.toplay = GoBoardUtilGo4.opponent(self.toplay)
def simulate(self, board, cboard, move, toplay):
    """
    Play `move` for `toplay` on a scratch board and finish the game with
    GoBoardUtilGo4.playGame.

    The contents of `board` are first copied into `cboard` with
    GoBoardUtilGo4.copyb2b; all moves in this method are played on `cboard`.

    Arguments:
    board -- the position to simulate from.
    cboard -- scratch board that is overwritten and then played on.
    move -- the first move, played by `toplay`.
    toplay -- the color making `move`.

    Returns:
    The value returned by GoBoardUtilGo4.playGame for the simulation.
    """
    GoBoardUtilGo4.copyb2b(board, cboard)
    # Check the copy point by point. Comparing the two .all() results would
    # only check that both arrays agree on "every entry is nonzero".
    # Assumes .board is a numpy-style array -- TODO confirm.
    assert (cboard.board == board.board).all()
    cboard.move(move, toplay)
    opp = GoBoardUtilGo4.opponent(toplay)
    # NOTE(review): `simulations` is not defined in this method, so it has to
    # resolve at module scope -- confirm it exists and is the intended policy.
    return GoBoardUtilGo4.playGame(
        cboard,
        opp,
        komi=self.komi,
        limit=self.limit,
        simulation_policy=simulations,
        use_pattern=self.use_pattern,
        check_selfatari=self.check_selfatari,
    )
def _playout(self, board, color):
    """
    Run one playout: walk from the root down to a leaf using
    node.select(), expand the leaf if needed, evaluate it with a rollout
    and back the result up with update_recursive.

    The board is played on directly, so the caller must pass in a copy.

    Arguments:
    board -- a copy of the board.
    color -- color to play

    Returns:
    None
    """
    root = self._root
    root._use_knowledge = self.in_tree_knowledge
    # The root is only found unexpanded once.
    if not root._expanded:
        root.expand(board, color)
        # Avoid the division by zero errors: give the root the totals of
        # its children's statistics.
        if self.in_tree_knowledge == "probabilistic":
            children = root._children.values()
            total_black_wins = sum(kid._black_wins for kid in children)
            total_visits = sum(kid._n_visits for kid in children)
            root._black_wins = total_black_wins
            root._n_visits = total_visits

    node = root
    while not node.is_leaf():
        # Greedily select next move.
        move, child = node.select(self.exploration, color == BLACK)
        if move == PASS:
            # board.move is given None in place of PASS.
            move = None
        else:
            assert board.check_legal(move, color)
        board.move(move, color)
        color = GoBoardUtilGo4.opponent(color)
        node = child
    assert node.is_leaf()

    if not node._expanded:
        node.expand(board, color)
    assert board.current_player == color
    # Update value and visit count of nodes in this traversal.
    node.update_recursive(self._evaluate_rollout(board, color))
def _playout(self, board, color):
    """
    Run a single playout from the root down to a leaf, evaluate the leaf
    with a rollout, and propagate the value back through its parents.

    State is modified in-place, so a copy of the board must be provided.

    Arguments:
    board -- a copy of the board.
    color -- color to play

    Returns:
    None
    """
    current = self._root
    # The root is only found unexpanded once.
    if not current._expanded:
        current.expand(board, color)

    while not current.is_leaf():
        # Greedily select next move.
        maximize = color == BLACK
        selected, successor = current.select(self.exploration, maximize)
        is_pass = selected == PASS
        if not is_pass:
            assert board.check_legal(selected, color)
        # board.move is given None in place of PASS.
        board.move(None if is_pass else selected, color)
        color = GoBoardUtilGo4.opponent(color)
        current = successor
    assert current.is_leaf()

    if not current._expanded:
        current.expand(board, color)
    assert board.current_player == color
    rollout_result = self._evaluate_rollout(board, color)
    # Update value and visit count of nodes in this traversal.
    current.update_recursive(rollout_result)