def test_mcts_convergence(counter_game, counter_game_nn):
    """Expand the root 500 times and check the two root edges separate.

    After the expansions, the Q value of edge 0 must exceed the Q value of
    edge 1 by more than 0.5.

    Args:
        counter_game: callable returning the root game state.
        counter_game_nn: callable returning the third ``mcts.Node`` argument.
    """
    expansions = 500
    priors = np.array([0.3, 0.5])
    root = mcts.Node(counter_game(), priors, counter_game_nn())

    for _ in range(expansions):
        root.expand()

    first_edge = root.edges[0]
    second_edge = root.edges[1]
    assert first_edge.Q - second_edge.Q > 0.5
def human_play(self, move):
    """Advance the search tree by the human's move and report the winner.

    If one of the current root's children was reached by ``move`` (its
    board's ``last_move`` equals ``move``), that child becomes the new root.
    Otherwise a new board and node are built for the move and installed as
    the root.

    Args:
        move: the move played by the human.

    Returns:
        Whatever ``get_a_winner()`` of the resulting board returns.
    """
    current = self.mctree.tree
    board = current.get_board()
    children = current.get_children()
    reached_by = [child.get_board().last_move for child in children]

    if move not in reached_by:
        # The move is not in the tree yet: evaluate the resulting state
        # with the network and start from a fresh node.
        next_state = board.get_next_state_by_move(move)
        new_board = board_wuzi.Board(state=next_state, last_move=move, **self.kwargs)
        p, v = self.nn.predict(next_state)
        self.mctree.tree = mcts.Node(new_board, p, v)
        return new_board.get_a_winner()

    # Re-use the subtree that already exists for this move.
    chosen = children[reached_by.index(move)]
    self.mctree.tree = chosen
    return chosen.board.get_a_winner()
def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate a leaf and, if the game is not over, attach its edges.

    When ``done == 0`` the value is replaced by the prediction from
    ``self.get_preds`` and one edge per allowed action is appended to
    ``leaf.edges``. Nodes already present in ``self.mcts.tree`` are re-used.
    Otherwise the incoming ``value`` is only logged.

    Args:
        leaf: node being evaluated.
        value: game value supplied by the caller (used when ``done != 0``).
        done: 0 while the game is still running.
        breadcrumbs: passed through unchanged.

    Returns:
        The tuple ``(value, breadcrumbs)``.
    """
    log = lg.logger_mcts
    log.info('------EVALUATING LEAF------')

    if done == 0:
        value, probs, allowedActions = self.get_preds(leaf.state)
        log.info('PREDICTED VALUE FOR %d: %f', leaf.state.playerTurn, value)

        # Keep only the entries that belong to the allowed actions.
        probs = probs[allowedActions]

        for idx, action in enumerate(allowedActions):
            prior = probs[idx]
            successor, _, _ = leaf.state.takeAction(action)

            if successor.id in self.mcts.tree:
                child = self.mcts.tree[successor.id]
                log.info('existing node...%s...', child.id)
            else:
                child = mcts.Node(successor)
                self.mcts.addNode(child)
                log.info('added node...%s...p = %f', child.id, prior)

            edge = mcts.Edge(leaf, child, prior, action)
            leaf.edges.append((action, edge))
    else:
        log.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)

    return (value, breadcrumbs)
def test_get_leaf_states(counter_game, counter_game_nn):
    """After two expansions the root must yield at least one leaf state.

    Args:
        counter_game: callable returning the root game state.
        counter_game_nn: callable returning the third ``mcts.Node`` argument.
    """
    priors = np.array([0.3, 0.5])
    root = mcts.Node(counter_game(), priors, counter_game_nn())

    root.expand()
    root.expand()

    leaves = list(root.get_leaf_states())
    assert leaves
def apply_move(self, move):
    """Play ``move`` on the state and re-root the search tree accordingly.

    The first child of the current root whose ``move`` equals the played
    move becomes the new root and is detached from its parent. If no child
    matches, the tree is replaced by a fresh ``mcts.Node(None, move)``.

    Args:
        move: the move to apply.
    """
    self.state.make_move(move)

    # Stop at the first match instead of materialising every match.
    child = next(
        (node for node in self.root.children if node.move == move),
        None,
    )
    if child is None:
        self.root = mcts.Node(None, move)
    else:
        child.parent = None
        self.root = child
def main():
    """Run an interactive game: the search moves, then the user moves.

    Each round the tree search picks the next root, the state is rendered,
    and the user is asked for an action number. An action still listed in
    ``root.unvisited_actions`` creates a new node; otherwise the matching
    child of the root is selected.

    NOTE(review): the final ``render`` call is assumed to belong to the
    loop body (rendering after the user's move) - confirm against the
    original layout.
    """
    root = mcts.Node(initial_state(), 0, 0)

    while not is_game_over(root.state):
        root = mcts.mcts(root)
        render(root.state)

        action = int(input("Action: "))
        if action not in root.unvisited_actions:
            # No early exit: if several children match, the last one wins.
            # An action matching no child leaves the root unchanged.
            for candidate in root.children:
                if candidate.action == action:
                    root = candidate
        else:
            next_state = play(root.state, action)
            root = mcts.Node(next_state, root.action, action)

        render(root.state)
def __init__(self, game, time=None, iterations=None, c=2):
    """Set up the search budget, the game state and an empty tree root.

    Args:
        game: module or object exposing ``State()``.
        time: thinking budget stored as ``millis_to_think``. Defaults to
            1000 when neither ``time`` nor ``iterations`` is given.
        iterations: iteration budget, stored as given.
        c: constant stored as ``self.c`` (presumably the exploration
            constant - confirm against the search code).
    """
    no_budget_given = time is None and iterations is None

    self.c = c
    self.millis_to_think = 1000 if no_budget_given else time
    self.iterations = iterations
    self.state = game.State()
    self.root = mcts.Node(None, None)
def makeMove(self, node, move):
    """Descend from ``node`` along ``move`` and make the result the search root.

    An existing child is re-used; otherwise a new node is created with the
    turn flipped. The new root's parent loses all its children and its
    search-root flag, and the move is recorded through ``self.AddSite`` as
    the bit ``1 << move``.

    Args:
        node: current node.
        move: bit index of the move.

    Returns:
        The node that is now the search root.
    """
    children = node.childNodes
    if move in children:
        successor = children[move]
    else:
        successor = mcts.Node(node, move, node.turn ^ 1)

    successor.isSearchRoot = True

    # Drop the siblings so that only the chosen subtree is kept.
    previous = successor.parent
    previous.childNodes.clear()
    previous.isSearchRoot = False

    self.AddSite(1 << move)
    return successor
def __init__(self):
    """Record the start time and build the model, root node and batch.

    The parent initialiser is called with ``0`` and two 64-bit constants
    (presumably turn and the initial black/white bitboards - confirm
    against the base class).
    """
    self.start = time.time()
    super().__init__(0, 0x0000000810000000, 0x0000001008000000)

    self.model = Model()
    # Loading saved weights is currently disabled:
    # self.model.load('model/Gen' + str(0))
    self.score = 0
    self.table = {}

    placeholder_parent = mcts.FakeNode()
    core = Core(self.turn, self.black, self.white)
    root = mcts.Node(placeholder_parent, 0, 0, core)
    root.isGameRoot = True
    root.isSearchRoot = True
    self.currentNode = root

    self.mctsBatch = mcts.MCTSBatch(self.model, NUM_MCTS)
def search(self, game, time_left):
    """Explore a fresh tree until the timer runs low, then pick a move.

    Args:
        game: current game; a copy of it becomes the tree root.
        time_left: zero-argument callable returning the remaining time.

    Returns:
        ``last_move[0]`` of the game held by the selected successor node.
    """
    self.tree = mcts.Node(copy(game))
    self.time_left = time_left

    # Keep exploring for as long as the remaining time is not below the
    # safety threshold.
    while not self.time_left() < self.TIMER_THRESHOLD:
        self.tree.explore(self.policy)

    chosen, _ = self.tree.next(temperature=2)
    self.tree_next = chosen
    return chosen.game.last_move[0]
def makeMove(self, node, move):
    """Descend from ``node`` along ``move`` after checking the move is legal.

    The move is legal when bit ``1 << move`` is set in ``self.judge``. An
    illegal move prints a message and terminates the program; this check
    now happens before the tree is touched, so the tree is left intact in
    that case.

    For a legal move an existing child is re-used, or a new node with the
    turn flipped is created. The new root's parent loses all its children
    and its search-root flag, and the move is recorded via ``self.AddSite``.

    Args:
        node: current node.
        move: bit index of the move.

    Returns:
        The node that is now the search root.

    Raises:
        SystemExit: if the move is not legal (exit status 0, as before).
    """
    site = 1 << move
    if not (self.judge & site):
        # Message roughly means "hey, you can't play there".
        print("いや打てへんやん")
        # Raise directly instead of calling exit(): that helper is injected
        # by the `site` module and is meant for interactive sessions.
        raise SystemExit

    if move in node.childNodes:
        node = node.childNodes[move]
    else:
        node = mcts.Node(node, move, node.turn ^ 1)

    node.isSearchRoot = True
    # Drop the siblings so that only the chosen subtree is kept.
    node.parent.childNodes.clear()
    node.parent.isSearchRoot = False

    self.AddSite(site)
    return node
def InputEnemy(self):
    """Choose and register the opponent's move.

    With fewer than 16 remaining turns (``64 - nb - nw``) the move comes
    from ``moveAI``; its score and table are stored back on the instance.
    Otherwise a new search root is built from the current position, the
    batch search is run on it, the resulting distribution is printed and
    the arg-max move is played through ``self.makeMove``.

    ``nb``/``nw`` are presumably the black/white disc counts - confirm
    against ``Count``.
    """
    nb, nw = self.Count()
    remaining = 64 - nb - nw

    if remaining < 16:
        chosen_site, new_score, self.table = moveAI(
            self.black, self.white, self.turn, remaining, self.table, self.score)
        self.score = new_score
        self.AddSite(chosen_site)
        return

    # A previous variant obtained the move from moveAINN here; it is
    # disabled in favour of the tree search below.
    root = mcts.Node(mcts.FakeNode(), 0, self.turn,
                     Core(self.turn, self.black, self.white))
    self.currentNode = root
    root.isGameRoot = True
    root.isSearchRoot = True

    pi = self.mctsBatch.alpha([root], 1)[0]
    print(pi)
    best_move = int(np.argmax(pi))
    self.currentNode = self.makeMove(root, best_move)
def InputPlayer(self):
    """Choose and play the move for the player side.

    With fewer than ``RAND`` remaining turns (``64 - nb - nw``) a new search
    root is built, the batch search is run, the distribution is recorded
    with ``self.addHistory`` and the arg-max move is played. Otherwise a
    uniformly random bit index is drawn until it hits a bit set in
    ``self.judge``, and that move is played.

    NOTE(review): the random branch loops forever when ``self.judge`` has
    no bit set among indices 0-63 - presumably the caller guarantees at
    least one legal move; confirm.
    """
    nb, nw = self.Count()
    remaining = 64 - nb - nw

    if remaining < RAND:
        root = mcts.Node(mcts.FakeNode(), 0, self.turn,
                         Core(self.turn, self.black, self.white))
        self.currentNode = root
        root.isGameRoot = True
        root.isSearchRoot = True

        pi = self.mctsBatch.alpha([root], 1)[0]
        move = int(np.argmax(pi))
        self.addHistory(pi, self.turn)
        self.currentNode = self.makeMove(root, move)
        return

    # Rejection sampling over the 64 bit positions.
    while True:
        move = random.randrange(64)
        if self.judge & (1 << move):
            break
    # makeMove is used here; a direct self.AddSite(site) call is disabled.
    self.currentNode = self.makeMove(self.currentNode, move)
def tst_example_children_finalcolum():
    """Expand a tree breadth-first from a fixed 5x5 state and visualise it.

    Every node is expanded in FIFO order. The states of nodes of type
    ``'R'`` whose ``remain_rooms`` no longer contains 5 are collected, and
    the first 48 of them are handed to ``mcts.Visualisation``.

    NOTE(review): ``node.expand()`` and the children loop are assumed to run
    for every dequeued node, not only for ``'R'`` nodes - confirm.
    """
    from collections import deque

    # final column check ok
    state = np.array([[1, 2, 3, 4, 0],
                      [1, 2, 3, 0, 0],
                      [0, 2, 3, 0, 0],
                      [0, 2, 0, 0, 0],
                      [0, 0, 0, 0, 0]])
    rootnode = mcts.Node(state, [5, 6], None, ('R', None),
                         ((None, None), (None, None)))

    state_path = []
    # deque gives O(1) pops from the front; list.pop(0) is O(n) per pop.
    queue = deque([rootnode])
    while queue:
        node = queue.popleft()
        if node.type == 'R' and 5 not in node.remain_rooms:
            state_path.append(deepcopy(node.state))
        node.expand()
        if node.terminal is False:
            queue.extend(node.children)

    states_path = state_path[0:48]
    roomids = [1, 2, 3, 4, 5, 6]
    Cons = np.ones((6, 6))
    vis = mcts.Visualisation(roomids, states_path, Cons, 'None')
    vis.vis_static()
def real_game(modelname, time_limit, recommendation_count):
    """Interactively follow a real draft and print search recommendations.

    Asks for game mode, side and who starts, then loops until the draft
    state is terminal. On the user's turn a UCT search is run and the top
    children are printed, ranked by mean simulated reward. On every turn
    the action actually taken is read with ``get_pick`` and the tree is
    advanced. Finally the Radiant win probability is printed for every
    model in ``util.all_models``.

    Args:
        modelname: name passed to ``util.load_model`` for the search model.
        time_limit: forwarded to ``mcts_transpositions.uct_search``.
        recommendation_count: number of ranked children to print.

    NOTE(review): ``players_turn`` is simply toggled after every action -
    confirm this matches the configured pick/ban order.
    NOTE(review): an unexplored choice creates ``mcts.Node`` while the root
    is a ``mcts_transpositions.Node`` - confirm this is intended.
    """
    mode = ask_question("What gamemode are you playing?", ["ap", "cm"])
    side = ask_question("Which side are you playing on?", ["radiant", "dire"])
    first = ask_question("Do you have first pick / ban?", ["y", "n"])

    # The order is stored on the util module.
    util.pick_ban_order = util.allpick_order if mode == "ap" else util.cm_order

    radiant_goes_first = ((side == "radiant" and first == "y")
                          or (side == "dire" and first == "n"))
    node = mcts_transpositions.Node(mcts.State(radiant_goes_first))
    transpositions = {}
    model = util.load_model(modelname)
    players_turn = ((side == "radiant" and node.state.radiant_moves_next)
                    or (side == "dire" and not node.state.radiant_moves_next))

    while not node.state.is_terminal():
        print_state(node.state)
        choices_sets = [set(option) for option in node.state.get_actions()]
        pick_ban, count = util.pick_ban_order[node.state.pick_ban_position]
        if pick_ban == util.pick:
            subject = 'pick'
        else:
            subject = 'ban'
        print("The next action is a", subject, "of", count, "heroes.")

        if players_turn:
            print("It is your turn. MCTS recommends the following heroes: ...")
            _, root_node, transpositions = mcts_transpositions.uct_search(
                model,
                initial_node=node,
                time_limit=time_limit,
                transpositions=transpositions)
            node = root_node

            def entry_for(n):
                # Statistics are read from the transposition table entry.
                return transpositions[mcts_transpositions.state_to_key(n.state)]

            def mean_reward(n):
                entry = entry_for(n)
                return entry.total_simulated_reward / entry.visit_count

            ranked = sorted(root_node.children, key=mean_reward, reverse=True)
            for c in ranked[:recommendation_count]:
                entry = entry_for(c)
                hero_names = [util.simple_heroes.ordered_to_name(i)
                              for i in c.incoming_action]
                print(hero_names,
                      entry.total_simulated_reward / entry.visit_count,
                      entry.visit_count)
        else:
            print("It is the other team's turn. What did they do?")
        players_turn = not players_turn

        choice = get_pick(node.state, pick_ban, count)
        print()
        assert (set(choice) in choices_sets)

        # Re-use the explored subtree when one exists (no early exit: the
        # last matching child wins).
        found = False
        for candidate in node.children:
            if candidate.incoming_action == choice:
                node = candidate
                node.parent = None
                node.incoming_action = None
                found = True
        if not found:
            node = mcts.Node(node.state.get_next_state(choice))

    print('Done!')
    print_state(node.state)
    print('Predicting Radiant win probability with all models:')
    for model_name in util.all_models:
        model = util.load_model(model_name)
        features = util.state_to_feature(node.state)
        print(model_name, ':',
              util.predict_radiant_win_probability(features, model))
def find_move(self, board, min_kldiv=0, max_rolls=0, max_time=0, pvs=0,
              temperature=False, use_mcts=True):
    """Search from ``board``, advance ``self.node`` to the chosen child.

    With ``use_mcts`` at least one rollout is always performed. The search
    then stops as soon as one of these holds (a limit of 0 disables it):

    * ``self.should_stop`` is set,
    * more than ``max_time`` seconds have elapsed,
    * ``max_rolls`` rollouts have been performed,
    * the value returned by ``self.print_stats`` (called every
      ``STAT_INTERVAL`` rollouts) is below ``min_kldiv``.

    Args:
        board: position to search from.
        min_kldiv: threshold for the ``print_stats`` value.
        max_rolls: maximum number of rollouts.
        max_time: time limit in seconds (compared against ``time.time()``).
        pvs: forwarded to ``self.print_stats``.
        temperature: if truthy, a child is sampled with weights raised to
            ``1 / temperature`` (visit share with ``use_mcts``, prior ``P``
            otherwise); if falsy, the most visited child is taken.
        use_mcts: whether to run rollouts before choosing.

    Returns:
        ``(node, stats)`` - the chosen child (also stored in ``self.node``)
        and ``Stats(kl_div, rolls, elapsed_seconds)``.
    """
    # We try to reuse the previous node, but if we can't, we create a new one.
    if self.node:
        # Check if the board is at one of our children (cheap pondering)
        for node in self.node.children:
            if node.board == board:
                self.node = node
                if self.args.debug:
                    print('info string Reusing node from ponder.')
                break

    # If we weren't able to find the board, make a new node.
    # Note the node.children check: If the node is a reused node and
    # at a repeated position, it will think the game is over, but we
    # still want it to continue playing.
    if not self.node or self.node.board != board or not self.node.children:
        vec = self.args.model.from_scratch(board)
        self.node = mcts.Node(board, vec, None, 0, self.args)
        if self.args.debug:
            print('info string Creating new root node.')
        # Print priors for new root node.
        while self.node.N < 2:
            # Ensure children are expanded
            self.node.rollout()
        nodes = sorted(self.node.children, key=lambda n: n.P, reverse=True)[:7]
        print('info string priors',
              ', '.join(f'{board.san(n.move)} {n.P:.1%}' for n in nodes))

    # Find move to play
    self.should_stop = False
    kl_div = 1
    rolls = 0
    start_time = time.time()
    if use_mcts:
        first = True
        for i in itertools.count():
            rolls += 1
            self.node.rollout()
            if (self.should_stop
                    or (max_time > 0 and time.time() > start_time + max_time)
                    or (max_rolls > 0 and rolls >= max_rolls)):
                break
            if (i + 1) % STAT_INTERVAL == 0:
                kl_div = self.print_stats(first, pvs)
                if min_kldiv > 0 and kl_div < min_kldiv:
                    break
                first = False

    # Pick best or random child
    if temperature:
        if use_mcts:
            counts = [(n.N / self.node.N)**(1 / temperature)
                      for n in self.node.children]
        else:
            counts = [n.P**(1 / temperature) for n in self.node.children]
        node = random.choices(self.node.children, weights=counts)[0]
        if self.args.debug:
            o = sorted(self.node.children, key=lambda n: -n.N).index(node)
            # From https://codegolf.stackexchange.com/questions/4707#answer-4712
            # The snippet relies on integer division; with `/` the "teens"
            # test never fires and 11 would be rendered as "11st".
            # NOTE(review): nothing in this function consumes `ordinal`.
            ordinal = (lambda n: "%d%s" % (n, "tsnrhtdd"[
                (n // 10 % 10 != 1) * (n % 10 < 4) * n % 10::4]))(o + 1)
        self.node = node
    else:
        self.node = max(self.node.children, key=lambda n: n.N)

    stats = Stats(kl_div, rolls, time.time() - start_time)
    return self.node, stats
def reset(self):
    """Start over with a new game state and an empty tree root."""
    fresh_state = game.State()
    self.state = fresh_state

    empty_root = mcts.Node(None, None)
    self.root = empty_root
def InputEnemy(self):
    # Choose the opponent's move and record training history.
    #
    # Three regimes, selected by nmTurn = 64 - nb - nw:
    #   * nmTurn < 16:   moves and scores come from moveAITrain; a policy
    #                    vector and a value clamped to -1/0/1 are recorded,
    #                    and additional history entries are generated for
    #                    the first len(site) // 4 + 1 sorted moves.
    #   * nmTurn < RAND: a search root is built, the batch search is run
    #                    and the arg-max move is played.
    #   * otherwise:     a random bit index set in self.judge is played.
    #
    # NOTE(review): the nesting of the statements after `if site2:` and the
    # position of `self.AddSite(0)` were reconstructed - confirm.
    nb, nw = self.Count()
    nmTurn = 64 - nb - nw
    if nmTurn < 16:
        site, maxScore, sumScore, self.table = moveAITrain(
            self.black, self.white, self.turn, nmTurn, self.table, nw - nb)
        # Reduce the score to its sign.
        if maxScore > 0:
            maxScore = 1
        elif maxScore < 0:
            maxScore = -1
        site.sort(reverse=True)
        # Each entry of `site` is indexed as (score, move bit); scores are
        # divided by sumScore, or by 1 when sumScore is zero.
        pi = np.zeros(64)
        for move in site:
            pi[bitFind(move[1])] = move[0] / (sumScore if sumScore else 1)
        self.addHistory(pi, self.turn)
        # Snapshot taken before addValue is applied to self.history.
        temp = copy.deepcopy(self.history)
        self.addValue(self.history, maxScore)
        # NOTE(review): site[0] is accessed even when `site` is empty -
        # presumably moveAITrain always returns at least one move; confirm.
        for i in range(len(site) // 4 + 1):
            history2 = copy.deepcopy(temp)
            self.AddSite(site[i][1])
            self.NextBoard()
            site2, maxScore2, sumScore2, self.table = moveAITrain(
                self.black, self.white, self.turn ^ 1, nmTurn - 1, self.table, nw - nb)
            self.NextBoard()
            # Negate, then reduce to the sign.
            maxScore2 = -maxScore2
            if maxScore2 > 0:
                maxScore2 = 1
            elif maxScore2 < 0:
                maxScore2 = -1
            if site2:
                site2.sort(reverse=True)
                pi2 = np.zeros([64])
                for move2 in site2:
                    pi2[bitFind(move2[1])] = move2[0] / (sumScore2 if sumScore2 else 1)
                features = []
                features.append(self.black)
                features.append(self.white)
                features.append(self.judge)
                history2.append([features, pi2, self.turn ^ 1])
                self.addValue(history2, maxScore2)
                self.history.extend(history2)
        self.AddSite(0)
    elif nmTurn < RAND:
        self.currentNode = mcts.Node(
            mcts.FakeNode(), 0, self.turn, Core(self.turn, self.black, self.white))
        self.currentNode.isGameRoot = True
        self.currentNode.isSearchRoot = True
        pi = self.mctsBatch.alpha([self.currentNode], 1)[0]
        move = int(np.argmax(pi))
        self.addHistory(pi, self.turn)
        self.currentNode = self.makeMove(self.currentNode, move)
    else:
        # Draw random bit indices until one is set in self.judge.
        while True:
            move = random.randrange(0, 64)
            site = 1 << move
            if self.judge & site:
                # self.AddSite(site)
                self.currentNode = self.makeMove(self.currentNode, move)
                break
def buildMCTS(self, state):
    """Create a new search tree rooted at ``state`` for this agent.

    Args:
        state: game state that becomes the root node of the tree.
    """
    lg.logger_mcts.info(
        '****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)

    root = mcts.Node(state)
    self.root = root
    self.mcts = mcts.MCTS(root, self.cpuct)