def best_child(self, p_node):
    """Return the child of ``p_node`` with the highest selection score.

    ``p_node.moves`` maps each move to a ``(child, p)`` pair. Every child is
    scored as ``Q + p + N`` where:

    * ``Q`` is ``get_max_difference(child.value, p_node.act_id)`` divided by
      ``max(child.value)``, or 0 when that maximum is 0;
    * ``p`` is the second element stored alongside the child
      (presumably a prior/bias term -- confirm against where ``moves`` is
      filled in);
    * ``N`` is ``sqrt(2 * ln(p_node.visited) / child.visited)``, or ``MAX``
      for a child whose ``visited`` count is 0.

    Args:
        p_node: Parent node; its ``visited``, ``act_id`` and ``moves``
            attributes are read.

    Returns:
        The child selected by ``randomMax`` from the child -> score mapping
        (presumably a maximal-score child with random tie-breaking --
        confirm against ``randomMax``).
    """
    if self.log:
        print('jump')
        print(p_node)

    # math.log(0) raises ValueError. A parent without recorded visits gets a
    # zero numerator, which is also what a single visit yields (ln 1 == 0).
    if p_node.visited > 0:
        explore_numerator = 2 * math.log(p_node.visited)
    else:
        explore_numerator = 0.0

    scores = {}
    for child, p in p_node.moves.values():
        top_value = max(child.value)
        if top_value != 0:
            q = get_max_difference(child.value, p_node.act_id) / top_value
        else:
            q = 0

        if child.visited != 0:
            n = (explore_numerator / child.visited) ** (1 / 2)
        else:
            # Unvisited children receive the MAX bonus.
            n = MAX

        scores[child] = q + p + n

    return randomMax(scores)
def jump(self, node):
    """Descend up to two levels below ``node`` and return the node reached.

    The best-scoring child of ``node`` is chosen first, then the
    best-scoring child of that child. When the first chosen child has no
    entries in ``moves``, that child itself is returned.

    Children are scored as ``Q + p + N`` for every ``(child, p)`` pair in
    the parent's ``moves``:

    * ``Q`` is ``get_max_difference(child.value, self.id)`` divided by
      ``max(child.value)``, or 0 when that maximum is 0;
    * ``N`` is ``1 / child.visited``, or ``MAX`` when ``child.visited`` is 0.

    Args:
        node: Node to start from; its ``moves`` attribute is read.

    Returns:
        The node two levels down chosen by ``randomMax``, or the
        first-level choice when it has no children.
    """

    def score_children(parent):
        """Map every child in ``parent.moves`` to its ``Q + p + N`` score."""
        scores = {}
        for child, p in parent.moves.values():
            top_value = max(child.value)
            if top_value != 0:
                q = get_max_difference(child.value, self.id) / top_value
            else:
                q = 0
            n = 1 / child.visited if child.visited != 0 else MAX
            scores[child] = q + p + n
        return scores

    if self.log:
        print('jump')
        print(node)

    # No logarithm of the visit counts is taken here: the reciprocal bonus
    # above does not need it, and math.log(0) would raise for an unvisited
    # node.
    uc_node = randomMax(score_children(node))

    grandchild_scores = score_children(uc_node)
    if not grandchild_scores:
        if self.log:
            print('reach the end, jump to the uc_node')
            print(uc_node)
        return uc_node

    jump_node = randomMax(grandchild_scores)
    if self.log:
        print('normal jump to the node')
        print(jump_node)
    return jump_node
def search(self, moves, game_state, player_order):
    """Run a time-boxed tree search from ``game_state`` and return a move.

    Resets the per-search attributes on ``self`` (``tree``,
    ``init_game_state``, ``init_moves``, ``player_order``, ``root_node``,
    ``time_monitor``), then calls ``self.one_search(root_node)`` repeatedly
    while fewer than ``len(moves) * SEARCH_TIME`` seconds have elapsed.
    Afterwards it prints search statistics and picks a root move by
    ``get_max_difference(child.value, self.id)``.

    Args:
        moves: Moves available at the root. Stored as ``self.init_moves``;
            its length scales the time budget.
        game_state: State handed to the root ``Node``.
        player_order: Stored as ``self.player_order``; not otherwise used
            in this method.

    Returns:
        The key of ``root_node.moves`` selected by ``randomMax`` from the
        move -> score mapping.
    """
    self.tree = []
    self.init_game_state = game_state
    self.init_moves = moves
    self.player_order = player_order

    # The root has neither a parent node nor a move leading to it.
    root_node = Node(self.init_game_state, None, None, self.id, self.tree)
    self.root_node = root_node
    self.time_monitor = defaultdict(float)

    # A monotonic clock keeps the budget immune to system clock adjustments.
    start = time.monotonic()
    n = 0
    while time.monotonic() - start < len(moves) * SEARCH_TIME:
        n += 1
        self.one_search(root_node)

    print('searched times', n)
    print('nodes:', len(self.tree))
    print('{} finished'.format(str(self.agent.__class__)))
    print('seach duration', time.monotonic() - start)
    print('distribute', self.time_monitor)
    print()

    # Moves whose child entry is None get a fixed low score of -1000.
    move_scores = {
        m: get_max_difference(c.value, self.id) if c is not None else -1000
        for m, (c, _p) in root_node.moves.items()
    }
    move = randomMax(move_scores)

    # The result is not used here; the call is kept in case
    # get_predict_track has side effects (not visible from this method).
    self.get_predict_track(root_node, move)

    if USING_GUI:
        Gui(self.tree, 'mcts save')
    return move