Exemplo n.º 1
0
 def best_child(self, p_node):
     """Pick the child of ``p_node`` with the highest UCT-style score.

     Each child's score is ``Q + p + N`` where

     * ``Q`` is ``get_max_difference(child.value, p_node.act_id)`` divided
       by ``max(child.value)`` (0 when that maximum is 0),
     * ``p`` is the value stored next to the child in ``p_node.moves``,
     * ``N`` is the exploration term
       ``sqrt(2 * ln(p_node.visited) / child.visited)``, or ``MAX`` for a
       child that has never been visited.

     Args:
         p_node: node whose ``moves`` maps each move to a ``(child, p)``
             pair.

     Returns:
         The child chosen by ``randomMax`` over the computed scores.
     """
     if self.log:
         # NOTE(review): the label says 'jump' although this is best_child;
         # possibly copied from jump() -- confirm before changing the text.
         print('jump')
         print(p_node)

     # math.log raises ValueError for a zero visit count; fall back to 1,
     # the same fallback jump() applies to an unvisited node.
     if p_node.visited > 0:
         node_v_para = 2 * math.log(p_node.visited)
     else:
         node_v_para = 1

     uct_dict = {}
     for c, p in p_node.moves.values():
         top_value = max(c.value)
         if top_value != 0:
             Q = get_max_difference(c.value, p_node.act_id) / top_value
         else:
             Q = 0
         if c.visited != 0:
             N = (node_v_para / c.visited) ** (1 / 2)
         else:
             # Unvisited children get the largest bonus so they are tried.
             N = MAX
         uct_dict[c] = Q + p + N
     return randomMax(uct_dict)
Exemplo n.º 2
0
    def jump(self, node):
        """Descend two levels from ``node`` and return the node reached.

        The children of ``node`` are scored and one is picked with
        ``randomMax``; that child's own children are then scored the same
        way and one of them is picked. If the first pick has no entries in
        ``moves``, the first pick itself is returned.

        A child's score is ``Q + p + N`` where ``Q`` is
        ``get_max_difference(child.value, self.id)`` divided by
        ``max(child.value)`` (0 when that maximum is 0), ``p`` is the value
        stored next to the child in ``moves`` and ``N`` is
        ``1 / child.visited`` (``MAX`` for a child with zero visits).

        Args:
            node: node whose ``moves`` maps each move to a ``(child, p)``
                pair.

        Returns:
            The grandchild selected from ``node``, or the selected child
            when it has no moves.
        """
        def score_children(parent):
            # Build {child: score} for every (child, p) pair under parent.
            scores = {}
            for c, p in parent.moves.values():
                top_value = max(c.value)
                if top_value != 0:
                    Q = get_max_difference(c.value, self.id) / top_value
                else:
                    Q = 0
                # The bonus is the plain inverse visit count here, not the
                # sqrt(2 * ln(parent visits) / visits) term, so the parent's
                # visit count is not needed (and its log is not taken).
                N = 1 / c.visited if c.visited != 0 else MAX
                scores[c] = Q + p + N
            return scores

        if self.log:
            print('jump')
            print(node)

        uc_node = randomMax(score_children(node))

        uct_dict = score_children(uc_node)
        if not uct_dict:
            if self.log:
                print('reach the end, jump to the uc_node')
                print(uc_node)
            return uc_node

        jump_node = randomMax(uct_dict)
        if self.log:
            print('normal jump to the node')
            print(jump_node)
        return jump_node
Exemplo n.º 3
0
    def search(self, moves, game_state, player_order):
        """Run a time-boxed tree search and return the selected root move.

        A fresh root ``Node`` is built for ``game_state`` and
        ``self.one_search`` is called on it repeatedly until
        ``len(moves) * SEARCH_TIME`` seconds have elapsed. Search statistics
        are printed, then every move recorded on the root is scored with
        ``get_max_difference(child.value, self.id)`` (``-1000`` when the
        child is ``None``) and ``randomMax`` picks the move to return.

        Args:
            moves: moves available at the root; its length scales the time
                budget. Stored on ``self.init_moves``.
            game_state: state the search starts from. Stored on
                ``self.init_game_state``.
            player_order: stored on ``self.player_order``.

        Returns:
            The move chosen by ``randomMax`` from the root's scored moves.
        """
        self.tree = []
        self.init_game_state = game_state
        self.init_moves = moves
        self.player_order = player_order

        # The root has no parent and no incoming move; it acts as self.id.
        root_node = Node(game_state, None, None, self.id, self.tree)
        self.root_node = root_node

        self.time_monitor = defaultdict(float)

        # A monotonic clock keeps the budget immune to wall-clock changes.
        start = time.monotonic()
        n = 0
        while time.monotonic() - start < len(moves) * SEARCH_TIME:
            n += 1
            self.one_search(root_node)
        print('searched times', n)
        print('nodes:', len(self.tree))
        print('{} finished'.format(str(self.agent.__class__)))
        print('seach duration', time.monotonic() - start)
        print('distribute', self.time_monitor)
        print()

        move_scores = {}
        for m, (c, _) in root_node.moves.items():
            if c is not None:
                move_scores[m] = get_max_difference(c.value, self.id)
            else:
                # Moves without a child node are ranked far below the rest.
                move_scores[m] = -1000
        move = randomMax(move_scores)

        # The returned track is not used here; the call is kept in case
        # get_predict_track has side effects -- TODO confirm.
        self.get_predict_track(root_node, move)
        if USING_GUI:
            Gui(self.tree, 'mcts save')
        return move