Пример #1
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand ``leaf`` while the game is running and report its value.

        When ``done`` equals 0, the incoming ``value`` is replaced by the one
        returned from ``self.get_preds(leaf.state)`` and one edge per allowed
        action is appended to ``leaf.edges``. The child node of each edge is
        looked up in ``self.mcts.tree`` by the id of the resulting state, or
        created and registered through ``self.mcts.addNode`` when it is not
        there yet. For any other ``done`` the given ``value`` is only logged.

        Args:
            leaf: Node to evaluate; its ``state``, ``playerTurn`` and
                ``edges`` attributes are used.
            value: Game value to report when ``done`` is not 0.
            done: 0 while the game is still running.
            breadcrumbs: Passed through to the caller unchanged.

        Returns:
            The tuple ``(value, breadcrumbs)``.
        """
        mcts_log = lg.logger_mcts
        mcts_log.info('------EVALUATING LEAF------')

        if done != 0:
            # Nothing to expand: just report the value handed in.
            mcts_log.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)
            return value, breadcrumbs

        value, priors, legal_actions = self.get_preds(leaf.state)
        mcts_log.info('PREDICTED VALUE FOR %d: %f',
                      leaf.state.playerTurn, value)

        # Re-index the priors by the allowed actions so they can be addressed
        # by loop position below (presumably array-style indexing; confirm
        # against get_preds).
        priors = priors[legal_actions]

        for position, action in enumerate(legal_actions):
            successor, _, _ = leaf.state.takeAction(action)

            if successor.id in self.mcts.tree:
                child = self.mcts.tree[successor.id]
                mcts_log.info('existing node...%s...', child.id)
            else:
                child = mc.Node(successor)
                self.mcts.addNode(child)
                mcts_log.info('added node...%s...p = %f', child.id,
                              priors[position])

            edge = mc.Edge(leaf, child, priors[position], action)
            leaf.edges.append((action, edge))

        return value, breadcrumbs
Пример #2
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand ``leaf`` unless the game is finished and report its value.

        If ``done`` is falsy, the state is rebuilt from ``leaf.state_id`` with
        ``GameState.from_id`` and ``self.get_preds`` supplies a new ``value``,
        the action probabilities and a per-action allowed flag. For every
        action index whose flag is truthy, a child node is fetched from
        ``self.mcts.tree`` (or created and added when missing) and an edge to
        it is appended to ``leaf.edges``. If ``done`` is truthy the given
        ``value`` is only logged.

        Args:
            leaf: Node to evaluate; its ``state_id`` and ``edges`` are used.
            value: Game value to report when ``done`` is truthy.
            done: Truthy once the game has ended.
            breadcrumbs: Passed through to the caller unchanged.

        Returns:
            The tuple ``(value, breadcrumbs)``.
        """
        mcts_log = lg.logger_mcts
        mcts_log.debug('------EVALUATING LEAF------')

        if done:
            # Finished game: report the value handed in, nothing to expand.
            mcts_log.debug(
                'GAME VALUE FOR %d: %f',
                GameState.current_player_from_id(leaf.state_id), value)
            return value, breadcrumbs

        state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
        value, probs, action_mask = self.get_preds(state)
        mcts_log.debug('PREDICTED VALUE FOR %d: %f',
                       state.currentPlayer, value)

        # The position inside action_mask doubles as the action identifier.
        for action, is_allowed in enumerate(action_mask):
            if not is_allowed:
                continue

            successor, _, _ = state.takeAction(action)
            if successor.id in self.mcts.tree:
                child = self.mcts.tree[successor.id]
                mcts_log.debug('existing node...%s...', child.state_id)
            else:
                child = mc.Node(successor)
                self.mcts.addNode(child)
                mcts_log.debug('added node...%s...p = %f',
                               child.state_id, probs[action])

            edge = mc.Edge(leaf, child, probs[action], action)
            leaf.edges.append((action, edge))

        return value, breadcrumbs
Пример #3
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Expand ``leaf`` while the game is running and report its value.

        When ``done`` equals 0, ``self.get_preds(leaf.state)`` supplies a new
        ``value``, the action probabilities and the allowed actions. Each
        allowed action is converted with ``index_to_move`` before being
        applied to ``leaf.state``; the resulting child node is taken from
        ``self.mcts.tree`` or created and added, and an edge to it is
        appended to ``leaf.edges``. For any other ``done`` nothing is
        expanded and ``value`` is returned as given.

        Returns:
            The tuple ``(value, breadcrumbs)``; ``breadcrumbs`` is passed
            through unchanged.
        """
        if done != 0:
            return value, breadcrumbs

        value, priors, legal_actions = self.get_preds(leaf.state)
        # Re-index the priors by the allowed actions so they can be addressed
        # by loop position below (presumably array-style indexing; confirm
        # against get_preds).
        priors = priors[legal_actions]

        for position, action in enumerate(legal_actions):
            successor, _, _ = leaf.state.takeAction(index_to_move(action))

            if successor.id in self.mcts.tree:
                child = self.mcts.tree[successor.id]
            else:
                child = mc.Node(successor)
                self.mcts.addNode(child)

            # The edge keeps the raw action, not the converted move.
            edge = mc.Edge(leaf, child, priors[position], action)
            leaf.edges.append((action, edge))

        return value, breadcrumbs
Пример #4
0
    def evaluateLeaf(self, leaf, value, done, breadcrumbs):
        """Evaluate ``leaf`` and, unless the game is over, grow the tree below it.

        When ``done`` equals 0, ``self.get_preds(leaf.state)`` supplies a new
        ``value``, the action probabilities and the allowed actions, and one
        edge per allowed action is appended to ``leaf.edges``. Child nodes
        are reused from ``self.mcts.tree`` when their state id is already
        present and are created and added otherwise. For any other ``done``
        the given ``value`` is only logged.

        Returns:
            The tuple ``(value, breadcrumbs)``; ``breadcrumbs`` is passed
            through unchanged.
        """
        lg.logger_mcts.info('------EVALUATING LEAF------')

        if done != 0:
            # Original author's note: when the leaf is already a terminal
            # state, its value (0 or -1) is returned directly.
            lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn,
                                value)
        else:
            # Original author's note: for a non-terminal leaf the pieces on
            # the board cannot decide the winner yet, so the value is no
            # longer 0 or -1 but a prediction from the neural network.
            value, action_probs, allowedActions = self.get_preds(leaf.state)
            lg.logger_mcts.info('PREDICTED VALUE FOR %d: %f',
                                leaf.state.playerTurn, value)

            # Re-index the probabilities by the allowed actions so they can
            # be addressed by loop position below.
            action_probs = action_probs[allowedActions]

            for slot, move in enumerate(allowedActions):
                # takeAction returns (newState, value, done); only the new
                # state is needed here.
                reached, _, _ = leaf.state.takeAction(move)

                if reached.id in self.mcts.tree:
                    # Original author's note: the tree works like a mapping
                    # from node id to node and holds the current search tree.
                    child = self.mcts.tree[reached.id]
                    lg.logger_mcts.info('existing node...%s...', child.id)
                else:
                    # Original author's note: a node id is the black and
                    # white board states merged into one string, e.g.
                    # 0000...0001000...0010000...; the node itself is an
                    # object bundling further information.
                    child = mc.Node(reached)
                    # Register the new node in the tree.
                    self.mcts.addNode(child)
                    lg.logger_mcts.info('added node...%s...p = %f', child.id,
                                        action_probs[slot])

                # Edge arguments: inNode, outNode, prior, action. inNode is
                # the current node, outNode the node reached by the action;
                # the prior is what the original notes call the 'P' value.
                link = mc.Edge(leaf, child, action_probs[slot], move)
                leaf.edges.append((move, link))

        # Original author's note: this is the value of the board under the
        # current sequence of moves.
        return value, breadcrumbs
Пример #5
0
    def evaluate_leaf(self, leaf, value, over, backtrack):
        """Expand ``leaf`` while the game is running and report its value.

        When ``over`` equals 0, ``self.get_predictions(leaf.state)`` supplies
        a new ``value``, the move probabilities and the allowed moves. For
        each allowed move the resulting child node is taken from
        ``self.mcts.tree`` or created and added, and an edge carrying that
        move's probability is appended to ``leaf.edges``. For any other
        ``over`` nothing is expanded and ``value`` is returned as given.

        Returns:
            The tuple ``(value, backtrack)``; ``backtrack`` is passed through
            unchanged.
        """
        if over != 0:
            return value, backtrack

        value, probabilities, allowed = self.get_predictions(leaf.state)

        # Keep one probability per allowed move, in the same order as the
        # moves themselves.
        priors = [probabilities[move] for move in allowed]

        for move, prior in zip(allowed, priors):
            successor, _, _ = leaf.state.make_move(move)

            if successor.id in self.mcts.tree:
                child = self.mcts.tree[successor.id]
            else:
                child = mc.Node(successor)
                self.mcts.add_node(child)

            leaf.edges.append((move, mc.Edge(leaf, child, prior, move)))

        return value, backtrack