示例#1
0
    def h(self, game: GameState):
        """Score `game`: its winner if finished, else a rollout-based score.

        For a terminal `game` the result of `game.winner()` is returned
        directly. Otherwise `self.do_rollout(game)` is invoked
        `self.nbr_rollouts` times, and the successor picked by
        `self.choose(game)` is passed to `self.h_score`, whose result is
        returned.
        """
        if not game.terminal():
            # Run every rollout first; the successor is only picked afterwards.
            for _unused in range(self.nbr_rollouts):
                self.do_rollout(game)

            picked = self.choose(game)
            return self.h_score(picked)

        return game.winner()
示例#2
0
 def _simulate(self, node: GameState):
     """Returns the reward for a random simulation (to completion) of `node`.

     Random moves (`node.random_move()`) are pushed until a terminal
     state is reached. The terminal state's `reward()` is returned as-is
     after an odd number of moves, and as `1 - reward` after an even
     number of moves (including zero).
     """
     # NOTE(review): the flip presumably reflects alternating players'
     # perspectives on the reward; confirm against GameState.reward().
     flip = True
     while not node.terminal():
         node = node.push(node.random_move())
         flip = not flip

     outcome = node.reward()
     if flip:
         return 1 - outcome
     return outcome
示例#3
0
    def choose(self, node: GameState):
        """Choose the best successor of node. (Choose a move in the game)

        If `node` has no entry in `self.children`, a random child
        (`node.random_child()`) is returned. Otherwise the child with the
        highest average reward `self.Q[child] / self.N[child]` is returned;
        children with a visit count of zero are ranked last.

        Raises:
            RuntimeError: if `node` is terminal.
        """
        if node.terminal():
            raise RuntimeError(f"choose called on terminal node {node}")

        if node not in self.children:
            return node.random_child()

        def score(n):
            if self.N[n] == 0:
                return float("-inf")  # avoid unseen moves
            return self.Q[n] / self.N[n]  # average reward

        return max(self.children[node], key=score)