示例#1
0
 def _simulate(self, node: GameState):
     "Returns the reward for a random simulation (to completion) of `node`"
     invert_reward = True
     while True:
         if node.terminal():
             reward = node.reward()
             return 1 - reward if invert_reward else reward
         node = node.push(node.random_move())
         invert_reward = not invert_reward
示例#2
0
    def play(self, game: GameState, verbose=False):
        """Run `game` until it is over, letting two action models take turns.

        When `self.alternate` is truthy, `self.actionModel1` and
        `self.actionModel2` are exchanged on the instance before play
        starts; the exchange persists after the call. On every step the
        model chosen by `game.turn()` (truthy -> `actionModel1`, falsy ->
        `actionModel2`) supplies a `(move, h)` pair via `action(game)`,
        and the move is applied with `game.push(move)`, mutating `game`
        in place.

        Args:
            game: The game object to advance.
            verbose: When true, print the game before the first move, and
                after every move print the move with its `h` value
                followed by the game.

        Returns:
            A tuple `(game.winner(), states, hs)` where `states` holds a
            copy of the game taken just before each move and `hs` holds
            the second value returned by the acting model for that move.
        """
        if self.alternate:
            # Swap which model is consulted when game.turn() is truthy.
            self.actionModel1, self.actionModel2 = (
                self.actionModel2,
                self.actionModel1,
            )

        snapshots = []
        estimates = []

        if verbose:
            print(game)

        while not game.game_over():
            if game.turn():
                mover = self.actionModel1
            else:
                mover = self.actionModel2

            chosen, estimate = mover.action(game)
            # Record the position as it was *before* the move is applied.
            snapshots.append(game.__copy__())
            estimates.append(estimate)
            game.push(chosen)

            if verbose:
                print(f"move: {chosen!s} h: {estimate!s}")
                print(game)

        return game.winner(), snapshots, estimates