示例#1
0
    def action(self, game: GameState):
        """Pick a move for the side to play in ``game``.

        The successor positions from ``game.children()`` are scored and one
        of ``game.legal_moves()`` is selected by index, so both sequences are
        expected to be index-aligned.

        Selection order:

        1. If ``self.win_heuristic(boards)`` returns an index other than
           ``-1``, that move is returned immediately with a value of ``1``.
        2. Otherwise the boards are scored with ``self.heuristic.hs`` and
           post-processed by ``self.default_heuristic``.
        3. With ``self.variance`` truthy, a move is sampled with probability
           proportional to its score (scores are flipped to ``1 - e`` when
           ``game.turn()`` is falsy). Without it, the highest score is taken
           when ``game.turn()`` is truthy and the lowest otherwise.

        Returns:
            ``None`` when the game is already over, otherwise a
            ``(move, value)`` tuple. In the sampling branch ``value`` is the
            normalised weight of the chosen move, not the raw score.
        """
        if game.game_over():
            return None

        possibleMoves = game.legal_moves()
        boards = game.children()

        win_h = self.win_heuristic(boards)
        if win_h != -1:
            return possibleMoves[win_h], 1

        evals = self.heuristic.hs(boards)
        evals = self.default_heuristic(boards, evals)

        if self.variance:
            if not game.turn():
                evals = [1 - e for e in evals]
            total = sum(evals)
            if total:
                evals = [e / total for e in evals]
            else:
                # Every candidate has zero weight: normalising would divide
                # by zero, so fall back to a uniform distribution.
                evals = [1 / len(evals)] * len(evals)

            choice = random.random()
            # Default to the last index: floating-point rounding can leave
            # the cumulative sum marginally below ``choice``, in which case
            # the loop below would otherwise never assign ``bestIdx``.
            bestIdx = len(evals) - 1
            s = 0
            for idx, e in enumerate(evals):
                s += e
                if choice <= s:
                    bestIdx = idx
                    break

        else:
            if game.turn():
                bestIdx = np.argmax(evals)
            else:
                bestIdx = np.argmin(evals)

        bestMove = possibleMoves[bestIdx]

        return bestMove, evals[bestIdx]
示例#2
0
    def play(self, game: GameState, verbose=False):
        """Run ``game`` until it is over, letting the two action models move.

        When ``self.alternate`` is truthy the two models are exchanged before
        the game starts. On each ply ``self.actionModel1`` moves if
        ``game.turn()`` is truthy and ``self.actionModel2`` moves otherwise.

        Args:
            game: The game to play; it is mutated in place via ``game.push``.
            verbose: When truthy, print the game before the first move and
                after every move, together with the move and its value.

        Returns:
            A tuple ``(game.winner(), states, hs)`` where ``states`` holds a
            copy of the game taken before each move and ``hs`` the value the
            acting model reported for that move.
        """
        if self.alternate:
            # Exchange the two models.
            self.actionModel1, self.actionModel2 = (
                self.actionModel2,
                self.actionModel1,
            )

        snapshots = []
        values = []

        if verbose:
            print(game)

        while not game.game_over():
            if game.turn():
                mover = self.actionModel1
            else:
                mover = self.actionModel2

            chosen, value = mover.action(game)

            # Record the position as it was before the move is applied.
            snapshots.append(game.__copy__())
            values.append(value)
            game.push(chosen)

            if not verbose:
                continue
            print("move: " + str(chosen) + " h: " + str(value))
            print(game)

        return game.winner(), snapshots, values
示例#3
0
 def h_score(self, game: GameState):
     """Return ``self._score(game)``, flipped to ``1 - score`` when ``game.turn()`` is truthy."""
     raw = self._score(game)
     if game.turn():
         return 1 - raw
     return raw