예제 #1
0
    def getBestAction(self, rootstate, itermax, verbose=False, temp=1):
        """Run an ISMCTS search from ``rootstate`` and return the chosen move.

        Each of the ``itermax`` iterations works on a fresh determinization
        obtained from ``self.CloneAndRandomize(rootstate)`` and then goes
        through four phases: select, expand, simulate and backpropagate.

        Args:
            rootstate: State to search from. It is only handed to
                ``self.CloneAndRandomize`` here and never used directly.
            itermax: Number of search iterations to run.
            verbose: Currently unused; the tree-printing code it used to
                control is commented out at the end of the method.
            temp: Divisor applied to every root child's visit count when the
                children are ranked.

        Returns:
            The ``move`` attribute of the root child with the largest
            ``visits / temp`` value.

        Raises:
            ZeroDivisionError: If ``temp`` is 0 and the root has children.
            ValueError: If ``rootnode.childNodes`` is empty when the final
                ``max`` is taken (e.g. nothing was ever expanded).
        """

        rootnode = Node(self.nnet)

        for _ in range(itermax):
            node = rootnode

            # Determinize: every iteration searches its own randomized clone.
            game = self.CloneAndRandomize(rootstate)

            # Select: descend while the game is still running and the current
            # node has no untried moves left (fully expanded, non-terminal).
            # NOTE(review): this phase calls ``Game.GetValidMoves()`` with no
            # arguments, while the expansion phase below calls
            # ``Game.getValidMoves(game)`` - confirm both spellings/signatures
            # exist on ``Game`` and that the missing ``game`` is intended.
            while (Game.getGameEnded(game.current_player, game) == 0
                   and node.GetUntriedMoves(Game.GetValidMoves()) == []):
                node = node.SelectChild(Game.GetValidMoves())
                Game.getNextState(game, game.current_player, node.move)

            # Expand: the valid moves are multiplied by the node's ``pi``
            # before being filtered down to the untried ones.
            untriedMoves = node.GetUntriedMoves(
                Game.getValidMoves(game) * node.pi)
            if untriedMoves != []:  # something left to try, so not terminal
                m = random.choice(untriedMoves)
                player = game.current_player
                Game.getNextState(game, player, m)
                # Add the child for this move and descend into it.
                node = node.AddChild(m, player, Game.getState(game))

            # Simulate: play random moves until the game reports an end.
            # NOTE(review): every rollout move is drawn from the indices where
            # the expanded node's ``pi`` equals 1, not from the moves valid in
            # the evolving ``game`` - confirm this is intended.
            while Game.getGameEnded(node.playerJustMoved, game) == 0:
                Game.getNextState(game, game.current_player,
                                  random.choice(np.argwhere(node.pi == 1)))

            # Backpropagate: update every node from the expanded one up to the
            # root. Identity test, so a custom ``__eq__``/``__ne__`` on the
            # node class cannot end the walk early.
            while node is not None:
                node.Update(game)
                node = node.parentNode

        # Output some information about the tree - can be omitted
        # if verbose:
        #     print(rootnode.TreeToString(0){})
        # else:
        #     print(rootnode.ChildrenToString())

        # The original key was written ``c.visits**1 / temp``, which parses as
        # ``(c.visits ** 1) / temp``; it is spelled out here with the same
        # value. NOTE(review): if ``c.visits ** (1 / temp)`` was meant, the
        # winner is the same for any positive ``temp``.
        return max(rootnode.childNodes, key=lambda c: c.visits / temp).move
예제 #2
0
class YEET:
    """
    Game interface for a two-player, adversarial, turn-based game, backed by
    a ``Board`` instance stored in ``self.b``.

    Use 1 for player1 and -1 for player2.
    21 possible actions per move, and 8 possible targets per action + 1 if no targets
    is_basic = True initializes game between priest and rogue only

    Every method that accepts ``game_instance`` falls back to ``Board.game``
    when the argument is omitted or ``None``. The fallback is resolved when
    the method is called, not when the class is defined, so it always sees
    the current ``Board.game``.
    """
    def __init__(self, is_basic=True):
        """
        Input:
            is_basic: stored on the instance as ``self.is_basic``; it is not
                      used by any other method of this class
        """
        self.num_actions = 21
        self.players = ['player1', 'player2']
        self.is_basic = is_basic
        self.b = Board()

    def getInitGame(self):
        """
        Initialise a game on the wrapped board.

        Returns:
            startBoard: ``self.b.game`` as it is after ``self.b.initGame()``
                        has run
        """
        # self.b.isolateSet()
        self.b.initGame()
        return self.b.game

    def getNextState(self, player, action, game_instance=None):
        """
        Apply ``action`` for ``player`` and report the resulting state.

        Input:
            player: current player (1 or -1)
            action: action taken by current player; ``action[0]`` is
                    compared against 19 to decide who moves next
            game_instance: game to act on; ``Board.game`` when None

        Returns:
            nextBoard: ``self.b.getState(player, game_instance)`` after the
                       action has been performed
            nextPlayer: ``-player`` when ``action[0] == 19``, otherwise
                        ``player`` (presumably 19 is the end-turn action -
                        TODO confirm)

        Raises:
            GameOver: propagated unchanged from ``self.b.performAction``.
        """
        if game_instance is None:
            game_instance = Board.game

        # Any GameOver raised here reaches the caller as-is; catching it only
        # to raise a fresh GameOver would throw away the original instance.
        self.b.performAction(action, player, game_instance)
        next_state = self.b.getState(player, game_instance)
        if action[0] != 19:
            return next_state, player
        return next_state, -player

    def getValidMoves(self, player, game_instance=None):
        """
        Input:
            player: current player; accepted but not used by this method
            game_instance: game to inspect; ``Board.game`` when None

        Returns:
            validMoves: whatever ``self.b.getValidMoves(game_instance)``
                        returns (presumably a binary vector with 1 for valid
                        moves and 0 for invalid ones - verify against Board)
        """
        # if player == 1:
        #     current_player = self.b.players[0]
        # elif player == -1:
        #     current_player = self.b.players[1]
        if game_instance is None:
            game_instance = Board.game
        return self.b.getValidMoves(game_instance)

    def getGameEnded(self, game_instance=None):
        """
        Input:
            game_instance: game to inspect; ``Board.game`` when None

        Returns:
            r: decided from ``game_instance.player_to_start.playstate``:
               1 for playstate 4, -1 for playstate 5, 0.0001 for playstate 6
               (presumably won / lost / tied - TODO confirm against the
               engine's enum). If none of those apply and
               ``game_instance.turn`` exceeds 180, ``game_instance.ended`` is
               set to True and 0.0001 is returned. Otherwise 0, meaning the
               game has not ended.
        """
        if game_instance is None:
            game_instance = Board.game

        p1 = game_instance.player_to_start

        if p1.playstate == 4:
            return 1
        elif p1.playstate == 5:
            return -1
        elif p1.playstate == 6:
            return 0.0001
        elif game_instance.turn > 180:
            # Turn cap: mark the game as ended and score it like a draw.
            game_instance.ended = True
            return 0.0001
        return 0

    def getState(self, player, game_instance=None):
        """
        Input:
            player: current player (1 or -1)
            game_instance: game to inspect; ``Board.game`` when None

        Returns:
            state: see gameUtils.getState for info
        """
        # if player == 1:
        #     current_player = self.b.players[0]
        # elif player == -1:
        #     current_player = self.b.players[1]
        if game_instance is None:
            game_instance = Board.game

        return self.b.getState(player, game_instance)
        # return b.game

    def getSymmetries(self, state, pi):
        """
        Input:
            state: array passed to ``np.rot90`` / ``np.fliplr``
            pi: policy vector

        Returns:
            symmForms: a list of (state, pi) tuples, one per combination of
                       1 to 4 quarter-turn rotations with and without a
                       left-right flip. Each pi is the flattened transformed
                       policy board followed by the original ``pi[-1]``.

        NOTE(review): the length check requires 168 entries, but the reshape
        target (21, 9) needs 189, so with assertions enabled this method
        cannot return successfully as written. Confirm the intended policy
        length (and whether the trailing ``pi[-1]`` entry belongs here)
        before relying on it.
        """
        # assert(len(pi) == len(state))
        assert (len(pi) == 168)
        pi_board = np.reshape(pi, (21, 9))
        symmetries = []

        for i in range(1, 5):
            for j in [True, False]:
                newS = np.rot90(state, i)
                newPi = np.rot90(pi_board, i)
                if j:
                    newS = np.fliplr(newS)
                    newPi = np.fliplr(newPi)
                symmetries.append((newS, list(newPi.ravel()) + [pi[-1]]))
        return symmetries

    def stringRepresentation(self, state):
        """
        Input:
            state: np array of state

        Returns:
            boardString: the raw bytes of the array, usable as a hashable
                         key. Required by MCTS for hashing.
        """
        # ``tobytes`` returns the same bytes as the deprecated ``tostring``,
        # which no longer exists in NumPy 2.0.
        return state.tobytes()