Пример #1
0
class DefaultAlphaZeroPlayer(Player):
    """Player that chooses moves with an AlphaZero network and an MCTS search engine.

    If ``self.player_args`` is available after the base-class constructor has
    run, the network and the search engine are built from that JSON
    configuration. Otherwise they are not created here and can be attached
    afterwards with :meth:`set_variables`.
    """

    def __init__(self,
                 game,
                 arg_file: typing.Optional[str] = None,
                 name: str = "") -> None:
        """Initialise the base player and, when configured, the model and search engine.

        :param game: Game object handed to ``Player.__init__``.
        :param arg_file: Optional configuration reference handed to ``Player.__init__``.
        :param name: Player name handed to ``Player.__init__``; overwritten by the
            configured name when a configuration is loaded.
        """
        super().__init__(game, arg_file, name, parametric=True)
        if self.player_args is None:
            # Nothing to build from; model and search_engine stay unassigned here.
            return

        config = DotDict.from_json(self.player_args)
        self.args = config

        network = DefaultAlphaZero(self.game, config.net_args,
                                   config.architecture)
        self.model = network
        self.search_engine = AlphaZeroMCTS(self.game, network, config.args)
        self.name = config.name

    def set_variables(self, model, search_engine, name):
        """Attach an externally constructed model and search engine and set the name."""
        self.model, self.search_engine, self.name = model, search_engine, name

    def refresh(self, hard_reset: bool = False):
        """Refresh the base-class state and clear the search engine's tree.

        NOTE(review): ``hard_reset`` is accepted but not forwarded to
        ``super().refresh()`` -- confirm that this is intended.
        """
        super().refresh()
        engine = self.search_engine
        engine.clear_tree()

    def act(self, state: GameState) -> int:
        """Run MCTS from ``state`` with ``temp=0`` and return the arg-max action index."""
        search_output = self.search_engine.runMCTS(state, self.history, temp=0)
        pi, _ = search_output
        best_action = np.argmax(pi)
        return best_action.item()
Пример #2
0
class DefaultAlphaZeroPlayer(Player):
    """
    Standard AlphaZero agent: picks the arg-max action of an MCTS policy computed with temp=0.
    """
    def __init__(self,
                 game,
                 arg_file: typing.Optional[str] = None,
                 name: str = "") -> None:
        """
        Set up the base player; build the network and search classes when a config is present.

        :param game: Game object passed on to the Player base class.
        :param arg_file: Optional configuration reference passed on to the Player base class.
        :param name: Agent name passed on to the base class; replaced by the name stored in
                     the configuration when one is loaded.
        """
        super().__init__(game, arg_file, name, parametric=True)
        if self.player_args is None:
            # Without a parameter config nothing is constructed here; see set_variables.
            return

        # Load the parameter config, then construct the network followed by the search class.
        settings = DotDict.from_json(self.player_args)
        self.args = settings

        net = DefaultAlphaZero(self.game, settings.net_args,
                               settings.architecture)
        self.model = net
        self.search_engine = AlphaZeroMCTS(self.game, net, settings.args)
        self.name = settings.name

    def set_variables(self, model, search_engine, name: str) -> None:
        """ Point the agent at an externally owned network and search class, and rename it. """
        self.model, self.search_engine = model, search_engine
        self.name = name

    def refresh(self, hard_reset: bool = False):
        """
        Refresh the base-class state of the agent and clear the MCTS tree.

        NOTE(review): hard_reset is not passed on to super().refresh() -- confirm intended.
        """
        super().refresh()
        tree_search = self.search_engine
        tree_search.clear_tree()

    def act(self, state: GameState) -> int:
        """ Run MCTS on the given state with temp=0 and return the highest-probability action. """
        mcts_result = self.search_engine.runMCTS(state, self.history, temp=0)
        pi, _ = mcts_result
        chosen = np.argmax(pi)
        return chosen.item()
Пример #3
0
 def __init__(self, exploration, budget, model, boardsize, temperature=0):
     """
     Configure the AlphaZero algorithm: the PUCT exploration constant and the
     simulation budget are bundled into the MCTS arguments, and the MCTS
     backend is created from a Hex game of the given board size together
     with the supplied neural network.
     :param exploration: float The exploration parameter of PUCT (c_puct).
     :param budget: int The amount of MCTS simulations to perform (N).
     :param model: AlphaZeroModel.Model Neural network backend that guides the MCTS search.
     :param boardsize: int Size of the game board used to initialize the game and MCTS.
     :param temperature: int Governs the degree of exploration. (0 = greedy)
     """
     super().__init__()

     # Search hyperparameters under the key names used for the MCTS arguments.
     search_args = dotdict({
         'numMCTSSims': budget,
         'cpuct': exploration,
     })
     self.args = search_args
     self.model = model

     hex_game = HexGame(boardsize)
     self.game = hex_game
     self.searcher = MCTS(hex_game, model, search_args)

     # temp=0 implies greedy actions
     self.temperature = temperature
Пример #4
0
    def __init__(self,
                 game,
                 arg_file: typing.Optional[str] = None,
                 name: str = "") -> None:
        """Initialise the base player and, when configured, the model and search engine.

        :param game: Game object handed to the base-class constructor.
        :param arg_file: Optional configuration reference handed to the base-class constructor.
        :param name: Player name handed to the base-class constructor; overwritten by
            the configured name when a configuration is loaded.
        """
        super().__init__(game, arg_file, name, parametric=True)
        if self.player_args is None:
            # Nothing to build from; model and search_engine stay unassigned here.
            return

        config = DotDict.from_json(self.player_args)
        self.args = config

        network = DefaultAlphaZero(self.game, config.net_args,
                                   config.architecture)
        self.model = network
        self.search_engine = AlphaZeroMCTS(self.game, network, config.args)
        self.name = config.name
Пример #5
0
class AlphaZeroPolicy(Policy):
    """
    Derived class of Policy to choose actions based on the AlphaZero algorithm.
    """
    def __init__(self, exploration, budget, model, boardsize, temperature=0):
        """
        The AlphaZero algorithm must be configured with the parameters for the
        PUCT formula for child-node selection and expansion along with a search budget.
        The neural network must also be provided along with the board size in order
        to initialize the MCTS backend of AlphaZero.
        :param exploration: float The exploration parameter of PUCT (c_puct).
        :param budget: int The amount of MCTS simulations to perform (N).
        :param model: AlphaZeroModel.Model Neural network backend that guides the MCTS search.
        :param boardsize: int Size of the game board in order to initialize MCTS.
        :param temperature: int Governs the degree of exploration. (0 = greedy)
        """
        super().__init__()
        self.args = dotdict({'numMCTSSims': budget, 'cpuct': exploration})
        self.model = model
        self.game = HexGame(boardsize)
        self.searcher = MCTS(self.game, self.model, self.args)
        self.temperature = temperature  # temp=0 implies greedy actions

    def generate_move(self, hex_board):
        """
        With the given parameters for the AlphaZero procedure and a currently
        provided game-state (=hex_board), call the backend MCTS class to
        perform forward search guided by the neural network to choose
        a move for the current player.
        :param hex_board: HexBoard Class for game-logic.
        :return: tuple Coordinate on the HexBoard to move to.
        :raises RuntimeError: If no player perspective has been set on this policy.
        :see: MCTSSearcher from .hex_search
        """
        self.calls += 1
        if self.perspective is None:
            # RuntimeError is still caught by callers that handle Exception, but no
            # longer forces them to catch every exception type to detect this case.
            raise RuntimeError("No search/ player perspective given.")

        # The neural network only learns from a uniform player perspective.
        # Hence the board is first brought into canonical form for the current
        # player; the search runs on a copy so the caller's board is untouched.
        search_board = self.game.getCanonicalForm(np.copy(hex_board.board),
                                                  self.perspective)
        pi = self.searcher.runMCTS(search_board, temp=self.temperature)

        # Convert the flat arg-max index into a (row, column) coordinate.
        row, col = divmod(np.argmax(pi), hex_board.size)

        if self.perspective == -1:
            # Canonical form finds a move on a transposed board, so swap the
            # coordinate back to the perspective of the board that was passed in.
            return (col, row)
        return (row, col)
Пример #6
0
    def __init__(self,
                 game,
                 arg_file: typing.Optional[str] = None,
                 name: str = "") -> None:
        """
        Set up the base player; build the network and search classes when a config is present.

        :param game: Game object passed on to the base-class constructor.
        :param arg_file: Optional configuration reference passed on to the base-class constructor.
        :param name: Agent name passed on to the base class; replaced by the name stored in
                     the configuration when one is loaded.
        """
        super().__init__(game, arg_file, name, parametric=True)
        if self.player_args is None:
            # Without a parameter config nothing is constructed here.
            return

        # Load the parameter config, then construct the network followed by the search class.
        settings = DotDict.from_json(self.player_args)
        self.args = settings

        net = DefaultAlphaZero(self.game, settings.net_args,
                               settings.architecture)
        self.model = net
        self.search_engine = AlphaZeroMCTS(self.game, net, settings.args)
        self.name = settings.name