class DefaultAlphaZeroPlayer(Player):
    """
    Player that selects the highest-weighted action from the distribution returned by an AlphaZero MCTS.
    """

    def __init__(self, game, arg_file: typing.Optional[str] = None, name: str = "") -> None:
        """
        Initialize the base player and, when a configuration is present, build the network and search engine.

        :param game: Game object handed to the base class, the network and the search engine.
        :param arg_file: Optional configuration reference, forwarded to the base class.
        :param name: Player name forwarded to the base class; overwritten by the configured name if one is loaded.
        """
        super().__init__(game, arg_file, name, parametric=True)

        if self.player_args is None:
            # This constructor assigns no model/search engine here; set_variables can supply them.
            return

        # presumably self.player_args is populated by the base constructor from arg_file - verify in Player.
        config = DotDict.from_json(self.player_args)
        self.args = config
        self.model = DefaultAlphaZero(self.game, config.net_args, config.architecture)
        self.search_engine = AlphaZeroMCTS(self.game, self.model, config.args)
        self.name = config.name

    def set_variables(self, model, search_engine, name):
        """
        Attach an externally constructed network and search engine and set the player name.

        :param model: Object stored as this player's model.
        :param search_engine: Object stored as this player's search engine.
        :param name: New name for this player.
        """
        self.model, self.search_engine, self.name = model, search_engine, name

    def refresh(self, hard_reset: bool = False):
        """
        Refresh the base player state and clear the tree held by the search engine.

        :param hard_reset: Accepted for interface compatibility; not used by this method.
        """
        # NOTE(review): hard_reset is not forwarded to the base class - confirm this is intended.
        super().refresh()
        self.search_engine.clear_tree()

    def act(self, state: GameState) -> int:
        """
        Run MCTS from the given state with temp=0 and return the index of the largest policy entry.

        :param state: Current game state passed to the search engine.
        :return: Index of the maximum entry of the policy returned by the search.
        """
        policy, _ = self.search_engine.runMCTS(state, self.history, temp=0)
        best_action = np.argmax(policy)
        return best_action.item()
class DefaultAlphaZeroPlayer(Player):
    """ AlphaZero agent that chooses actions with MCTS, guided by its network, in the given game. """

    def __init__(self, game, arg_file: typing.Optional[str] = None, name: str = "") -> None:
        """ Initialize the base player; when a config is present, build the network and search classes from it. """
        super().__init__(game, arg_file, name, parametric=True)

        if self.player_args is None:
            # No config: this constructor leaves model/search engine unassigned (see set_variables).
            return

        # Load the parameter config, then construct the network followed by the search that uses it.
        loaded = DotDict.from_json(self.player_args)
        self.args = loaded
        self.model = DefaultAlphaZero(self.game, loaded.net_args, loaded.architecture)
        self.search_engine = AlphaZeroMCTS(self.game, self.model, loaded.args)
        self.name = loaded.name

    def set_variables(self, model, search_engine, name: str) -> None:
        """ Point this agent at an externally created network and search class, and rename it. """
        self.model, self.search_engine, self.name = model, search_engine, name

    def refresh(self, hard_reset: bool = False):
        """ Refresh the base agent state, then clear the statistics stored in the MCTS tree. """
        # NOTE(review): hard_reset is accepted but not passed on to the base class - confirm this is intended.
        super().refresh()
        self.search_engine.clear_tree()

    def act(self, state: GameState) -> int:
        """ Run MCTS from the given state with temp=0 and return the index of the largest policy entry. """
        search_policy, _ = self.search_engine.runMCTS(state, self.history, temp=0)
        chosen = np.argmax(search_policy)
        return chosen.item()
def __init__(self, exploration, budget, model, boardsize, temperature=0):
    """
    Configure the AlphaZero policy: PUCT parameters, search budget, the guiding
    neural network and the board size used to initialize the MCTS backend.

    :param exploration: float The exploration parameter of PUCT (c_puct).
    :param budget: int The amount of MCTS simulations to perform (N).
    :param model: AlphaZeroModel.Model Neural network backend that guides the MCTS search.
    :param boardsize: int Size of the game board, used to construct the game for MCTS.
    :param temperature: int Governs the degree of exploration. (0 = greedy)
    """
    super().__init__()

    # Build the collaborators first (same construction order as before), then store them.
    search_args = dotdict({'numMCTSSims': budget, 'cpuct': exploration})
    hex_game = HexGame(boardsize)

    self.args = search_args
    self.model = model
    self.game = hex_game
    self.searcher = MCTS(hex_game, model, search_args)
    # A temperature of 0 implies greedy actions.
    self.temperature = temperature
def __init__(self, game, arg_file: typing.Optional[str] = None, name: str = "") -> None:
    """
    Initialize the base player and, when a configuration is present, build the network and search engine.

    :param game: Game object handed to the base class, the network and the search engine.
    :param arg_file: Optional configuration reference, forwarded to the base class.
    :param name: Player name forwarded to the base class; overwritten by the configured name if one is loaded.
    """
    super().__init__(game, arg_file, name, parametric=True)

    if self.player_args is None:
        # Without a configuration nothing further is constructed here.
        return

    # presumably self.player_args is populated by the base constructor - verify in Player.
    config = DotDict.from_json(self.player_args)
    self.args = config
    self.model = DefaultAlphaZero(self.game, config.net_args, config.architecture)
    self.search_engine = AlphaZeroMCTS(self.game, self.model, config.args)
    self.name = config.name
class AlphaZeroPolicy(Policy):
    """
    Derived class of Policy to choose actions based on the AlphaZero algorithm.
    """

    def __init__(self, exploration, budget, model, boardsize, temperature=0):
        """
        The AlphaZero algorithm must be configured with the parameters for the
        PUCT formula for child-node selection and expansion along with a search
        budget. The neural network must also be provided, along with the
        board size, in order to initialize the MCTS backend of AlphaZero.

        :param exploration: float The exploration parameter of PUCT (c_puct).
        :param budget: int The amount of MCTS simulations to perform (N).
        :param model: AlphaZeroModel.Model Neural network backend that guides the MCTS search.
        :param boardsize: int Size of the game board in order to initialize MCTS.
        :param temperature: int Governs the degree of exploration. (0 = greedy)
        """
        super().__init__()
        self.args = dotdict({'numMCTSSims': budget, 'cpuct': exploration})
        self.model = model
        self.game = HexGame(boardsize)
        self.searcher = MCTS(self.game, self.model, self.args)
        self.temperature = temperature  # temp=0 implies greedy actions

    def generate_move(self, hex_board):
        """
        With the given parameters for the AlphaZero procedure and the currently
        provided game state (hex_board), call the backend MCTS class to perform
        a forward search guided by the neural network and choose a move for
        the current player.

        :param hex_board: HexBoard Class for game-logic; its `board` and `size` attributes are read.
        :return: tuple Coordinate on the HexBoard to move to.
        :raises RuntimeError: If no perspective has been set on this policy.
        :see: MCTSSearcher from .hex_search
        """
        # The call counter is incremented before validation, so failed calls are counted too.
        self.calls += 1
        if self.perspective is None:
            # RuntimeError rather than a bare Exception: still caught by `except Exception`,
            # but callers can now handle this case specifically.
            raise RuntimeError("No search/ player perspective given.")

        # The neural network only learns in a uniform player perspective.
        # Hence the board is first brought into canonical form for the current
        # player; a copy is passed so the caller's board array is not handed over.
        # The chosen move is transformed back to the perspective of the board
        # that was provided as the argument.
        search_board = self.game.getCanonicalForm(np.copy(hex_board.board), self.perspective)
        pi = self.searcher.runMCTS(search_board, temp=self.temperature)

        # Convert the flat index of the largest policy entry into a 2-D coordinate.
        first, second = divmod(np.argmax(pi), hex_board.size)

        if self.perspective == -1:
            # Canonical form finds a move on a transposed board, so swap the coordinates back.
            return (second, first)
        return (first, second)
def __init__(self, game, arg_file: typing.Optional[str] = None, name: str = "") -> None:
    """ Initialize the base player; when a config is present, build the network and search classes from it. """
    super().__init__(game, arg_file, name, parametric=True)

    if self.player_args is None:
        # No parameter config available: nothing further is constructed here.
        return

    # Load the parameter config, then construct the network followed by the search that uses it.
    loaded = DotDict.from_json(self.player_args)
    self.args = loaded
    self.model = DefaultAlphaZero(self.game, loaded.net_args, loaded.architecture)
    self.search_engine = AlphaZeroMCTS(self.game, self.model, loaded.args)
    self.name = loaded.name