import numpy as np

from go_game import GoGame
# NOTE: module paths for these project-local imports are assumed.
from nnet_trainer import NetTrainer
from mct import MCT
from config import parse_args


class AlphaGoPlayer:
    def __init__(self, _, seed, player):
        self.game = GoGame(13, 7.5)  # the Go game rules: 13x13 board, komi 7.5
        # There is no initial state to restore; the first argument is unused
        # and the player always starts from an empty board.
        self.board = self.game.get_starting_board()
        self.seed = seed
        self.player = -1 if player == 1 else 1
        self.args = parse_args()
        self.nnet = NetTrainer(self.game, self.args)
        self.nnet.load_checkpoint(self.args.best_model_path)
        self.mct = MCT(self.nnet, self.game, self.args, noise=False)

    def get_action(self, _, opponent_action):
        if opponent_action != -1:  # -1 means the opponent has not moved yet
            self.board = self.game.get_next_state(self.board, -1 * self.player, opponent_action)
        self.board.set_move_num(0)
        action_probs = self.mct.actionProb(self.board, self.player, 0)  # run MCTS from the current board
        self.board.set_move_num(-1)
        best_action = np.argmax(action_probs)
        self.board = self.game.get_next_state(self.board, self.player, best_action)
        return best_action
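# A minimal usage sketch of the class above, assuming the unused first
# argument of the constructor and of get_action can be None, that
# opponent_action == -1 signals "no previous move", and that player IDs
# are 1 and 2 (anything other than 1 maps to +1 internally). All of these
# are inferred from the code above, not confirmed by the source.
# The two players keep their own board copies in sync by exchanging actions.
black = AlphaGoPlayer(None, seed=0, player=1)   # internally plays as -1
white = AlphaGoPlayer(None, seed=1, player=2)   # internally plays as +1

last_action = -1  # black moves first, so it sees "no opponent move yet"
for _ in range(10):  # exchange a few moves; a real match runs to game end
    last_action = black.get_action(None, last_action)
    last_action = white.get_action(None, last_action)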
import random
import sys

import numpy

from go_game import GoGame

# Print full arrays so a 13x13 board is never truncated in the output.
numpy.set_printoptions(threshold=sys.maxsize)

if __name__ == "__main__":
    game = GoGame(13, 5.5)  # 13x13 board, komi 5.5
    board = game.get_starting_board()
    player = -1
    while True:
        if game.get_game_ended(board, player):
            break
        actions = game.get_valid_moves(board, player)
        actions[-1] = 0  # forbid passing so the random game plays out
        possible_actions = []
        for action, indicator in enumerate(actions):
            if indicator == 1:
                possible_actions.append(action)
        if len(possible_actions) > 0:
            selected_action = random.choice(possible_actions)
        else:
            # no legal board move is left, so fall back to the pass move
            selected_action = game.get_action_space_size() - 1
        board = game.get_next_state(board, player, selected_action)
        player = -player  # alternate between the two players (-1 and 1)
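# A minimal sketch wrapping the random-move logic above in the same
# get_action protocol that AlphaGoPlayer uses, so the two can be pitted
# against each other. RandomPlayer is hypothetical glue code, not part of
# the original files; it reuses the `random` and `GoGame` imports above
# and matches AlphaGoPlayer's komi of 7.5 so both sides track the same game.
class RandomPlayer:
    def __init__(self, _, seed, player):
        self.game = GoGame(13, 7.5)
        self.board = self.game.get_starting_board()
        random.seed(seed)
        self.player = -1 if player == 1 else 1

    def get_action(self, _, opponent_action):
        if opponent_action != -1:  # replay the opponent's move on our copy
            self.board = self.game.get_next_state(self.board, -1 * self.player, opponent_action)
        actions = self.game.get_valid_moves(self.board, self.player)
        actions[-1] = 0  # prefer board moves over passing
        possible = [a for a, ok in enumerate(actions) if ok == 1]
        action = random.choice(possible) if possible else self.game.get_action_space_size() - 1
        self.board = self.game.get_next_state(self.board, self.player, action)
        return action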