class AlphaGoPlayer:
    """Agent that picks Go moves with a trained network and a tree search.

    The instance tracks the board itself: every call to ``get_action`` first
    applies the opponent's move (if any) and then the agent's own reply, so
    calls are expected to arrive once per turn, in order.
    """

    def __init__(self, _, seed, player):
        """Build the game, load the best checkpoint and create the search.

        Args:
            _: Unused.
            seed: Stored on the instance as ``seed``; nothing else in this
                class reads it.
            player: Side identifier supplied by the caller. The value ``1``
                is mapped to the internal id ``-1``; any other value is
                mapped to ``1``.
        """
        # Arguments are handed straight to GoGame (presumably board size and
        # komi -- confirm against GoGame).
        self.game = GoGame(13, 7.5)

        # There is no separate "initial state" object; begin from the
        # starting board provided by the game.
        self.board = self.game.get_starting_board()

        self.player = 1 if player != 1 else -1
        self.seed = seed

        self.args = parse_args()
        self.nnet = NetTrainer(self.game, self.args)
        self.nnet.load_checkpoint(self.args.best_model_path)

        self.mct = MCT(self.nnet, self.game, self.args, noise=False)

    def get_action(self, _, opponent_action):
        """Apply the opponent's move, run the search and play the best reply.

        Args:
            _: Unused.
            opponent_action: Action index played by the opponent, or ``-1``
                when there is no opponent move to apply (presumably the
                first turn of the game -- confirm with the caller).

        Returns:
            The index of the largest entry in the search's action
            probabilities (as returned by ``np.argmax``). The move has
            already been applied to the internal board.
        """
        opponent_moved = opponent_action != -1
        if opponent_moved:
            self.board = self.game.get_next_state(
                self.board, -self.player, opponent_action
            )

        # The move number is 0 only while the search runs and is put back to
        # -1 afterwards. NOTE(review): the meaning of these two values is
        # defined by the board class -- confirm there.
        self.board.set_move_num(0)
        search_policy = self.mct.actionProb(self.board, self.player, 0)
        self.board.set_move_num(-1)

        best_action = np.argmax(search_policy)
        self.board = self.game.get_next_state(
            self.board, self.player, best_action
        )
        return best_action
# Example #2
0
import random
from go_game import GoGame
import numpy
import sys
# Raise the summarization threshold to sys.maxsize so that printed arrays are
# shown in full instead of being abbreviated.
numpy.set_printoptions(threshold=sys.maxsize)

if __name__ == "__main__":
    # Random play-out: keep choosing a random valid move until the game
    # reports that it has ended.
    game = GoGame(13, 5.5)
    board, player = game.get_starting_board(), -1

    # NOTE(review): `player` is never changed inside the visible loop, so
    # every move is played for the same side -- confirm this is intended.
    while not game.get_game_ended(board, player):
        actions = game.get_valid_moves(board, player)
        # Mask the last action so it is only used as the fallback below
        # (presumably the pass move -- confirm against GoGame).
        actions[-1] = 0

        possible_actions = [
            action
            for action, indicator in enumerate(actions)
            if indicator == 1
        ]

        # Pick uniformly among the remaining valid moves; when none is left,
        # fall back to the last index of the action space.
        selected_action = (
            random.choice(possible_actions)
            if possible_actions
            else game.get_action_space_size() - 1
        )
        board = game.get_next_state(board, player, selected_action)