Example #1
def run(config=None):
    if config is None:
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)
        game = Game(board)

        # --------------- human VS AI ----------------
        best_policy = PolicyValueNet(
            config.board_width,
            config.board_height,
            Network=config.network,
            net_params=config.policy_param
        )  # set up which Network to use based on net_params

        mcts_player = AlphaZeroPlayer(
            best_policy.predict,
            c_puct=config.c_puct,
            nplays=100,
            add_noise=True)  # set larger nplays for better performance

        # uncomment the following line to play with pure MCTS
        # mcts_player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)

        # human player, input your move in the format: 2,3
        human = HumanPlayer()

        # set who_first=0 for human first
        game.start_game(human, mcts_player, who_first=1, is_shown=1)

    except KeyboardInterrupt:
        print('\n\rquit')
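
A minimal sketch of calling run() above with an explicitly prepared config; it assumes load_config and root_data_file behave as in the example, and the c_puct override is purely illustrative:

if __name__ == '__main__':
    cfg = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                      only_load_param=True)
    cfg.c_puct = 5  # hypothetical override of the exploration constant
    run(config=cfg)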
Example #2
 def __init__(self):
     # params of the board and the game
     self.board_width = 6
     self.board_height = 6
     self.n_in_row = 4
     self.board = ShogiBoard()
     # training params
     self.learn_rate = 5e-3
     self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
     self.temp = 1.0  # the temperature param
     self.n_playout = 400  # num of simulations for each move
     self.c_puct = 5
     self.buffer_size = 10000
     self.batch_size = 512  # mini-batch size for training
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.play_batch_size = 1
     self.epochs = 5  # num of train_steps for each update
     self.kl_targ = 0.025
     self.check_freq = 50
     self.game_batch_num = 3000
     self.best_win_ratio = 0.0
     # num of simulations used for the pure mcts, which is used as the opponent to evaluate the trained policy
     self.pure_mcts_playout_num = 1000
     # start training from a given policy-value net
     #        policy_param = pickle.load(open('current_policy.model', 'rb'))
     #        self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, net_params = policy_param)
     # start training from a new policy-value net
     self.policy_value_net = PolicyValueNet()
     self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                   c_puct=self.c_puct,
                                   n_playout=self.n_playout,
                                   is_selfplay=1)
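
The lr_multiplier / kl_targ fields above suggest a KL-driven learning-rate schedule; a small, hypothetical helper illustrating one common rule (the pipeline's actual update may differ):

def adjust_lr_multiplier(kl, kl_targ, lr_multiplier):
    """Shrink the multiplier when the policy moved too far (large KL),
    grow it when the update barely changed the policy (small KL)."""
    if kl > kl_targ * 2 and lr_multiplier > 0.1:
        lr_multiplier /= 1.5
    elif kl < kl_targ / 2 and lr_multiplier < 10:
        lr_multiplier *= 1.5
    return lr_multiplier

# e.g. self.lr_multiplier = adjust_lr_multiplier(kl, self.kl_targ, self.lr_multiplier)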
Example #3
    def __init__(self, config=None):
        # params of the board and the game
        self.config = config if config else Config()

        # Network wrapper
        self.policy_value_net = PolicyValueNet(self.config.board_width, self.config.board_height,
                                               net_params=self.config.policy_param,
                                               Network=self.config.network)

        # pass in the predict method of policy_value_net; the neural network guides the MCTS search
        self.mcts_player = AlphaZeroPlayer(self.policy_value_net.predict, c_puct=self.config.c_puct,
                                           nplays=self.config.n_playout, is_selfplay=True)
Example #4
 def __init__(self, modelPath=None):
     # board and the game
     self.boardWidth = 4
     self.boardHeight = 4
     self.game = Game()
     # training params
     self.learningRate = 5e-3
     self.learningRateMultiplier = 1.0  # adaptive
     try:
         self.learningRateMultiplier = float(Util.getNewestLearningRateMultiplier(type='from_db' if modelPath is None else 'from_self_play'))
     except Exception as e:
         print(str(e))
     self.temperature = 1.0  # temperature param; see item 2 of 参考资料.txt
     self.playoutTimes = 500  # number of playouts per move
     self.polynomialUpperConfidenceTreesConstant = 5  # c_puct from the paper; see item 2 of 参考资料.txt
     self.dataDequeSize = 10000
     self.trainBatchSize = 512  # training batch size; originally 512, temporarily 50 for debugging
     self.dataDeque = deque(maxlen=self.dataDequeSize)  # once maxlen is exceeded, elements are dropped from the other end
     self.playBatchSize = 1
     self.epochs = 5  # number of training epochs per update
     self.KLDParam = 0.025
     self.checkFrequency = 1000  # was 100; raised to 1000 because evaluation takes too long
     self.gameBatchSize = 200000  # win ratio vs. pure MCTS 1500 was 0.9 at 5000, so raised further to 10000
     self.maxWinRatio = 0.0
     self.pureMctsPlayoutTimes = 1500  # initially 500, now raised to 1500
     self.pureMctsPlayoutTimesAddend = 500
     self.maxPureMctsPlayoutTimes = 3000
     self.modelPath = modelPath
     self.trainedGameCountInDB = Util.readGameCount(type='train')
     self.lossDataCount = 12184  # number of game records deleted by a hacker; this data cannot be recovered
     if modelPath is not None:
         self.policyValueNet = PolicyValueNet(self.boardWidth, self.boardHeight, logPath=Util.getTrainLogPath(isFromDB=False), modelPath=modelPath)
         self.trainedGameCount = self.trainedGameCountInDB + self.lossDataCount
     else:
         self.policyValueNet = PolicyValueNet(self.boardWidth, self.boardHeight, logPath=Util.getTrainLogPath(isFromDB=True))
         self.trainedGameCount = 0 + self.lossDataCount
     self.zeroPlayer = ZeroPlayer(self.policyValueNet.policyValueFunction,
                                  polynomialUpperConfidenceTreesConstant=self.polynomialUpperConfidenceTreesConstant,
                                  playoutTimes=self.playoutTimes, isSelfPlay=1)
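
A hypothetical sketch of how pureMctsPlayoutTimes, pureMctsPlayoutTimesAddend and maxPureMctsPlayoutTimes above might interact once the trained player dominates the evaluation games; the project's real logic may differ:

def strengthen_opponent(trainer):
    """Raise the pure-MCTS opponent's playout budget after a perfect
    evaluation score, up to the configured maximum."""
    if (trainer.maxWinRatio == 1.0
            and trainer.pureMctsPlayoutTimes < trainer.maxPureMctsPlayoutTimes):
        trainer.pureMctsPlayoutTimes += trainer.pureMctsPlayoutTimesAddend
        trainer.maxWinRatio = 0.0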
Example #5
    def __init__(self, config=None):
        # params of the board and the game
        self.config = config if config else Config()
        if not hasattr(self.config, "use_gpu"):
            # compatible with old-version configs that predate the use_gpu flag
            setattr(self.config, "use_gpu", False)
        # Network wrapper
        self.policy_value_net = PolicyValueNet(
            self.config.board_width,
            self.config.board_height,
            net_params=self.config.policy_param,
            Network=self.config.network,
            use_gpu=self.config.use_gpu)

        # pass a reference to policy_value_net's predict function for the MCTS simulation
        self.mcts_player = AlphaZeroPlayer(self.policy_value_net.predict,
                                           c_puct=self.config.c_puct,
                                           nplays=self.config.n_playout,
                                           is_selfplay=True)
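
The hasattr check above is a backward-compatibility shim for configs serialized before the use_gpu flag existed; a small, hypothetical generalization of the same pattern:

def ensure_defaults(config, defaults):
    """Fill in attributes that an older serialized config may be missing."""
    for name, value in defaults.items():
        if not hasattr(config, name):
            setattr(config, name, value)
    return config

# e.g. ensure_defaults(self.config, {"use_gpu": False}) before building PolicyValueNet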
Example #6
def run(config=None):
    if config is None:
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)

        # --------------------1.set player: AlphaZero VS human---------------------#
        best_policy = PolicyValueNet(
            config.board_width,
            config.board_height,
            Network=config.network,
            net_params=config.policy_param
        )  # set up which Network to use based on net_params

        player1 = AlphaZeroPlayer(
            best_policy.predict, c_puct=config.c_puct,
            nplays=1000)  # set larger nplays for better performance

        # uncomment the following line to play with pure MCTS
        # player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)
        player2 = HumanPlayer()
        # --------------------2.set order---------------------#
        who_first = 0  # 0 means player1 first, otherwise player2 first

        # --------------------3.start game--------------------#
        game = Game(board, is_visualize=True)
        t = threading.Thread(target=game.start_game,
                             args=(player1, player2, who_first))
        t.start()
        game.show()

    except KeyboardInterrupt:
        print('\n\rquit')