Example #1
def run(config=None):
    if config is None:
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)
        game = Game(board)

        # --------------- human VS AI ----------------
        best_policy = PolicyValueNet(
            config.board_width,
            config.board_height,
            Network=config.network,
            net_params=config.policy_param
        )  # set up which Network to use based on net_params

        mcts_player = AlphaZeroPlayer(
            best_policy.predict,
            c_puct=config.c_puct,
            nplays=100,
            add_noise=True)  # set larger nplays for better performance

        # uncomment the following line to play with pure MCTS
        # mcts_player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)

        # human player, input your move in the format: 2,3
        human = HumanPlayer()

        # set who_first=0 for human first
        game.start_game(human, mcts_player, who_first=1, is_shown=1)

    except KeyboardInterrupt:
        print('\n\rquit')
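
A minimal sketch of calling run() above with an explicitly prepared config; it assumes load_config and root_data_file behave as in the example, and the c_puct override is purely illustrative:

if __name__ == '__main__':
    cfg = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                      only_load_param=True)
    cfg.c_puct = 5  # hypothetical override of the exploration constant
    run(config=cfg)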
Example #2
 def __init__(self):
     # params of the board and the game
     self.board_width = 6
     self.board_height = 6
     self.n_in_row = 4
     self.board = ShogiBoard()
     # training params
     self.learn_rate = 5e-3
     self.lr_multiplier = 1.0  # adaptively adjust the learning rate based on KL
     self.temp = 1.0  # the temperature param
     self.n_playout = 400  # num of simulations for each move
     self.c_puct = 5
     self.buffer_size = 10000
     self.batch_size = 512  # mini-batch size for training
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.play_batch_size = 1
     self.epochs = 5  # num of train_steps for each update
     self.kl_targ = 0.025
     self.check_freq = 50
     self.game_batch_num = 3000
     self.best_win_ratio = 0.0
     # num of simulations used for the pure mcts, which is used as the opponent to evaluate the trained policy
     self.pure_mcts_playout_num = 1000
     # start training from a given policy-value net
     #        policy_param = pickle.load(open('current_policy.model', 'rb'))
     #        self.policy_value_net = PolicyValueNet(self.board_width, self.board_height, net_params = policy_param)
     # start training from a new policy-value net
     self.policy_value_net = PolicyValueNet()
     self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                   c_puct=self.c_puct,
                                   n_playout=self.n_playout,
                                   is_selfplay=1)
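
The lr_multiplier / kl_targ fields above suggest a KL-driven learning-rate schedule; a small, hypothetical helper illustrating one common rule (the pipeline's actual update may differ):

def adjust_lr_multiplier(kl, kl_targ, lr_multiplier):
    """Shrink the multiplier when the policy moved too far (large KL),
    grow it when the update barely changed the policy (small KL)."""
    if kl > kl_targ * 2 and lr_multiplier > 0.1:
        lr_multiplier /= 1.5
    elif kl < kl_targ / 2 and lr_multiplier < 10:
        lr_multiplier *= 1.5
    return lr_multiplier

# e.g. self.lr_multiplier = adjust_lr_multiplier(kl, self.kl_targ, self.lr_multiplier)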
Example #3
    def __init__(self, config=None):
        # params of the board and the game
        self.config = config if config else Config()

        # Network wrapper
        self.policy_value_net = PolicyValueNet(self.config.board_width, self.config.board_height,
                                               net_params=self.config.policy_param,
                                               Network=self.config.network)

        # pass in the predict method of policy_value_net; the neural network guides the MCTS search
        self.mcts_player = AlphaZeroPlayer(self.policy_value_net.predict, c_puct=self.config.c_puct,
                                           nplays=self.config.n_playout, is_selfplay=True)
Example #4
 def __init__(self, modelPath=None):
     # board and the game
     self.boardWidth = 4
     self.boardHeight = 4
     self.game = Game()
     # training params
     self.learningRate = 5e-3
     self.learningRateMultiplier = 1.0  # adaptive
     try:
         self.learningRateMultiplier = float(Util.getNewestLearningRateMultiplier(type='from_db' if modelPath is None else 'from_self_play'))
     except Exception as e:
         print(str(e))
     self.temperature = 1.0  # temperature param; see item 2 of 参考资料.txt
     self.playoutTimes = 500  # number of playouts per move
     self.polynomialUpperConfidenceTreesConstant = 5  # c_puct from the paper; see item 2 of 参考资料.txt
     self.dataDequeSize = 10000
     self.trainBatchSize = 512  # training batch size; originally 512, temporarily 50 for debugging
     self.dataDeque = deque(maxlen=self.dataDequeSize)  # once maxlen is exceeded, elements are dropped from the other end
     self.playBatchSize = 1
     self.epochs = 5  # number of training epochs per update
     self.KLDParam = 0.025
     self.checkFrequency = 1000  # was 100; raised to 1000 because evaluation takes too long
     self.gameBatchSize = 200000  # win ratio vs. pure MCTS 1500 was 0.9 at 5000, so raised further to 10000
     self.maxWinRatio = 0.0
     self.pureMctsPlayoutTimes = 1500  # initially 500, now raised to 1500
     self.pureMctsPlayoutTimesAddend = 500
     self.maxPureMctsPlayoutTimes = 3000
     self.modelPath = modelPath
     self.trainedGameCountInDB = Util.readGameCount(type='train')
     self.lossDataCount = 12184  # number of game records deleted by a hacker; this data cannot be recovered
     if modelPath is not None:
         self.policyValueNet = PolicyValueNet(self.boardWidth, self.boardHeight, logPath=Util.getTrainLogPath(isFromDB=False), modelPath=modelPath)
         self.trainedGameCount = self.trainedGameCountInDB + self.lossDataCount
     else:
         self.policyValueNet = PolicyValueNet(self.boardWidth, self.boardHeight, logPath=Util.getTrainLogPath(isFromDB=True))
         self.trainedGameCount = 0 + self.lossDataCount
     self.zeroPlayer = ZeroPlayer(self.policyValueNet.policyValueFunction,
                                  polynomialUpperConfidenceTreesConstant=self.polynomialUpperConfidenceTreesConstant,
                                  playoutTimes=self.playoutTimes, isSelfPlay=1)
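
A hypothetical sketch of how pureMctsPlayoutTimes, pureMctsPlayoutTimesAddend and maxPureMctsPlayoutTimes above might interact once the trained player dominates the evaluation games; the project's real logic may differ:

def strengthen_opponent(trainer):
    """Raise the pure-MCTS opponent's playout budget after a perfect
    evaluation score, up to the configured maximum."""
    if (trainer.maxWinRatio == 1.0
            and trainer.pureMctsPlayoutTimes < trainer.maxPureMctsPlayoutTimes):
        trainer.pureMctsPlayoutTimes += trainer.pureMctsPlayoutTimesAddend
        trainer.maxWinRatio = 0.0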
Example #5
    def __init__(self, config=None):
        # params of the board and the game
        self.config = config if config else Config()
        if not hasattr(self.config, "use_gpu"):
            # compatible with old-version configs that predate the use_gpu flag
            setattr(self.config, "use_gpu", False)
        # Network wrapper
        self.policy_value_net = PolicyValueNet(
            self.config.board_width,
            self.config.board_height,
            net_params=self.config.policy_param,
            Network=self.config.network,
            use_gpu=self.config.use_gpu)

        # pass a reference to policy_value_net's predict function for the MCTS simulation
        self.mcts_player = AlphaZeroPlayer(self.policy_value_net.predict,
                                           c_puct=self.config.c_puct,
                                           nplays=self.config.n_playout,
                                           is_selfplay=True)
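
The hasattr check above is a backward-compatibility shim for configs serialized before the use_gpu flag existed; a small, hypothetical generalization of the same pattern:

def ensure_defaults(config, defaults):
    """Fill in attributes that an older serialized config may be missing."""
    for name, value in defaults.items():
        if not hasattr(config, name):
            setattr(config, name, value)
    return config

# e.g. ensure_defaults(self.config, {"use_gpu": False}) before building PolicyValueNet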
Example #6
def run(config=None):
    if config is None:
        config = load_config(file_name=root_data_file + 'resnet_6_6_4.model',
                             only_load_param=True)
    try:
        board = Board(width=config.board_width,
                      height=config.board_height,
                      n_in_row=config.n_in_row)

        # --------------------1.set player: AlphaZero VS human---------------------#
        best_policy = PolicyValueNet(
            config.board_width,
            config.board_height,
            Network=config.network,
            net_params=config.policy_param
        )  # set up which Network to use based on net_params

        player1 = AlphaZeroPlayer(
            best_policy.predict, c_puct=config.c_puct,
            nplays=1000)  # set larger nplays for better performance

        # uncomment the following line to play with pure MCTS
        # player2 = RolloutPlayer(nplays=1000, c_puct=config.c_puct)
        player2 = HumanPlayer()
        # --------------------2.set order---------------------#
        who_first = 0  # 0 means player1 first, otherwise player2 first

        # --------------------3.start game--------------------#
        game = Game(board, is_visualize=True)
        t = threading.Thread(target=game.start_game,
                             args=(player1, player2, who_first))
        t.start()
        game.show()

    except KeyboardInterrupt:
        print('\n\rquit')