Python MCTSPlayer.reset_player примеры использования

Язык программирования: Python

Пространство имен/Пакет: mcts_alphaZero

Класс/Тип: MCTSPlayer

Метод/Функция: reset_player

Примеров на hotexamples.com: 2

Python MCTSPlayer.reset_player - 2 примера найдено. Это лучшие примеры Python кода для mcts_alphaZero.MCTSPlayer.reset_player, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MCTSPlayer(30)

get_action(12)

set_player_ind(4)

reset_player(2)

Пример #1

Показать файл

Файл: game_board.py Проект: awaited-hare/AlphaZero_Gomoku_MPI

    def start_play_with_UI(self,
                           AI: mcts_alphaZero.MCTSPlayer,
                           start_player=0):
        """
        a GUI for playing
        """
        AI.reset_player()
        self.board.init_board()
        current_player = SP = start_player
        UI = GUI(self.board.width)
        end = False
        while True:
            print("current_player", current_player)

            if current_player == 0:
                UI.show_messages("Your turn")
            else:
                UI.show_messages("AI's turn")

            if current_player == 1 and not end:
                move, move_probs = AI.get_action(self.board,
                                                 is_selfplay=False,
                                                 print_probs_value=1)
            else:
                inp = UI.get_input()
                if inp[0] == "move" and not end:
                    if type(inp[1]) != int:
                        move = UI.loc_2_move(inp[1])
                    else:
                        move = inp[1]
                elif inp[0] == "RestartGame":
                    end = False
                    current_player = SP
                    self.board.init_board()
                    UI.restart_game()
                    AI.reset_player()
                    continue
                elif inp[0] == "ResetScore":
                    UI.reset_score()
                    continue
                elif inp[0] == "quit":
                    exit()
                    continue
                elif inp[0] == "SwitchPlayer":
                    end = False
                    self.board.init_board()
                    UI.restart_game(False)
                    UI.reset_score()
                    AI.reset_player()
                    SP = (SP + 1) % 2
                    current_player = SP
                    continue
                else:
                    # print('ignored inp:', inp)
                    continue
            # print('player %r move : %r'%(current_player,[move//self.board.width,move%self.board.width]))
            if not end:
                # print(move, type(move), current_player)
                UI.render_step(move, self.board.current_player)
                self.board.do_move(move)
                # print('move', move)
                # print(2, self.board.get_current_player())
                current_player = (current_player + 1) % 2
                # UI.render_step(move, current_player)
                end, winner = self.board.game_end()
                if end:
                    if winner != -1:
                        print("Game end. Winner is player", winner)
                        UI.add_score(winner)
                    else:
                        print("Game end. Tie")
                    print(UI.score)
                    print()

Пример #2

Показать файл

class Chess_Board_Canvas(Tkinter.Canvas):
    # 棋盤繪製
    def __init__(self, master=None, height=0, width=0):
        Tkinter.Canvas.__init__(self, master, height=height, width=width)
        self.step_record_chess_board = Record.Step_Record_Chess_Board()
        # 初始化記步器
        self.height = 15
        self.width = 15
        self.init_chess_board_points()  # 畫點
        self.init_chess_board_canvas()  # 畫棋盤
        self.board = MCTS.Board()
        self.n_in_row = 5
        self.n_playout = 400  # num of simulations for each move
        self.c_puct = 5
        """
        Important 1: Python is pass by reference
        So the self.board will be modified by other operations
        """
        self.AI = MCTS.MonteCarlo(self.board, 1)
        self.AI_1 = MCTS.MonteCarlo(self.board, 0)
        self.clicked = 1
        self.init = True  # first place is given by user (later need to be replaced as a random selection)
        self.train_or_play = True  # True - train, False - play
        self.step = 0
        self.text_id = None

    def init_chess_board_points(self):
        '''
        生成棋盤點,並且對應到像素座標
        保存到 chess_board_points 屬性
        '''
        self.chess_board_points = [[None for i in range(15)]
                                   for j in range(15)]

        for i in range(15):
            for j in range(15):
                self.chess_board_points[i][j] = Point.Point(i, j)
                # 轉換棋盤座標像素座標
        # self.label_step = Label(self, text="Step")

    def init_chess_board_canvas(self):
        '''
        初始化棋盤
        '''

        for i in range(15):  # 直線
            self.create_line(self.chess_board_points[i][0].pixel_x,
                             self.chess_board_points[i][0].pixel_y,
                             self.chess_board_points[i][14].pixel_x,
                             self.chess_board_points[i][14].pixel_y)

        for j in range(15):  # 橫線
            self.create_line(self.chess_board_points[0][j].pixel_x,
                             self.chess_board_points[0][j].pixel_y,
                             self.chess_board_points[14][j].pixel_x,
                             self.chess_board_points[14][j].pixel_y)
        # 邊界
        self.create_line(self.chess_board_points[2][2].pixel_x,
                         self.chess_board_points[2][2].pixel_y,
                         self.chess_board_points[2][12].pixel_x,
                         self.chess_board_points[2][12].pixel_y,
                         fill="red")
        self.create_line(self.chess_board_points[12][2].pixel_x,
                         self.chess_board_points[12][2].pixel_y,
                         self.chess_board_points[12][12].pixel_x,
                         self.chess_board_points[12][12].pixel_y,
                         fill="red")
        self.create_line(self.chess_board_points[2][12].pixel_x,
                         self.chess_board_points[2][12].pixel_y,
                         self.chess_board_points[12][12].pixel_x,
                         self.chess_board_points[12][12].pixel_y,
                         fill="red")
        self.create_line(self.chess_board_points[2][2].pixel_x,
                         self.chess_board_points[2][2].pixel_y,
                         self.chess_board_points[12][2].pixel_x,
                         self.chess_board_points[12][2].pixel_y,
                         fill="red")

        for i in range(15):  # 交點橢圓
            for j in range(15):
                r = 1
                self.create_oval(self.chess_board_points[i][j].pixel_x - r,
                                 self.chess_board_points[i][j].pixel_y - r,
                                 self.chess_board_points[i][j].pixel_x + r,
                                 self.chess_board_points[i][j].pixel_y + r)

    def click1(self, event):  # click關鍵字重複
        if self.train_or_play:
            print("In self training, mouse event is not available")
            return
        '''
        Mouse listener function, for the game played between human and AI
        '''
        if (self.clicked == 1):

            for i in range(15):
                for j in range(15):
                    square_distance = math.pow(
                        (event.x - self.chess_board_points[i][j].pixel_x),
                        2) + math.pow(
                            (event.y - self.chess_board_points[i][j].pixel_y),
                            2)
                    # 計算滑鼠的位置和點的距離
                    # 距離小於14的點

                    if (square_distance <=
                            200) and (self.step_record_chess_board.checkState(
                                i, j) == None):  # 合法落子位置
                        self.clicked = 0
                        if self.step_record_chess_board.who_to_play() == 1:
                            # 奇數次，黑落子
                            self.create_oval(
                                self.chess_board_points[i][j].pixel_x - 10,
                                self.chess_board_points[i][j].pixel_y - 10,
                                self.chess_board_points[i][j].pixel_x + 10,
                                self.chess_board_points[i][j].pixel_y + 10,
                                fill='black')
                            Tkinter.Canvas.update(self)
                        # 偶數次，白落子
                        elif self.step_record_chess_board.who_to_play() == 2:
                            self.create_oval(
                                self.chess_board_points[i][j].pixel_x - 10,
                                self.chess_board_points[i][j].pixel_y - 10,
                                self.chess_board_points[i][j].pixel_x + 10,
                                self.chess_board_points[i][j].pixel_y + 10,
                                fill='white')

                        result = 0
                        if (self.step_record_chess_board.value[1][i][j] >=
                                90000):
                            result = 1
                            self.clicked = 1
                        self.step_record_chess_board.insert_record(i, j)
                        # 落子，最多225

                        #######result = self.step_record_chess_board.check()
                        # 判斷是否有五子連珠

                        if result == 1:
                            self.create_text(240, 475, text='the black wins')
                            # 解除左键绑定
                            self.unbind('<Button-1>')
                            # """Unbind for this widget for event SEQUENCE  the
                            #     function identified with FUNCID."""

                        elif result == 2:
                            self.create_text(240, 475, text='the white wins')
                            # 解除左键绑定
                            self.unbind('<Button-1>')
            # 根據價值網路落子
            if (self.clicked != 1):
                x = 0
                y = 0
                max_value = 0
                for i in range(0, 15):
                    for j in range(0, 15):
                        if (self.step_record_chess_board.value[2][i][j] >=
                                90000):
                            x = i
                            y = j
                            max_value = 99999
                            break
                        elif (self.step_record_chess_board.value[0][i][j] >=
                              max_value):
                            x = i
                            y = j
                            max_value = self.step_record_chess_board.value[0][
                                i][j]
                if (self.step_record_chess_board.value[2][x][y] >= 90000):
                    result = 2

                self.board.state = np.copy(self.step_record_chess_board.state)
                self.AI.value = self.step_record_chess_board.value[0]
                self.AI.update(self.board.state)
                action = self.AI.bestAction()
                x, y = action

                self.step_record_chess_board.insert_record(x, y)
                self.create_oval(self.chess_board_points[x][y].pixel_x - 10,
                                 self.chess_board_points[x][y].pixel_y - 10,
                                 self.chess_board_points[x][y].pixel_x + 10,
                                 self.chess_board_points[x][y].pixel_y + 10,
                                 fill='white')
                #######result = self.step_record_chess_board.check()
                # 判斷是否有五子連珠

                if result == 1:
                    self.create_text(240, 475, text='the black wins')
                    # 解除左键绑定
                    self.unbind('<Button-1>')
                # """Unbind for this widget for event SEQUENCE  the
                #     function identified with FUNCID."""

                elif result == 2:
                    self.create_text(240, 475, text='the white wins')
                    # 解除左键绑定
                    self.unbind('<Button-1>')
                self.clicked = 1

    def click2(self):  # click關鍵字重複
        """
        #   Human vs AI
        #   Have to load trained NN
        """
        self.train_or_play = False  # this will lock the "ai vs human" button

        return

    def loadAI(self, init_model):
        """"
        # load AI
        """

        # return
        # self.buffer_size = 10000
        # self.batch_size = 512  # mini-batch size for training
        # self.data_buffer = deque(maxlen=self.buffer_size)
        init_model = "current_policy.model"
        init_model = "best_policy.model"
        init_model = 'best_policy_12000.pt'
        # init_model = 'best_policy200.pt'

        self.result = False

        # self.policy_value_net = PolicyValueNet(self.width,
        #                                        self.height,
        #                                        model_file=False)
        self.policy_value_net = PolicyValueNet(self.width,
                                               self.height,
                                               model_file=init_model,
                                               use_gpu=False)

        self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playout,
                                      is_selfplay=0)
        self.board2 = Board2(width=self.width,
                             height=self.height,
                             n_in_row=self.n_in_row)
        self.board2.init_board(1)
        self.game = Game(self.board2)

    def click3(self):
        """
        ##  Training
        ##  make it self play, AI vs AI, no need to click the mouse, so no need to listen the event
        ##  problem: Let two AIs to play, and learn the NN
        """

        self.train_or_play = True  # this will lock the "ai vs human" button
        self.loadAI(False)
        # self.policy_value_net = PolicyValueNet(self.width,
        #                                        self.height)

        # self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
        #                               c_puct=self.c_puct,
        #                               n_playout=self.n_playout,
        #                               is_selfplay=1)
        print(self.width, self.height)

        # self.step += 1
        # # self.train_agents()
        # print("agent1: load")
        self.train_nn_agents()

    def train_nn_agents(self):
        self.step += 1
        print("============agents at steps %d ============" % self.step)
        if (self.clicked == 1
            ):  # Black stone, Gomoku agent, using very good heuristic function
            print("Black begins at step %d, %d>>>>>>>>>>>>>>>>>>>>>" %
                  (self.step, self.step_record_chess_board.who_to_play()))
            x = 0
            y = 0
            max_value = 0
            result = 0
            for i in range(0, 15):
                for j in range(0, 15):
                    if (self.step_record_chess_board.value[1][i][j] >= 90000):
                        x = i
                        y = j
                        max_value = 99999
                        break
                    elif (self.step_record_chess_board.value[0][i][j] >=
                          max_value):
                        x = i
                        y = j
                        max_value = self.step_record_chess_board.value[0][i][j]

            if (self.step_record_chess_board.value[1][x][y] >= 90000):
                print("win black in black0!!!!!!!!!!!!!!!!")
                result = 1

            self.board.state = np.copy(self.step_record_chess_board.state)
            self.AI.value = self.step_record_chess_board.value[0]
            self.AI.update(self.board.state)
            # print("temp state, before move", self.AI.value)
            action = self.AI.bestAction()
            x, y = action

            self.create_oval(self.chess_board_points[x][y].pixel_x - 10,
                             self.chess_board_points[x][y].pixel_y - 10,
                             self.chess_board_points[x][y].pixel_x + 10,
                             self.chess_board_points[x][y].pixel_y + 10,
                             fill='black')
            move2 = (self.height - y - 1) * self.width + x

            self.board2.current_player = 1
            self.board2.do_move(move2)
            print("Black, Gomoku takes action: ", move2, x, y,
                  (self.step, self.step_record_chess_board.who_to_play()))
            self.step_record_chess_board.insert_record(
                x, y)  # this function will switch to another player
            # if (self.step_record_chess_board.value[1][x][y] >= 90000):
            #     print("win white in black")
            #     result = 2
            # if (self.step_record_chess_board.value[2][x][y] >= 90000):
            #     print("win black in black")
            #     result = 1
            m6 = self.board.isWin(self.step_record_chess_board.state, (x, y),
                                  1)
            if result == 1 or m6:
                self.create_text(240, 475, text='the black wins, b11')
                return
            elif result == 2:
                self.create_text(240, 475, text='the white wins, b22')
                return
            self.clicked = 0

        if (self.clicked != 1):  # white stone, AlphaZero Angent
            print("White begins at step %d, %d >>>>>>>>>>>>>>>>>>>>>" %
                  (self.step, self.step_record_chess_board.who_to_play()))
            self.clicked = 1
            if self.step_record_chess_board.who_to_play() == 1:
                cur_play = 1
            elif self.step_record_chess_board.who_to_play() == 2:
                cur_play = 2  # current is white, 2

            if (self.step_record_chess_board.value[2][x][y] >= 90000):
                print("win white in white!!!!!!!!!!!!!!!!")
                result = 2

            # NN AI do:
            # get board state information
            temp_board = np.copy(self.step_record_chess_board.state)
            self.board2.update_state(temp_board)

            self.board2.current_player = cur_play
            self.mcts_player.reset_player()
            self.mcts_player.set_player_ind(cur_play)
            test_moved = list(
                set(range(self.width * self.height)) -
                set(self.board2.availables))
            self.mcts_player.reset_player()
            action = self.mcts_player.get_action(self.board2)
            self.board2.do_move(action)
            x = action % self.width
            y = action // self.height
            y = self.height - y - 1
            # print("after action",self.board2.states, len(self.board2.availables))
            # print("White, NN agent want to place at: ", action, x, y, (self.step,self.step_record_chess_board.who_to_play()))
            # self.board2.do_move(action)
            # insert into the record, for the white player to use
            self.step_record_chess_board.insert_record(x, y)
            # print("----------------------------------------")

            self.create_oval(self.chess_board_points[x][y].pixel_x - 10,
                             self.chess_board_points[x][y].pixel_y - 10,
                             self.chess_board_points[x][y].pixel_x + 10,
                             self.chess_board_points[x][y].pixel_y + 10,
                             fill='white')
            if self.result:
                print("white wins")
                return
            # if (self.step_record_chess_board.value[1][x][y] >= 90000):
            #     print("win white in white")
            #     result = 2
            # if (self.step_record_chess_board.value[2][x][y] >= 90000):
            #     print("win black in white")
            #     result = 1
            m6 = self.board.isWin(self.step_record_chess_board.state, (x, y),
                                  2)
            if result == 1:
                self.create_text(240, 475, text='the black wins, w12')
                return

            elif result == 2 or m6:
                self.create_text(240, 475, text='the white wins, w22')
                return
            self.clicked = 1

        if self.text_id:
            print(self.text_id)
            self.delete(self.text_id)
        self.text_id = self.create_text(150, 475, text='Step: %d' % self.step)
        self.after(10, self.train_nn_agents)

    def collect_selfplay_data(self, n_games=1):
        """collect self-play data for training"""
        for i in range(n_games):
            winner, play_data = self.game.start_self_play(self.mcts_player,
                                                          temp=1)
            play_data = list(play_data)[:]
            self.episode_len = len(play_data)
            # augment the data
            play_data = self.get_equi_data(play_data)
            self.data_buffer.extend(play_data)

    def get_equi_data(self, play_data):
        """augment the data set by rotation and flipping
                play_data: [(state, mcts_prob, winner_z), ..., ...]
                """
        extend_data = []
        for state, mcts_porb, winner in play_data:
            for i in [1, 2, 3, 4]:
                # rotate counterclockwise
                equi_state = np.array([np.rot90(s, i) for s in state])
                equi_mcts_prob = np.rot90(
                    np.flipud(mcts_porb.reshape(self.height, self.width)), i)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
                # flip horizontally
                equi_state = np.array([np.fliplr(s) for s in equi_state])
                equi_mcts_prob = np.fliplr(equi_mcts_prob)
                extend_data.append(
                    (equi_state, np.flipud(equi_mcts_prob).flatten(), winner))
        return extend_data

    def train_agents(self):
        self.step += 1
        if (self.clicked != 1):
            x = 0
            y = 0
            max_value = 0
            result = 0
            for i in range(0, 15):
                for j in range(0, 15):
                    if (self.step_record_chess_board.value[1][i][j] >= 90000):
                        x = i
                        y = j
                        max_value = 99999
                        break
                    elif (self.step_record_chess_board.value[0][i][j] >=
                          max_value):
                        x = i
                        y = j
                        max_value = self.step_record_chess_board.value[0][i][j]
            if (self.step_record_chess_board.value[1][x][y] >= 90000):
                result = 2
            """
            Important 2:
            Only below 4 line are interact between the AI agent and the board
            self.board.state = np.copy(self.step_record_chess_board.state)  # make a deep copy of state
            self.AI_1.value = self.step_record_chess_board.value[1]         # assign board information, a 15*15 array
            self.AI_1.update(self.board.state)                              # AI function, do some calculations
            action = self.AI_1.bestAction()                                 # Best actions the AI will make
            """
            self.board.state = np.copy(self.step_record_chess_board.state)
            self.AI_1.value = self.step_record_chess_board.value[1]
            self.AI_1.update(self.board.state)
            action = self.AI_1.bestAction()

            x, y = action

            self.step_record_chess_board.insert_record(x, y)

            self.create_oval(self.chess_board_points[x][y].pixel_x - 10,
                             self.chess_board_points[x][y].pixel_y - 10,
                             self.chess_board_points[x][y].pixel_x + 10,
                             self.chess_board_points[x][y].pixel_y + 10,
                             fill='black')
            if result == 1:
                self.create_text(240, 475, text='the black wins')
                return

            elif result == 2:
                self.create_text(240, 475, text='the white wins')
                return
            self.clicked = 1

            # 根據價值網路落子
        if (self.clicked == 1):  # White stone
            x = 0
            y = 0
            max_value = 0
            for i in range(0, 15):
                for j in range(0, 15):
                    if (self.step_record_chess_board.value[2][i][j] >= 90000):
                        x = i
                        y = j
                        max_value = 99999
                        break
                    elif (self.step_record_chess_board.value[0][i][j] >=
                          max_value):
                        x = i
                        y = j
                        max_value = self.step_record_chess_board.value[0][i][j]

            if (self.step_record_chess_board.value[2][x][y] >= 90000):
                result = 2

            self.board.state = np.copy(self.step_record_chess_board.state)
            self.AI.value = self.step_record_chess_board.value[0]
            self.AI.update(self.board.state)
            action = self.AI.bestAction()
            x, y = action

            self.step_record_chess_board.insert_record(x, y)
            self.create_oval(self.chess_board_points[x][y].pixel_x - 10,
                             self.chess_board_points[x][y].pixel_y - 10,
                             self.chess_board_points[x][y].pixel_x + 10,
                             self.chess_board_points[x][y].pixel_y + 10,
                             fill='white')

            if result == 1:
                self.create_text(240, 475, text='the black wins')

                return
            elif result == 2:
                self.create_text(240, 475, text='the white wins')
                return
            self.clicked = 0

        if self.text_id:
            print(self.text_id)
            self.delete(self.text_id)
        self.text_id = self.create_text(150, 475, text='Step: %d' % self.step)
        self.after(10, self.train_agents)