Пример #1
0
    def get(self):
        """Serve the home page as HTML.

        Renders ``home.html`` with the chart image path and writes the
        rendered text to the response.
        """
        self.response.headers['Content-Type'] = 'text/html'
        context = {"chart_src": "../static/images/icon1.jpg"}
        page = render_str("home.html", context)
        self.response.write(page)
Пример #2
0
    def render(self):
        """Render this post through the ``post.html`` template.

        Stores the newline-to-``<br>`` converted content on the instance,
        counts the comments and likes that reference this post's key, and
        passes the author entity plus the two human-readable counters
        ("1 comment", "3 likes", ...) to the template.
        """
        # Line breaks in the stored text become HTML breaks.
        self.__render_text = self.content.replace('\n', '<br>')

        comment_total = (Comment.query(Comment.post == self.key)
                         .order(-Comment.created)
                         .count())
        comment_noun = "comment" if comment_total == 1 else "comments"

        like_total = Like.query(Like.post == self.key).count()
        like_noun = "like" if like_total == 1 else "likes"

        return utils.render_str(
            'post.html',
            post=self,
            author=self.user.get(),
            comment_count="{} {}".format(comment_total, comment_noun),
            like_count="{} {}".format(like_total, like_noun))
Пример #3
0
    def render(self, username):
        """Render this comment through ``comment.html``.

        Before rendering, records on the instance whether ``username``
        matches the author's name, the content with newlines turned into
        ``<br>``, and the datastore id of the entity.
        """
        self._isAuthor = (username == self.author.name)

        html_text = self.content.replace('\n', '<br>')
        self._render_text = html_text
        self._key = self.key().id()
        return utils.render_str("comment.html", c=self)
Пример #4
0
 def render(self, user=None):
     """Render this comment through ``comment.html``.

     The content is stored on the instance with newlines replaced by
     ``<br>``; the template receives the comment itself, the entity
     fetched from ``self.user`` and the optional viewing ``user``.
     """
     # Line breaks in the stored text become HTML breaks.
     self.__render_text = self.content.replace('\n', '<br>')
     template_args = {
         'comment': self,
         'comment_user': self.user.get(),
         'user': user,
     }
     return utils.render_str('comment.html', **template_args)
Пример #5
0
    def showPermalink(self, username):
        """Render the permalink view of this post.

        The author (``self.author.name == username``) gets the edit form
        ``editpost.html``; every other viewer gets ``post.html``.
        """
        self._render_text = self.content.replace('\n', '<br>')
        self._key = self.key().id()
        self._can_edit = False
        self._is_permalink = True

        viewer_is_author = self.author.name == username
        if not viewer_is_author:
            return utils.render_str("post.html", p=self)

        self._can_edit = True
        edit_form = dict(
            title="Edit post",
            username=username,
            author=self.author.name,
            subject=self.subject,
            content=self._render_text,
            key=self._key,
            error="",
        )
        return utils.render_str("editpost.html", **edit_form)
Пример #6
0
	def SOAPRequest(self, operation, input_parameters=None):
		"""POST the rendered SOAP envelope for ``operation`` to the Ideone API.

		Args:
			operation: name of the template under ``soap/`` (without the
				``.xml`` suffix) that is rendered as the request body.
			input_parameters: optional mapping merged over a copy of
				``self.credentials``; its keys win on conflict.

		Returns:
			``{'error': 'timeout'}`` if the fetch raises any exception.
			NOTE(review): in the visible code a successful fetch falls off
			the end of the function (returns ``None``) and ``response`` is
			never used -- the snippet looks truncated; confirm.
		"""
		# Merge onto a copy so neither the credentials nor the caller's
		# mapping is mutated (and no mutable default argument is shared).
		parameters = dict(self.credentials)
		if input_parameters:
			parameters.update(input_parameters)

		request = utils.render_str("soap/" + operation + ".xml", parameters)
		response = None
		try:
			response = urlfetch.fetch(
				url='http://ideone.com/api/1/service',
				method=urlfetch.POST,
				payload=request,
				deadline=10,
				# Header name must not carry a trailing colon.
				headers={'Content-Type': 'text/xml; charset=UTF-8'})
		except Exception:
			# Best effort: every fetch failure is reported as a timeout.
			return {'error': 'timeout'}
Пример #7
0
    def render(self, username):
        """Render this post through ``post.html``.

        Stores on the instance the content with newlines turned into
        ``<br>``, the datastore id of the entity, and whether the viewer is
        allowed to edit.

        Args:
            username: name of the viewing user; editing is allowed only
                when it equals the author's name.

        Returns:
            The result of ``utils.render_str("post.html", p=self)``.
        """
        self._render_text = self.content.replace('\n', '<br>')
        self._key = self.key().id()
        # Only the author may edit. (The datastore key that used to be
        # built here was never used, so it has been dropped.)
        self._can_edit = self.author.name == username

        return utils.render_str("post.html", p=self)
Пример #8
0
def main():
    """Run one game on a 17x17 board until ``win_index`` becomes non-zero.

    NOTE(review): this definition is cut off after the final ``else:`` in
    the visible source, so the rest of the loop body is unknown.
    """
    turn = 0
    enemy_turn = 1

    board = np.zeros([17, 17])
    root_id = (0,)
    win_index = 0
    action_index = None

    GameInfo.game_board = board

    # Main code that runs from the start of the game until it ends
    while win_index == 0:
        utils.render_str(board)  # print the board state to the console

        # Wait for the input for the next action
        action, action_index = evaluator.get_action(root_id,
                                                    board,
                                                    turn,
                                                    enemy_turn)
        # Execute the action
        if turn != enemy_turn:
            root_id = evaluator.player.root_id + action + (action_index,)
        else:
Пример #9
0
 def render_post(self):
     """Render this post through ``post.html``.

     Stores the datastore id as ``_id`` and the content with newlines
     replaced by ``<br>`` as ``_render_text`` before rendering.
     """
     self._id = self.key().id()
     line_break = '<br>'
     self._render_text = self.content.replace('\n', line_break)
     template_args = {'post': self}
     return render_str("post.html", **template_args)
Пример #10
0
def main():
    """Play ``N_MATCH`` games between the player and enemy agents.

    After every move the board, the move index and the agents' visit/policy
    data are published on ``game_info``, ``player_agent_info`` and
    ``enemy_agent_info``. After every game the win/loss/draw tally and both
    ELO ratings are updated and printed, and the evaluator is reset.
    """
    # Set up the agents
    evaluator.set_agents(player_model_path, enemy_model_path,
                         monitor_model_path)

    # Pass each agent's information to the web server
    player_agent_info.agent = evaluator.player
    enemy_agent_info.agent = evaluator.enemy

    env = evaluator.return_env()  # env.env_small.GameState()

    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}  # wins, losses, draws
    turn = 0  # player's turn
    enemy_turn = 1  # enemy's turn
    player_elo = 1500  # player's rating
    enemy_elo = 1500  # enemy's rating

    # Pass the enemy-turn variable and the game status to the web server
    game_info.enemy_turn = enemy_turn
    game_info.game_status = 0

    # Print the player's and the enemy's ratings
    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    # Run N_MATCH matches
    for i in range(N_MATCH):
        board = np.zeros([BOARD_SIZE, BOARD_SIZE
                          ])  # create a 2-D array board, BOARD_SIZE x BOARD_SIZE
        root_id = (0, )  # records the move positions played so far
        win_index = 0  # result (0: in progress, 1: black wins, 2: white wins, 3: draw)
        action_index = None  # move position of the current turn

        # Pass the board to the web server
        game_info.game_board = board

        # The first mover alternates every game
        if i % 2 == 0:
            print('Player Color: Black')
        else:
            print('Player Color: White')
        # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
        game_info.game_status = 0

        # Main game loop; runs until the game is decided
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE,
                             action_index)  # print the board to the console as text

            # Calls agents.ZeroAgent().get_pv()
            # Receives the policy (winning likelihood of each move position) and the value (higher when the side to move is more likely to win)
            p, v = evaluator.monitor.get_pv(root_id)

            # Get the move position
            # action : 1-D array of size (boardsize**2) with the move position marked
            # action_index : index of the move position
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)

            # Append the current move position to root_id
            if turn != enemy_turn:
                # player turn
                root_id = evaluator.player.root_id + (action_index, )
            else:
                # enemy turn
                root_id = evaluator.enemy.root_id + (action_index, )

            # Receive the board, whether the move was valid, the game progress and the turn
            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # Pass the information above to the web server
            game_info.game_board = board
            game_info.action_index = int(action_index)
            game_info.win_index = win_index
            game_info.curr_turn = turn  # 0 black 1 white

            # Count which move number this is
            move = np.count_nonzero(board)

            if turn == enemy_turn:
                # enemy turn
                # If the player is a human or Web agent, pass the monitor agent's visit and policy to the web server
                if isinstance(evaluator.player, agents.HumanAgent) or \
                        isinstance(evaluator.player, agents.WebAgent):
                    player_agent_info.visit = evaluator.monitor.get_visit()
                    player_agent_info.p = evaluator.monitor.get_policy()
                else:  # otherwise pass the player agent's visit and policy to the web server
                    player_agent_info.visit = evaluator.player.get_visit()
                    player_agent_info.p = evaluator.player.get_policy()

                player_agent_info.add_value(move, v)  # pass move and v to the web server
                evaluator.enemy.del_parents(
                    root_id)  # delete every tree shorter than root_id from the enemy agent's tree

            else:
                # player turn
                # Pass the enemy agent's visit, policy, move and v to the web server
                enemy_agent_info.visit = evaluator.enemy.get_visit()
                enemy_agent_info.p = evaluator.enemy.get_policy()
                enemy_agent_info.add_value(move, v)
                evaluator.player.del_parents(
                    root_id)  # does nothing in particular for a human agent

            if win_index != 0:  # if the game has been decided
                # Reset
                player_agent_info.clear_values()
                enemy_agent_info.clear_values()
                # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
                # NOTE(review): win_index is documented above as black/white
                # win, not player/enemy win -- confirm what the web side expects.
                game_info.game_status = win_index  # pass the result to the web server

                if turn == enemy_turn:  # if it is the enemy's turn
                    if win_index == 3:  # draw
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5,
                                                    0.5)  # original note: no rating change -- NOTE(review): depends on elo(); confirm
                    else:  # the player placed the last stone, then the turn flipped and the game ended, so the player wins
                        result['Player'] += 1
                        print('\nPlayer Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 1,
                                                    0)
                else:  # if it is the player's turn (same as above)
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5,
                                                    0.5)
                    else:
                        result['Enemy'] += 1
                        print('\nEnemy Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0,
                                                    1)

                utils.render_str(board, BOARD_SIZE, action_index)  # render the board
                # Swap who moves first
                enemy_turn = abs(enemy_turn - 1)
                turn = 0

                # Pass the move order and turn information to the web server
                game_info.enemy_turn = enemy_turn
                game_info.curr_turn = turn

                # Print the game result summary
                pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
                winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
                print('')
                print('=' * 20, " {}  Game End  ".format(i + 1), '=' * 20)
                print('Player Win: {}'
                      '  Enemy Win: {}'
                      '  Draw: {}'
                      '  Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
                print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
                    player_elo, enemy_elo))
                evaluator.reset()  # reset the evaluator
Пример #11
0
 def render_history(self):
     """Render this entry through ``history_edits.html``.

     Newlines are removed from the content (not replaced by ``<br>``)
     and the result is stored as ``_render_text`` before rendering.
     """
     single_line = self.content.replace('\n', '')
     self._render_text = single_line
     return utils.render_str("history_edits.html", p=self)
Пример #12
0
def main():
    """Play ``N_MATCH`` games between the evaluator's player and enemy.

    Each move is published on ``gi`` and on the two agent-info objects;
    after every game the tally in ``result`` and both ELO ratings are
    updated and printed, the first mover is swapped and the evaluator is
    reset.

    Raises:
        ValueError: if ``env.step`` reports the chosen move as invalid.
    """
    print('cuda:', use_cuda)

    # g_evaluator = evaluator

    env = game.GameState('text')
    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}
    turn = 0
    enemy_turn = 1
    gi.enemy_turn = enemy_turn
    player_elo = 1500
    enemy_elo = 1500

    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    # i = 0

    for i in range(N_MATCH):
        # Fresh per-game state.
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0, )
        win_index = 0
        action_index = None

        if i % 2 == 0:
            print('Player Color: Black')
        else:
            print('Player Color: White')

        # Loop until env.step reports a non-zero win_index.
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE, action_index)
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)

            p, v = evaluator.get_pv(root_id, turn, enemy_turn)

            # Extend the move history of whichever side just chose a move.
            if turn != enemy_turn:
                # player turn
                root_id = evaluator.player.root_id + (action_index, )
            else:
                # enemy turn
                root_id = evaluator.enemy.root_id + (action_index, )

            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # WebAPI
            gi.game_board = board
            gi.action_index = int(action_index)
            gi.win_index = win_index
            gi.curr_turn = turn

            # Number of non-zero cells on the board after this move.
            move = np.count_nonzero(board)

            if evaluator.get_player_visit() is not None:
                player_agent_info.visit = evaluator.get_player_visit()

            if evaluator.get_enemy_visit() is not None:
                enemy_agent_info.visit = evaluator.get_enemy_visit()

            # `turn` is now the value returned by env.step
            # (presumably the side to move next -- verify against env).
            if turn == enemy_turn:
                evaluator.enemy.del_parents(root_id)
                player_agent_info.add_value(move, v)
                player_agent_info.p = p

            else:
                evaluator.player.del_parents(root_id)
                enemy_agent_info.add_value(move, v)
                enemy_agent_info.p = p

            # used for debugging
            if not check_valid_pos:
                raise ValueError('no legal move!')

            if win_index != 0:
                player_agent_info.clear_values()
                enemy_agent_info.clear_values()
                # win_index == 3 is scored as a draw (0.5 / 0.5); any other
                # non-zero value is credited according to the turn comparison.
                if turn == enemy_turn:
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5,
                                                    0.5)
                    else:
                        result['Player'] += 1
                        print('\nPlayer Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 1,
                                                    0)
                else:
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5,
                                                    0.5)
                    else:
                        result['Enemy'] += 1
                        print('\nEnemy Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0,
                                                    1)

                utils.render_str(board, BOARD_SIZE, action_index)
                # Change turn
                # abs(x - 1) flips enemy_turn between 0 and 1.
                enemy_turn = abs(enemy_turn - 1)
                gi.enemy_turn = enemy_turn
                turn = 0
                pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
                # A draw counts as half a win in the win rate.
                winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
                print('')
                print('=' * 20, " {}  Game End  ".format(i + 1), '=' * 20)
                print('Player Win: {}'
                      '  Enemy Win: {}'
                      '  Draw: {}'
                      '  Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
                print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
                    player_elo, enemy_elo))
                evaluator.reset()
Пример #13
0
 def render(self):
     """Render this post through ``post.html``.

     Stores the content with newlines replaced by ``<br>`` and passes
     the number of likers, dislikers and comments to the template.
     """
     self._render_text = self.content.replace('\n', '<br>')
     counters = {
         'nlikers': len(list(self.likers)),
         'ndislikers': len(list(self.dislikers)),
         'ncomments': len(list(self.comments)),
     }
     return utils.render_str("post.html", p=self, **counters)
Пример #14
0
def self_play(n_selfplay):
    """Play ``n_selfplay`` self-play games and collect training samples.

    For every move the state and the policy ``pi`` returned by
    ``Agent.get_pi`` are queued per colour; when a game ends each queued
    sample is appended to the global ``cur_memory`` together with that
    colour's reward (+1 win, -1 loss, 0 draw). After all episodes the
    augmented ``cur_memory`` is appended to the global ``rep_memory``.

    Relies on the module-level names ``Agent``, ``cur_memory``,
    ``rep_memory``, ``result``, ``device`` and the configuration constants
    used below; ``result`` is mutated but not defined in this function.

    Args:
        n_selfplay: number of episodes to play.
    """
    global cur_memory, rep_memory
    global Agent

    # Per-colour sample queues: appendleft() here + pop() at game end
    # yields the samples in the order they were recorded.
    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()

    if RESIGN_MODE:
        resign_val_balck = []
        resign_val_white = []
        resign_val = []
        resign_v = -1.0
        # Episodes below this index only collect values; the resign
        # threshold is set once episode + 1 reaches it (see below).
        n_resign_thres = N_SELFPLAY // 4

    for episode in range(n_selfplay):
        if (episode + 1) % 10 == 0:
            logging.warning('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0, )
        win_index = 0
        time_steps = 0
        action_index = None

        if RESIGN_MODE:
            resign_index = 0

        while win_index == 0:
            if PRINT_SELFPLAY:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #

            # tau is 1 for the first TAU_THRES moves of a game, 0 afterwards.
            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0

            pi = Agent.get_pi(root_id, tau)

            # ===================== collect samples ======================== #

            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)

            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #

            action, action_index = utils.get_action(pi)
            root_id += (action_index, )

            # ====================== print evaluation ====================== #

            # NOTE(review): `v` is only computed inside this branch, so the
            # resign bookkeeping below runs only when PRINT_SELFPLAY is
            # enabled -- confirm that this is intended.
            if PRINT_SELFPLAY:
                Agent.model.eval()
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = Agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_balck.append(v)
                        elif v < resign_v:
                            # Black resigns, so the game is scored as 2.
                            resign_index = 2
                            if PRINT_SELFPLAY:
                                print('"Black Resign!"')
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))
                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            resign_val_white.append(v)
                        elif v < resign_v:
                            # White resigns, so the game is scored as 1.
                            resign_index = 1
                            if PRINT_SELFPLAY:
                                print('"White Resign!"')

            # =========================== step ============================= #

            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #

            # A pending resignation overrides the result reported by env.step.
            if RESIGN_MODE:
                if resign_index != 0:
                    win_index = resign_index
                    result['Resign'] += 1

            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # Keep only the winner's recorded values.
                            for val in resign_val_balck:
                                resign_val.append(val)
                            resign_val_balck.clear()
                            resign_val_white.clear()

                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_white.clear()
                            resign_val_balck.clear()
                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

                    if RESIGN_MODE:
                        if episode < n_resign_thres:
                            # On a draw both colours' values are kept.
                            for val in resign_val_balck:
                                resign_val.append(val)
                            for val in resign_val_white:
                                resign_val.append(val)
                            resign_val_balck.clear()
                            resign_val_white.clear()

                if RESIGN_MODE:
                    # The resign threshold becomes the smallest value collected
                    # during the first n_resign_thres episodes.
                    # NOTE(review): min() raises ValueError if resign_val is
                    # empty at this point -- confirm that cannot happen.
                    if episode + 1 == n_resign_thres:
                        resign_v = min(resign_val)
                        resign_val.clear()

                    if PRINT_SELFPLAY:
                        print('Resign win%: {:.2f}%'.format(
                            (resign_v + 1) / 2 * 100))

            # ====================== store in memory ======================= #

                # Drain both queues, alternating black and white samples.
                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

            # =========================  result  =========================== #

                if PRINT_SELFPLAY:
                    utils.render_str(board, BOARD_SIZE, action_index)

                    bw, ww, dr, rs = result['Black'], result['White'], \
                        result['Draw'], result['Resign']
                    print('')
                    print('=' * 20, " {:3} Game End   ".format(episode + 1),
                          '=' * 20)
                    print('Black Win: {:3}   '
                          'White Win: {:3}   '
                          'Draw: {:2}   '
                          'Win%: {:.2f}%'
                          '\nResign: {:2}'.format(bw, ww, dr, (bw + 0.5 * dr) /
                                                  (bw + ww + dr) * 100, rs))
                    print('current memory size:', len(cur_memory))

                Agent.reset()

    rep_memory.extend(utils.augment_dataset(cur_memory, BOARD_SIZE))
 def render(self):
     """Render this comment through ``partials/comment.html``.

     The content with newlines replaced by ``<br>`` is stored as
     ``_render_text`` before rendering.
     """
     html_text = self.content.replace('\n', '<br>')
     self._render_text = html_text
     return render_str("partials/comment.html", c=self)
Пример #16
0
	def render(self):
		"""Render this post through ``post.html``.

		The content with newlines replaced by ``<br>`` is stored as
		``_render_text`` before rendering.
		"""
		html_body = self.content.replace('\n', '<br>')
		self._render_text = html_body
		return utils.render_str("post.html", p=self)
Пример #17
0
 def render(self, logged_in_user):
     """Render this comment through ``comment.html``.

     Stores the content with newlines replaced by ``<br>`` and passes
     both the comment and ``logged_in_user`` to the template.
     """
     self._render_text = self.content.replace('\n', '<br>')
     template_args = {'c': self, 'logged_in_user': logged_in_user}
     return utils.render_str("comment.html", **template_args)
Пример #18
0
 def render_str(self, template, **params):
     """Render ``template`` with ``params`` plus ``user=self.user``.

     A ``user`` keyword supplied by the caller is overridden.
     """
     context = dict(params, user=self.user)
     return utils.render_str(template, **context)
Пример #19
0
 def render(self):
     """Render this entity through ``content.html``.

     The content is stored unchanged as ``_render_text`` first.
     """
     raw_text = self.content
     self._render_text = raw_text
     return utils.render_str("content.html", p=self)
Пример #20
0
def main():
    """Play ``N_MATCH`` games between two models and report the results.

    Builds an ``Evaluator`` from the two model paths, plays the games on
    the environment it returns, and after each game prints the running
    win/loss/draw tally, the win rate and both ELO ratings.
    """
    evaluator = Evaluator(player_model_path, enemy_model_path)

    env = evaluator.return_env()

    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}
    turn = 0
    enemy_turn = 1
    player_elo = 1500
    enemy_elo = 1500

    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    for i in range(N_MATCH):
        # Fresh per-game state.
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0, )
        win_index = 0
        action_index = None

        if i % 2 == 0:
            print('Player Color: Black')
        else:
            print('Player Color: White')

        # Loop until env.step reports a non-zero win_index.
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE, action_index)
            action, action_index = evaluator.get_action(
                root_id, board, turn, enemy_turn)

            # Extend the move history of whichever side just chose a move.
            if turn != enemy_turn:
                # player turn
                root_id = evaluator.player.root_id + (action_index, )
            else:
                # enemy turn
                root_id = evaluator.enemy.root_id + (action_index, )

            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # `turn` is now the value returned by env.step
            # (presumably the side to move next -- verify against env).
            if turn == enemy_turn:
                evaluator.enemy.del_parents(root_id)
            else:
                evaluator.player.del_parents(root_id)

            if win_index != 0:
                # win_index == 3 is scored as a draw (0.5 / 0.5); any other
                # non-zero value is credited according to the turn comparison.
                if turn == enemy_turn:
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5,
                                                    0.5)
                    else:
                        result['Player'] += 1
                        print('\nPlayer Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 1,
                                                    0)
                else:
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0.5,
                                                    0.5)
                    else:
                        result['Enemy'] += 1
                        print('\nEnemy Win!')
                        player_elo, enemy_elo = elo(player_elo, enemy_elo, 0,
                                                    1)

                utils.render_str(board, BOARD_SIZE, action_index)
                # Change turn
                # abs(x - 1) flips enemy_turn between 0 and 1.
                enemy_turn = abs(enemy_turn - 1)
                turn = 0
                pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
                # A draw counts as half a win in the win rate.
                winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
                print('')
                print('=' * 20, " {}  Game End  ".format(i + 1), '=' * 20)
                print('Player Win: {}'
                      '  Enemy Win: {}'
                      '  Draw: {}'
                      '  Winrate: {:.2f}%'.format(pw, ew, dr, winrate))
                print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
                    player_elo, enemy_elo))
                evaluator.reset()
Пример #21
0
def main():
    """Play ``N_MATCH`` games between the player and enemy agents.

    After every move the board, the move index and the agents' visit/policy
    data are published on ``game_info``, ``player_agent_info`` and
    ``enemy_agent_info``. After every game the win/loss/draw tally and both
    ELO ratings are updated and printed, and the evaluator is reset.
    """
    evaluator.set_agents(
        player_model_path, enemy_model_path, monitor_model_path)

    player_agent_info.agent = evaluator.player
    enemy_agent_info.agent = evaluator.enemy

    env = evaluator.return_env()

    result = {'Player': 0, 'Enemy': 0, 'Draw': 0}
    turn = 0
    enemy_turn = 1
    player_elo = 1500
    enemy_elo = 1500

    game_info.enemy_turn = enemy_turn
    game_info.game_status = 0

    print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
        player_elo, enemy_elo))

    for i in range(N_MATCH):
        # Fresh per-game state.
        board = np.zeros([BOARD_SIZE, BOARD_SIZE])
        root_id = (0,)
        win_index = 0
        action_index = None

        game_info.game_board = board

        if i % 2 == 0:
            print('Player Color: Black')
        else:
            print('Player Color: White')
        # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
        game_info.game_status = 0

        # Loop until env.step reports a non-zero win_index.
        while win_index == 0:
            utils.render_str(board, BOARD_SIZE, action_index)

            # Policy/value from the monitor agent for the position before
            # this move; only `v` is used below.
            p, v = evaluator.monitor.get_pv(root_id)

            action, action_index = evaluator.get_action(root_id,
                                                        board,
                                                        turn,
                                                        enemy_turn)

            if turn != enemy_turn:
                # player turn
                root_id = evaluator.player.root_id + (action_index,)
            else:
                # enemy turn
                root_id = evaluator.enemy.root_id + (action_index,)

            board, check_valid_pos, win_index, turn, _ = env.step(action)

            # Publish the new position.
            game_info.game_board = board
            game_info.action_index = int(action_index)
            game_info.win_index = win_index
            game_info.curr_turn = turn  # 0 black 1 white

            # Number of non-zero cells on the board after this move.
            move = np.count_nonzero(board)

            if turn == enemy_turn:

                # For a human or web player the monitor agent's data is
                # published in place of the player's own.
                if isinstance(evaluator.player, agents.HumanAgent) or \
                        isinstance(evaluator.player, agents.WebAgent):
                    player_agent_info.visit = evaluator.monitor.get_visit()
                    player_agent_info.p = evaluator.monitor.get_policy()
                else:
                    player_agent_info.visit = evaluator.player.get_visit()
                    player_agent_info.p = evaluator.player.get_policy()

                player_agent_info.add_value(move, v)
                evaluator.enemy.del_parents(root_id)

            else:
                enemy_agent_info.visit = evaluator.enemy.get_visit()
                enemy_agent_info.p = evaluator.enemy.get_policy()
                enemy_agent_info.add_value(move, v)
                evaluator.player.del_parents(root_id)

            if win_index != 0:
                player_agent_info.clear_values()
                enemy_agent_info.clear_values()
                # 0:Running 1:Player Win, 2: Enemy Win 3: Draw
                game_info.game_status = win_index

                # win_index == 3 is scored as a draw (0.5 / 0.5); any other
                # non-zero value is credited according to the turn comparison.
                if turn == enemy_turn:
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(
                            player_elo, enemy_elo, 0.5, 0.5)
                    else:
                        result['Player'] += 1
                        print('\nPlayer Win!')
                        player_elo, enemy_elo = elo(
                            player_elo, enemy_elo, 1, 0)
                else:
                    if win_index == 3:
                        result['Draw'] += 1
                        print('\nDraw!')
                        player_elo, enemy_elo = elo(
                            player_elo, enemy_elo, 0.5, 0.5)
                    else:
                        result['Enemy'] += 1
                        print('\nEnemy Win!')
                        player_elo, enemy_elo = elo(
                            player_elo, enemy_elo, 0, 1)

                utils.render_str(board, BOARD_SIZE, action_index)
                # Change turn
                # abs(x - 1) flips enemy_turn between 0 and 1.
                enemy_turn = abs(enemy_turn - 1)
                turn = 0

                game_info.enemy_turn = enemy_turn
                game_info.curr_turn = turn

                pw, ew, dr = result['Player'], result['Enemy'], result['Draw']
                # A draw counts as half a win in the win rate.
                winrate = (pw + 0.5 * dr) / (pw + ew + dr) * 100
                print('')
                print('=' * 20, " {}  Game End  ".format(i + 1), '=' * 20)
                print('Player Win: {}'
                      '  Enemy Win: {}'
                      '  Draw: {}'
                      '  Winrate: {:.2f}%'.format(
                          pw, ew, dr, winrate))
                print('Player ELO: {:.0f}, Enemy ELO: {:.0f}'.format(
                    player_elo, enemy_elo))
                evaluator.reset()
Пример #22
0
 def render_str(self, template, **params):
     """Forward ``template`` and ``params`` unchanged to ``utils.render_str``."""
     renderer = utils.render_str
     return renderer(template, **params)
Пример #23
0
def self_play(agent, cur_memory, rank=0):
    """Play self-play games until ``cur_memory`` holds ``MEMORY_SIZE`` samples.

    For every move the state and the policy ``pi`` returned by
    ``agent.get_pi`` are queued per colour; when a game ends each queued
    sample is appended to ``cur_memory`` together with that colour's
    reward (+1 win, -1 loss, 0 draw).

    Relies on the module-level names ``result`` and ``device`` and on the
    configuration constants used below; ``result`` is mutated but not
    defined in this function.

    Args:
        agent: object providing ``model``, ``get_pi`` and ``reset``.
        cur_memory: container the samples are appended to (mutated).
        rank: passed to ``agent.get_pi``; console output is produced only
            when ``rank == 0`` and ``PRINT_SELFPLAY`` is set.

    Returns:
        ``utils.augment_dataset(cur_memory, BOARD_SIZE)`` once
        ``len(cur_memory) >= MEMORY_SIZE`` at the end of a game.
    """
    agent.model.eval()
    # Per-colour sample queues: appendleft() here + pop() at game end
    # yields the samples in the order they were recorded.
    state_black = deque()
    state_white = deque()
    pi_black = deque()
    pi_white = deque()
    episode = 0
    while True:
        if (episode + 1) % 10 == 0:
            logging.info('Playing Episode {:3}'.format(episode + 1))

        env = game.GameState('text')
        board = np.zeros((BOARD_SIZE, BOARD_SIZE), 'float')
        turn = 0
        root_id = (0, )
        win_index = 0
        time_steps = 0
        action_index = None

        while win_index == 0:
            if PRINT_SELFPLAY and rank == 0:
                utils.render_str(board, BOARD_SIZE, action_index)

            # ====================== start MCTS ============================ #

            # tau is 1 for the first TAU_THRES moves of a game, 0 afterwards.
            if time_steps < TAU_THRES:
                tau = 1
            else:
                tau = 0

            pi = agent.get_pi(root_id, tau, rank)

            # ===================== collect samples ======================== #

            state = utils.get_state_pt(root_id, BOARD_SIZE, IN_PLANES)

            if turn == 0:
                state_black.appendleft(state)
                pi_black.appendleft(pi)
            else:
                state_white.appendleft(state)
                pi_white.appendleft(pi)

            # ======================== get action ========================== #

            action, action_index = utils.get_action(pi)
            root_id += (action_index, )

            # ====================== print evaluation ====================== #

            if PRINT_SELFPLAY and rank == 0:
                with torch.no_grad():
                    state_input = torch.tensor([state]).to(device).float()
                    p, v = agent.model(state_input)
                    p = p.cpu().numpy()[0]
                    v = v.item()

                    print('\nPi:\n{}'.format(
                        pi.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))
                    print('\nPolicy:\n{}'.format(
                        p.reshape(BOARD_SIZE, BOARD_SIZE).round(decimals=2)))

                # (v + 1) / 2 * 100 rescales v to a percentage.
                if turn == 0:
                    print("\nBlack's win%: {:.2f}%".format((v + 1) / 2 * 100))
                else:
                    print("\nWhite's win%: {:.2f}%".format((v + 1) / 2 * 100))

            # =========================== step ============================= #

            board, _, win_index, turn, _ = env.step(action)
            time_steps += 1

            # ========================== result ============================ #

            if win_index != 0:
                if win_index == 1:
                    reward_black = 1.
                    reward_white = -1.
                    result['Black'] += 1

                elif win_index == 2:
                    reward_black = -1.
                    reward_white = 1.
                    result['White'] += 1

                else:
                    reward_black = 0.
                    reward_white = 0.
                    result['Draw'] += 1

            # ====================== store in memory ======================= #

                # Drain both queues, alternating black and white samples.
                while state_black or state_white:
                    if state_black:
                        cur_memory.append(
                            (state_black.pop(), pi_black.pop(), reward_black))
                    if state_white:
                        cur_memory.append(
                            (state_white.pop(), pi_white.pop(), reward_white))

            # =========================  result  =========================== #

                if PRINT_SELFPLAY and rank == 0:
                    utils.render_str(board, BOARD_SIZE, action_index)

                    bw, ww, dr = result['Black'], result['White'], \
                        result['Draw']
                    print('')
                    print('=' * 20, " {:3} Game End   ".format(episode + 1),
                          '=' * 20)
                    print('Black Win: {:3}   '
                          'White Win: {:3}   '
                          'Draw: {:2}   '
                          'Win%: {:.2f}%'.format(bw, ww, dr, (bw + 0.5 * dr) /
                                                 (bw + ww + dr) * 100))
                    print('current memory size:', len(cur_memory))
                episode += 1
                agent.reset()
                # Stop only at a game boundary, once enough samples exist.
                if len(cur_memory) >= MEMORY_SIZE:
                    return utils.augment_dataset(cur_memory, BOARD_SIZE)