Пример #1
0
    def run(self, e=0.25):
        """
        Play one complete game and return one training record per move.

        The game starts from ``self.get_init_state()``; moves are requested
        from ``self.get_action`` until ``utils.is_game_over`` reports the end.
        Every record stores the position *before* the move was applied, so the
        initial position is recorded and the final one is not.

        :param e: forwarded unchanged as the second positional argument of
            ``self.get_action`` (presumably an exploration rate; confirm
            against ``get_action``)
        :return: list of ``(state, policy, last_action, value, weight)``
            tuples in playing order
        """
        records = []  # (state, policy, last_action) for each position a move was made from
        state = self.get_init_state()
        previous_action = None
        value = 0  # placeholder; overwritten by the first is_game_over call
        finished = False
        while not finished:
            policy, action = self.get_action(state, e, previous_action)
            records.append((state, policy, previous_action))
            board = utils.step(
                utils.state_to_board(state, self.config.board_size), action)
            state = utils.board_to_state(board)
            # Pruning the other branches here would save memory but costs
            # time, so it is intentionally not done.
            finished, value = utils.is_game_over(board, self.goal)
            previous_action = action

        self.reset()  # restart the search tree
        turns = len(records)
        # With an odd number of moves the final value is negated before being
        # attached to the first record; after that the sign alternates record
        # by record.
        first_value = -value if turns % 2 == 1 else value
        alternating = (first_value, -first_value)
        weights = utils.construct_weights(turns, gamma=self.config.gamma)
        return [
            (*records[i], alternating[i % 2], weights[i])
            for i in range(turns)
        ]
Пример #2
0
 def MCTS_search(self, state: str, history: list, last_action: tuple):
     """
     Run a single MCTS simulation rooted at ``state``.

     The walk descends through states already present in ``self.tree``,
     appending each chosen action and the resulting state to ``history``.
     It stops at the first position that is either terminal or not yet in
     the tree, and then passes a value to ``self.update_tree`` together with
     the accumulated history.

     :param state: string encoding of the current (root) position
     :param history: list that already contains the current state; it is
         extended in place with alternating action / state entries
     :param last_action: the move that led to ``state``
     :return: None
     """
     while True:
         board = utils.state_to_board(state, self.config.board_size)
         # The game-over check happens before any stone is placed.
         finished, leaf_value = utils.is_game_over(board, self.goal)
         if not finished and state in self.tree:
             # Known, non-terminal state: pick an action and go one level down.
             chosen = self.select_action_q_and_u(state)
             history.append(chosen)
             board = utils.step(board, chosen)
             state = utils.board_to_state(board)
             history.append(state)
             last_action = chosen
             continue
         if not finished:
             # State seen for the first time: evaluate it and expand it.
             leaf_value = self.evaluate_and_expand(state, board, last_action)
         self.update_tree(leaf_value, history=history)
         return
Пример #3
0
 def pruning_tree(self, board: np.ndarray, state: str = None):
     """
     Drop stored states after the main game has advanced by one move.

     A key of ``self.tree`` is deleted when it differs from ``state`` and
     every cell that holds ``1`` (respectively ``-1``) on the key's board
     also holds ``1`` (respectively ``-1``) on ``board``.

     :param board: the board after the move that was just played
     :param state: string encoding of ``board``; derived with
         ``utils.board_to_state`` when omitted
     :return: None
     """
     if state is None:
         state = utils.board_to_state(board)
     plus_on_board = np.where(board == 1, 1, 0)
     minus_on_board = np.where(board == -1, 1, 0)
     # Iterate over a snapshot of the keys because entries are deleted below.
     for key in list(self.tree.keys()):
         candidate = utils.state_to_board(key, self.config.board_size)
         if key == state:
             continue
         if not np.all(plus_on_board >= np.where(candidate == 1, 1, 0)):
             continue
         if not np.all(minus_on_board >= np.where(candidate == -1, 1, 0)):
             continue
         del self.tree[key]
Пример #4
0
def main():
    """
    Pit checkpoints against each other and append the scores to ``result.txt``.

    Two networks are loaded, initially from the first and the last checkpoint
    of the selected range, and play series of up to 100 games with the
    opening side alternating from game to game. After each series the score
    line is printed and appended to ``result.txt``; then the side that did
    not win more games is moved one checkpoint towards the other, until the
    two indices meet.

    The result file and both models are released even when a game raises,
    in the same order as before (file, ``net1``, ``net0``).
    """
    import contextlib  # local import: only this entry point needs it

    config.simulation_per_step = 500
    # Only checkpoints from step 6060 upwards are searched for now; the
    # ``-1`` slice bound additionally drops the last one of the range.
    all_ckpts = [
        os.path.join("ckpt", "alphaFive-" + str(num))
        for num in range(60, 8800, 60)
    ][100:-1]

    with contextlib.ExitStack() as stack:
        net0 = Model(config.board_size, tf.Graph())
        stack.callback(net0.close)
        net0.restore(all_ckpts[0])
        net1 = Model(config.board_size, tf.Graph())
        stack.callback(net1.close)
        net1.restore(all_ckpts[-1])
        player0 = Player(config, training=False, pv_fn=net0.eval)
        player1 = Player(config, training=False, pv_fn=net1.eval)
        players = [{
            'p': player0,
            "win": 0,
            "ckpt": all_ckpts[0]
        }, {
            'p': player1,
            "win": 0,
            "ckpt": all_ckpts[-1]
        }]
        result = stack.enter_context(open("result.txt", "a"))
        low, high = 0, len(all_ckpts) - 1
        while low < high:  # prefer pairing checkpoints that are far apart
            print("")
            print(
                "=================================================================="
            )
            print(players[0]["ckpt"] + " vs " + players[1]["ckpt"] + '...')
            for i in range(100):  # at most 100 games per pairing
                players[0]['p'].reset()  # both trees are reset before every game
                players[1]['p'].reset()
                game_over = False
                action = None
                state = player1.get_init_state()
                current_ids = i % 2  # the opening side alternates
                value = 0.0
                count = 0
                while not game_over:
                    _, action = players[current_ids]['p'].get_action(
                        state, last_action=action, random_a=True)
                    board = utils.step(
                        utils.state_to_board(state, config.board_size), action)
                    state = utils.board_to_state(board)
                    # No pruning after a move: it would save memory but
                    # costs time.
                    game_over, value = utils.is_game_over(board, config.goal)
                    current_ids = (current_ids + 1) % 2  # next player
                    count += 1
                if value == 0.0:  # draw
                    print(f"game: {i}, tied! all {count} turns.")
                    continue
                # ``current_ids`` was already advanced, so the player who
                # made the final move is the other one.
                winner = (current_ids + 1) % 2
                print(
                    f"game: {i} {players[winner]['ckpt']}  won! all {count} turns."
                )
                players[winner]["win"] += 1
                if i >= 30:
                    # From game index 30 on, stop the series early when the
                    # score is very lopsided.
                    w0 = players[0]["win"]
                    w1 = players[1]["win"]
                    if w0 == 0 or w1 == 0:
                        break
                    elif w0 / w1 > 2.0 or w0 / w1 < 0.5:
                        break
            print_str = players[0]["ckpt"] + ": " + players[1][
                "ckpt"] + f' = {players[0]["win"]}: {players[1]["win"]}'
            print(print_str)
            print(print_str, file=result, flush=True)
            if players[0]["win"] < players[1]["win"]:
                low += 1
                net0.restore(all_ckpts[low])
                players[0]["ckpt"] = all_ckpts[low]
            else:
                high -= 1
                net1.restore(all_ckpts[high])
                players[1]["ckpt"] = all_ckpts[high]

            players[0]["win"] = players[1]["win"] = 0
Пример #5
0
def main(trained_ckpt):
    """
    Let one network play a whole game against itself and save it as a GIF.

    The board is rendered with pygame after every move; each rendered frame
    is grabbed from the display surface, halved in size with ``cv2.resize``
    and finally written to ``tmp/five.gif``. Three extra frames of the final
    position are kept at the end.

    :param trained_ckpt: checkpoint handed to ``Model.restore``
    """
    print(config.simulation_per_step)
    net = Model(config.board_size)
    player = Player(config, training=False, pv_fn=net.eval)
    net.restore(trained_ckpt)
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("五子棋")
    clock = pygame.time.Clock()
    base_folder = os.path.dirname(__file__)
    img_folder = os.path.join(base_folder, 'images')
    background_img = pygame.image.load(os.path.join(img_folder,
                                                    'back.png')).convert()
    background = pygame.transform.scale(background_img, (WIDTH, HEIGHT))
    back_rect = background.get_rect()
    running = True
    frames = []

    def draw_stone(screen_):
        """Draw a disc for every non-empty cell of the enclosing ``state``."""
        for i in range(config.board_size):
            for j in range(config.board_size):
                cell = state[i, j]
                if cell == 1:
                    colour = BLACK
                elif cell == -1:
                    colour = WHITE
                else:
                    assert cell == 0
                    continue
                # Column index maps to x, row index to y.
                centre = (int((j + 1.5) * GRID_WIDTH),
                          int((i + 1.5) * GRID_WIDTH))
                pygame.draw.circle(screen_, colour, centre, 16)

    def draw_background(surf):
        """Paint the background image, the frame, the grid and four dots."""
        surf.blit(background, back_rect)
        rect_lines = [
            ((GRID_WIDTH, GRID_WIDTH), (GRID_WIDTH, HEIGHT - GRID_WIDTH)),
            ((GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH, GRID_WIDTH)),
            ((GRID_WIDTH, HEIGHT - GRID_WIDTH), (WIDTH - GRID_WIDTH,
                                                 HEIGHT - GRID_WIDTH)),
            ((WIDTH - GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH,
                                                HEIGHT - GRID_WIDTH)),
        ]
        for line in rect_lines:
            pygame.draw.line(surf, BLACK, line[0], line[1], 2)

        for i in range(config.board_size):
            pygame.draw.line(surf, BLACK, (GRID_WIDTH * (2 + i), GRID_WIDTH),
                             (GRID_WIDTH * (2 + i), HEIGHT - GRID_WIDTH))
            # Horizontal lines end at the right border, an x coordinate, so
            # the bound is derived from WIDTH.
            pygame.draw.line(surf, BLACK, (GRID_WIDTH, GRID_WIDTH * (2 + i)),
                             (WIDTH - GRID_WIDTH, GRID_WIDTH * (2 + i)))

        circle_center = [
            (GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
            (GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
        ]
        for cc in circle_center:
            pygame.draw.circle(surf, BLACK, cc, 5)

    def capture_frame():
        """Append a half-size copy of the current display to ``frames``."""
        image_data = pygame.surfarray.array3d(pygame.display.get_surface())
        frames.append(cv2.resize(image_data, (0, 0), fx=0.5, fy=0.5))

    game_over = False
    state_str = player.get_init_state()
    board = utils.state_to_board(state_str, config.board_size)
    state = board
    draw_background(screen)
    pygame.display.flip()
    capture_frame()
    turn = 0
    closing_frames = 0
    # Carried from move to move so that get_action receives the previous
    # move, as the other callers of get_action do.
    last_action = None
    while running:
        clock.tick(FPS)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break
        if not game_over:
            _, action = player.get_action(state_str, last_action=last_action)
            last_action = action
            board = utils.step(
                utils.state_to_board(state_str, config.board_size), action)
            state_str = utils.board_to_state(board)
            # No pruning after the move (it would only save memory).
            game_over, _ = utils.is_game_over(board, config.goal)
            # The sign of the board is flipped on every other turn before
            # drawing.
            if turn % 2 == 1:
                state = board
            else:
                state = -board
            turn += 1
            draw_background(screen)
            draw_stone(screen)
            pygame.display.flip()
            capture_frame()

        if game_over:
            closing_frames += 1
            capture_frame()
            if closing_frames >= 3:  # keep three frames of the final position
                break

    pygame.quit()
    print("game finished, start to write to gif.")
    os.makedirs("tmp", exist_ok=True)  # the output directory may not exist yet
    imageio.mimsave("tmp/five.gif", frames, 'GIF', duration=0.8)
    print("done!")
Пример #6
0
def main(trained_ckpt):
    """
    Play an interactive game, human against the network, in a pygame window.

    The user chooses on the console who moves first (or exits). Human moves
    are entered with mouse clicks; after every accepted human move the
    network answers. When the module-level ``make_gif`` flag is set, every
    rendered position is recorded and written to ``tmp/five_6960.gif`` once
    the game is over.

    :param trained_ckpt: checkpoint handed to ``Model.restore``
    """
    net = Model(config.board_size)
    player = Player(config, training=False, pv_fn=net.eval)
    net.restore(trained_ckpt)
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("五子棋")
    clock = pygame.time.Clock()
    base_folder = os.path.dirname(__file__)
    img_folder = os.path.join(base_folder, 'images')
    background_img = pygame.image.load(os.path.join(img_folder,
                                                    'back.png')).convert()
    background = pygame.transform.scale(background_img, (WIDTH, HEIGHT))
    back_rect = background.get_rect()
    running = True
    frames = []

    def draw_stone(screen_):
        """Draw a disc for every non-empty cell of the enclosing ``state``."""
        for i in range(config.board_size):
            for j in range(config.board_size):
                cell = state[i, j]
                if cell == 1:
                    colour = BLACK
                elif cell == -1:
                    colour = WHITE
                else:
                    assert cell == 0
                    continue
                # Column index maps to x, row index to y.
                centre = (int((j + 1.5) * GRID_WIDTH),
                          int((i + 1.5) * GRID_WIDTH))
                pygame.draw.circle(screen_, colour, centre, 16)

    def draw_background(surf):
        """Paint the background image, the frame, the grid and four dots."""
        surf.blit(background, back_rect)
        rect_lines = [
            ((GRID_WIDTH, GRID_WIDTH), (GRID_WIDTH, HEIGHT - GRID_WIDTH)),
            ((GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH, GRID_WIDTH)),
            ((GRID_WIDTH, HEIGHT - GRID_WIDTH), (WIDTH - GRID_WIDTH,
                                                 HEIGHT - GRID_WIDTH)),
            ((WIDTH - GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH,
                                                HEIGHT - GRID_WIDTH)),
        ]
        for line in rect_lines:
            pygame.draw.line(surf, BLACK, line[0], line[1], 2)

        for i in range(config.board_size):
            pygame.draw.line(surf, BLACK, (GRID_WIDTH * (2 + i), GRID_WIDTH),
                             (GRID_WIDTH * (2 + i), HEIGHT - GRID_WIDTH))
            # Horizontal lines end at the right border, an x coordinate, so
            # the bound is derived from WIDTH.
            pygame.draw.line(surf, BLACK, (GRID_WIDTH, GRID_WIDTH * (2 + i)),
                             (WIDTH - GRID_WIDTH, GRID_WIDTH * (2 + i)))

        circle_center = [
            (GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
            (GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
        ]
        for cc in circle_center:
            pygame.draw.circle(surf, BLACK, cc, 5)

    def capture_frame():
        """Record the current display, but only when a GIF will be written."""
        if not make_gif:
            # Frames are only consumed by the GIF writer; collecting them
            # otherwise would grow ``frames`` on every tick after game over.
            return
        image_data = pygame.surfarray.array3d(pygame.display.get_surface())
        frames.append(cv2.resize(image_data, (0, 0), fx=0.5, fy=0.5))

    def redraw_and_capture():
        """Render the current position and record it."""
        draw_background(screen)
        draw_stone(screen)
        pygame.display.flip()
        capture_frame()

    def ask_first_side():
        """Prompt until the user enters 0, 1 or 2 and return that number."""
        while True:
            try:
                choice = int(
                    input("input the fist side, (0 human), (1 AI), (2 exit): "))
            except ValueError:
                continue  # not a number: ask again instead of crashing
            if choice in (0, 1, 2):
                return choice

    draw_background(screen)
    pygame.display.flip()
    capture_frame()
    players = [HUMAN, AI]  # index 0 is the human player, index 1 the network
    idx = ask_first_side()
    if idx == 2:
        pygame.quit()
        raise SystemExit
    if players[idx] == AI:
        print("AI first")
    else:
        print("Human first")
    game_over = False
    value = None  # set by every is_game_over call below
    state_str = player.get_init_state()
    board = utils.state_to_board(state_str, config.board_size)
    state = board
    last_action = None
    last_mover = None  # HUMAN or AI, whoever placed the most recent stone
    huihe = 0  # number of stones placed so far
    if players[idx] == AI:
        _, action = player.get_action(state_str, last_action=last_action)
        last_action = action
        print("AI's action, ", action)
        huihe += 1
        last_mover = AI
        board = utils.step(utils.state_to_board(state_str, config.board_size),
                           action)
        state_str = utils.board_to_state(board)
        # No pruning after this first move.
        game_over, value = utils.is_game_over(board, config.goal)
        state = -board
        redraw_and_capture()
    closing_ticks = 0
    while running:
        clock.tick(FPS)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if game_over:
                    break
                pos = event.pos  # pixel coordinates as (x, y)
                if out_of_boundry(pos):
                    continue
                action = (int((pos[1] - GRID_WIDTH) / GRID_WIDTH),
                          int((pos[0] - GRID_WIDTH) / GRID_WIDTH))
                print("Human's action: ", action)
                if state[action[0], action[1]] != 0:
                    # Occupied cell: the click is ignored and must not be
                    # counted as a move.
                    continue
                huihe += 1
                last_mover = HUMAN
                board = utils.step(board, action)  # the human's stone
                last_action = action
                state_str = utils.board_to_state(board)
                game_over, value = utils.is_game_over(board, config.goal)
                state = board
                redraw_and_capture()
                if game_over:
                    continue
                _, action = player.get_action(state_str,
                                              last_action=last_action,
                                              random_a=False)
                last_action = action
                print("AI's action ", action)
                huihe += 1
                last_mover = AI
                board = utils.step(
                    utils.state_to_board(state_str, config.board_size), action)
                state_str = utils.board_to_state(board)
                # Prune the other branches after the move to save memory.
                player.pruning_tree(board, state_str)
                game_over, value = utils.is_game_over(board, config.goal)
                state = -board
                redraw_and_capture()
        if game_over:
            if closing_ticks == 0:
                print(f"game over, total {(huihe+1)//2} rounds")
                # A value of 0 at game over is treated as a draw, the same
                # convention the checkpoint-arena script relies on; otherwise
                # the side that placed the last stone has won.
                if value == 0.0:
                    print("game tied!")
                elif last_mover == AI:
                    print("AI won! You are stupid!")
                else:
                    print("you won!, You niubi")
            closing_ticks += 1
            capture_frame()
            if closing_ticks >= 5 and make_gif:
                break

    pygame.quit()
    if make_gif:
        print("game finished, start to write to gif.")
        os.makedirs("tmp", exist_ok=True)  # the output directory may not exist yet
        imageio.mimsave("tmp/five_6960.gif", frames, 'GIF', duration=1.0)
    print("done!")