def run(self, e=0.25):
    """Play one complete game and return the samples collected along the way.

    Starting from ``self.get_init_state()``, moves are chosen with
    ``self.get_action`` until ``utils.is_game_over`` reports the end of the
    game. The position *before* each move is recorded, so the initial
    position is part of the data and the final position is not.

    :param e: forwarded unchanged as the second positional argument of
        ``self.get_action``.
    :return: a list with one tuple per move played:
        ``(state, policy, last_action, value, weight)``. ``value`` starts as
        the terminal value (negated when the number of moves is odd) and
        flips sign from one sample to the next; ``weight`` is the matching
        entry of ``utils.construct_weights``.
    """
    current_state = self.get_init_state()
    samples = []  # (state, policy, last_action) triples, one per move
    finished = False
    outcome = 0
    previous_move = None

    while not finished:
        policy, move = self.get_action(current_state, e, previous_move)
        # Store the position the move is played FROM, never the final one.
        samples.append((current_state, policy, previous_move))
        next_board = utils.step(
            utils.state_to_board(current_state, self.config.board_size), move)
        current_state = utils.board_to_state(next_board)
        # Pruning the other branches here would save memory; it is left out
        # to save time.
        finished, outcome = utils.is_game_over(next_board, self.goal)
        previous_move = move

    self.reset()  # start the next game from a fresh tree

    n_turns = len(samples)
    signed_value = -outcome if n_turns % 2 == 1 else outcome
    weights = utils.construct_weights(n_turns, gamma=self.config.gamma)

    labelled = []
    for idx, sample in enumerate(samples):
        # (state, policy, last_action, value, weight)
        labelled.append((*sample, signed_value, weights[idx]))
        signed_value = -signed_value
    return labelled
def MCTS_search(self, state: str, history: list, last_action: tuple):
    """Run one MCTS simulation rooted at ``state``.

    The walk descends through positions already stored in ``self.tree``,
    appending every selected action and every resulting state to
    ``history``. It stops at the first position that is either terminal or
    not yet in the tree, and then backs the value up with
    ``self.update_tree``.

    :param state: string encoding of the root position.
    :param history: list that already contains the current state; it is
        extended in place with alternating actions and states.
    :param last_action: the move that led to ``state``.
    :return: None.
    """
    node = state
    previous_move = last_action
    while True:
        board = utils.state_to_board(node, self.config.board_size)
        # Check for the end of the game before any stone is placed.
        terminal, leaf_value = utils.is_game_over(board, self.goal)

        if not terminal and node in self.tree:
            # Known, non-terminal position: keep descending.
            chosen = self.select_action_q_and_u(node)
            history.append(chosen)
            board = utils.step(board, chosen)
            node = utils.board_to_state(board)
            history.append(node)
            previous_move = chosen
            continue

        if not terminal:
            # First visit of this position: evaluate it, then expand it.
            leaf_value = self.evaluate_and_expand(node, board, previous_move)
        self.update_tree(leaf_value, history=history)
        return
def pruning_tree(self, board: np.ndarray, state: str = None):
    """Delete stored positions that the position ``board`` already contains.

    Original intent (translated): after the main game advances one move the
    tree can be pruned so that only the subtree of that move is kept.

    Concretely, every key of ``self.tree`` other than ``state`` is removed
    when its decoded board has a 1 only where ``board`` has a 1 and a -1
    only where ``board`` has a -1.

    :param board: board array of the position to keep.
    :param state: string key of ``board``; computed with
        ``utils.board_to_state`` when omitted.
    :return: None; ``self.tree`` is modified in place.
    """
    if state is None:
        state = utils.board_to_state(board)

    size = self.config.board_size
    # These masks depend only on ``board``, so build them once instead of
    # once per stored key.
    kept_pos = np.where(board == 1, 1, 0)
    kept_neg = np.where(board == -1, 1, 0)

    # Snapshot the keys: entries are deleted while we walk over them.
    for key in list(self.tree):
        if key == state:
            # The kept position is never deleted; no need to decode it.
            continue
        other = utils.state_to_board(key, size)
        if np.all(kept_pos >= np.where(other == 1, 1, 0)) \
                and np.all(kept_neg >= np.where(other == -1, 1, 0)):
            del self.tree[key]
def main():
    """Play checkpoints against each other and append the scores to ``result.txt``.

    Two networks are loaded from the two ends of the checkpoint list. Each
    pairing plays up to 100 games, alternating who moves first. After a
    pairing, the side with fewer wins is replaced by its neighbouring
    checkpoint (``low`` moves up, ``high`` moves down) until the two indices
    meet. The result file and both networks are released even if a game
    raises.
    """
    config.simulation_per_step = 500
    # Only checkpoints numbered 6060 and above are searched for now.
    all_ckpts = [
        os.path.join("ckpt", "alphaFive-" + str(num))
        for num in range(60, 8800, 60)
    ][100:-1]

    def play_one_game(players, first):
        """Play one game; return (winner index or None for a tie, move count)."""
        for entry in players:
            entry['p'].reset()  # every game starts from a fresh search tree
        state = players[1]['p'].get_init_state()
        mover = first
        last_mover = first
        action = None
        game_over = False
        value = 0.0
        count = 0
        while not game_over:
            _, action = players[mover]['p'].get_action(
                state, last_action=action, random_a=True)
            board = utils.step(
                utils.state_to_board(state, config.board_size), action)
            state = utils.board_to_state(board)
            # No pruning after the move: it would save memory but cost time.
            game_over, value = utils.is_game_over(board, config.goal)
            last_mover = mover
            mover = (mover + 1) % 2  # hand the turn to the other player
            count += 1
        if value == 0.0:
            return None, count
        # A decisive game is credited to whoever made the last move.
        return last_mover, count

    def run_ladder(net0, net1, result):
        """Run every pairing and log one summary line per pairing."""
        player0 = Player(config, training=False, pv_fn=net0.eval)
        player1 = Player(config, training=False, pv_fn=net1.eval)
        players = [{
            'p': player0,
            "win": 0,
            "ckpt": all_ckpts[0]
        }, {
            'p': player1,
            "win": 0,
            "ckpt": all_ckpts[-1]
        }]
        low, high = 0, len(all_ckpts) - 1
        while low < high:  # pair checkpoints that are as far apart as possible
            print("")
            print(
                "=================================================================="
            )
            print(players[0]["ckpt"] + " vs " + players[1]["ckpt"] + '...')
            for i in range(100):  # at most 100 games per pairing
                winner, count = play_one_game(players, i % 2)
                if winner is None:
                    print(f"game: {i}, tied! all {count} turns.")
                    continue
                print(f"game: {i} {players[winner]['ckpt']} won! all {count} turns.")
                players[winner]["win"] += 1
                if i >= 30:
                    # From game index 30 on, stop early once the score is
                    # clearly one-sided (a shut-out or more than 2:1).
                    w0 = players[0]["win"]
                    w1 = players[1]["win"]
                    if w0 == 0 or w1 == 0:
                        break
                    if w0 / w1 > 2.0 or w0 / w1 < 0.5:
                        break
            print_str = players[0]["ckpt"] + ": " + players[1][
                "ckpt"] + f' = {players[0]["win"]}: {players[1]["win"]}'
            print(print_str)
            print(print_str, file=result, flush=True)
            if players[0]["win"] < players[1]["win"]:
                low += 1
                net0.restore(all_ckpts[low])
                players[0]["ckpt"] = all_ckpts[low]
            else:
                high -= 1
                net1.restore(all_ckpts[high])
                players[1]["ckpt"] = all_ckpts[high]
            players[0]["win"] = players[1]["win"] = 0

    net0 = Model(config.board_size, tf.Graph())
    try:
        net0.restore(all_ckpts[0])
        net1 = Model(config.board_size, tf.Graph())
        try:
            net1.restore(all_ckpts[-1])
            with open("result.txt", "a") as result:
                run_ladder(net0, net1, result)
        finally:
            net1.close()
    finally:
        net0.close()
def main(trained_ckpt):
    """Let the restored network play itself on screen and save the game as a GIF.

    Every move is drawn in a pygame window and captured as a half-size
    frame. Once the game is over, three more frames of the final position
    are captured, the window is closed and all frames are written to
    ``tmp/five.gif``.

    :param trained_ckpt: checkpoint passed to ``Model.restore``.
    """
    print(config.simulation_per_step)
    net = Model(config.board_size)
    player = Player(config, training=False, pv_fn=net.eval)
    net.restore(trained_ckpt)

    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("五子棋")
    clock = pygame.time.Clock()
    base_folder = os.path.dirname(__file__)
    img_folder = os.path.join(base_folder, 'images')
    background_img = pygame.image.load(os.path.join(img_folder, 'back.png')).convert()
    background = pygame.transform.scale(background_img, (WIDTH, HEIGHT))
    back_rect = background.get_rect()
    frames = []

    def draw_stone(screen_, stones):
        """Draw a black disc for every 1 and a white disc for every -1 in ``stones``."""
        for i in range(config.board_size):
            for j in range(config.board_size):
                # Row index i maps to the y pixel axis, column index j to x.
                center = (int((j + 1.5) * GRID_WIDTH), int((i + 1.5) * GRID_WIDTH))
                if stones[i, j] == 1:
                    pygame.draw.circle(screen_, BLACK, center, 16)
                elif stones[i, j] == -1:
                    pygame.draw.circle(screen_, WHITE, center, 16)
                else:
                    assert stones[i, j] == 0

    def draw_background(surf):
        """Paint the background image, the board frame, the grid and the four dots."""
        # Draw on the surface that was passed in, not on the outer ``screen``.
        surf.blit(background, back_rect)
        rect_lines = [
            ((GRID_WIDTH, GRID_WIDTH), (GRID_WIDTH, HEIGHT - GRID_WIDTH)),
            ((GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH, GRID_WIDTH)),
            ((GRID_WIDTH, HEIGHT - GRID_WIDTH), (WIDTH - GRID_WIDTH, HEIGHT - GRID_WIDTH)),
            ((WIDTH - GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH, HEIGHT - GRID_WIDTH)),
        ]
        for start, end in rect_lines:
            pygame.draw.line(surf, BLACK, start, end, 2)
        for i in range(config.board_size):
            offset = GRID_WIDTH * (2 + i)
            pygame.draw.line(surf, BLACK, (offset, GRID_WIDTH),
                             (offset, HEIGHT - GRID_WIDTH))
            # A horizontal line ends at the right edge, which is measured
            # along the x axis, i.e. from WIDTH (the original used HEIGHT).
            pygame.draw.line(surf, BLACK, (GRID_WIDTH, offset),
                             (WIDTH - GRID_WIDTH, offset))
        circle_center = [
            (GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
            (GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
        ]
        for cc in circle_center:
            pygame.draw.circle(surf, BLACK, cc, 5)

    def capture_frame():
        """Append a half-size copy of the current display to ``frames``."""
        image_data = pygame.surfarray.array3d(pygame.display.get_surface())
        frames.append(cv2.resize(image_data, (0, 0), fx=0.5, fy=0.5))

    game_over = False
    state_str = player.get_init_state()
    draw_background(screen)
    pygame.display.flip()
    capture_frame()

    last_action = None  # previous move, handed to the next get_action call
    turn = 0
    final_frames = 0
    running = True
    while running:
        clock.tick(FPS)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break
        if not running:
            # Honour the close button right away instead of searching for
            # one more move first.
            break

        if not game_over:
            _, action = player.get_action(state_str, last_action=last_action)
            last_action = action
            board = utils.step(
                utils.state_to_board(state_str, config.board_size), action)
            state_str = utils.board_to_state(board)
            # No pruning after the move (it would only save memory).
            game_over, _ = utils.is_game_over(board, config.goal)
            # The sign used for drawing alternates with the turn so that a
            # stone keeps its colour from one frame to the next.
            stones = board if turn % 2 == 1 else -board
            turn += 1
            draw_background(screen)
            draw_stone(screen, stones)
            pygame.display.flip()
            capture_frame()

        if game_over:
            final_frames += 1
            capture_frame()
            if final_frames >= 3:  # keep three frames of the final position
                break

    pygame.quit()
    print("game finished, start to write to gif.")
    os.makedirs("tmp", exist_ok=True)  # the output directory may not exist yet
    imageio.mimsave("tmp/five.gif", frames, 'GIF', duration=0.8)
    print("done!")
def main(trained_ckpt):
    """Play an interactive game between a human (mouse) and the restored network.

    The user chooses who moves first on the console. Human moves come from
    mouse clicks on the board; the network answers immediately after every
    accepted click. When the game ends the result is printed once. If the
    module-level ``make_gif`` flag is set, the loop stops after five more
    frames and every captured frame is written to ``tmp/five_6960.gif``.

    :param trained_ckpt: checkpoint passed to ``Model.restore``.
    """
    net = Model(config.board_size)
    player = Player(config, training=False, pv_fn=net.eval)
    net.restore(trained_ckpt)

    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption("五子棋")
    clock = pygame.time.Clock()
    base_folder = os.path.dirname(__file__)
    img_folder = os.path.join(base_folder, 'images')
    background_img = pygame.image.load(os.path.join(img_folder, 'back.png')).convert()
    background = pygame.transform.scale(background_img, (WIDTH, HEIGHT))
    back_rect = background.get_rect()
    frames = []

    def draw_stone(screen_, stones):
        """Draw a black disc for every 1 and a white disc for every -1 in ``stones``."""
        for i in range(config.board_size):
            for j in range(config.board_size):
                # Row index i maps to the y pixel axis, column index j to x.
                center = (int((j + 1.5) * GRID_WIDTH), int((i + 1.5) * GRID_WIDTH))
                if stones[i, j] == 1:
                    pygame.draw.circle(screen_, BLACK, center, 16)
                elif stones[i, j] == -1:
                    pygame.draw.circle(screen_, WHITE, center, 16)
                else:
                    assert stones[i, j] == 0

    def draw_background(surf):
        """Paint the background image, the board frame, the grid and the four dots."""
        # Draw on the surface that was passed in, not on the outer ``screen``.
        surf.blit(background, back_rect)
        rect_lines = [
            ((GRID_WIDTH, GRID_WIDTH), (GRID_WIDTH, HEIGHT - GRID_WIDTH)),
            ((GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH, GRID_WIDTH)),
            ((GRID_WIDTH, HEIGHT - GRID_WIDTH), (WIDTH - GRID_WIDTH, HEIGHT - GRID_WIDTH)),
            ((WIDTH - GRID_WIDTH, GRID_WIDTH), (WIDTH - GRID_WIDTH, HEIGHT - GRID_WIDTH)),
        ]
        for start, end in rect_lines:
            pygame.draw.line(surf, BLACK, start, end, 2)
        for i in range(config.board_size):
            offset = GRID_WIDTH * (2 + i)
            pygame.draw.line(surf, BLACK, (offset, GRID_WIDTH),
                             (offset, HEIGHT - GRID_WIDTH))
            # A horizontal line ends at the right edge, which is measured
            # along the x axis, i.e. from WIDTH (the original used HEIGHT).
            pygame.draw.line(surf, BLACK, (GRID_WIDTH, offset),
                             (WIDTH - GRID_WIDTH, offset))
        circle_center = [
            (GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, GRID_WIDTH * 4),
            (WIDTH - GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
            (GRID_WIDTH * 4, HEIGHT - GRID_WIDTH * 4),
        ]
        for cc in circle_center:
            pygame.draw.circle(surf, BLACK, cc, 5)

    def capture_frame():
        """Append a half-size copy of the current display to ``frames``."""
        image_data = pygame.surfarray.array3d(pygame.display.get_surface())
        frames.append(cv2.resize(image_data, (0, 0), fx=0.5, fy=0.5))

    def redraw(stones):
        """Repaint the board with ``stones``, show it and capture the frame."""
        draw_background(screen)
        draw_stone(screen, stones)
        pygame.display.flip()
        capture_frame()

    draw_background(screen)
    pygame.display.flip()
    capture_frame()

    players = [HUMAN, AI]  # index 0 is the human, index 1 the network player
    prompt = "input the fist side, (0 human), (1 AI), (2 exit): "
    idx = None
    while idx not in (0, 1, 2):
        try:
            idx = int(input(prompt))
        except ValueError:
            idx = None  # not a number: ask again instead of crashing
    if idx == 2:
        pygame.quit()
        raise SystemExit
    if players[idx] == AI:
        print("AI first")
    else:
        print("Human first")

    game_over = False
    value = 0.0
    state_str = player.get_init_state()
    board = utils.state_to_board(state_str, config.board_size)
    state = board  # array handed to draw_stone; sign chosen per mover below
    last_action = None
    last_mover = None  # who placed the most recent stone
    huihe = 0  # number of stones placed so far

    if players[idx] == AI:
        _, action = player.get_action(state_str, last_action=last_action)
        print("AI's action, ", action)
        huihe += 1
        last_mover = AI
        board = utils.step(utils.state_to_board(state_str, config.board_size), action)
        state_str = utils.board_to_state(board)
        # No pruning after this opening move (it would only save memory).
        game_over, value = utils.is_game_over(board, config.goal)
        state = -board
        redraw(state)

    end_frames = 0
    running = True
    while running:
        clock.tick(FPS)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break
            if event.type != pygame.MOUSEBUTTONDOWN:
                continue
            if game_over:
                break
            pos = event.pos  # pixel coordinates, (x, y)
            if out_of_boundry(pos):
                continue
            # y selects the row and x the column of the clicked cell.
            action = (int((pos[1] - GRID_WIDTH) / GRID_WIDTH),
                      int((pos[0] - GRID_WIDTH) / GRID_WIDTH))
            print("Human's action: ", action)
            if state[action[0], action[1]] != 0:
                continue  # occupied cell: ignore the click, count no move
            huihe += 1
            last_mover = HUMAN
            board = utils.step(board, action)  # the human places a stone
            last_action = action
            state_str = utils.board_to_state(board)
            game_over, value = utils.is_game_over(board, config.goal)
            state = board
            redraw(state)
            if game_over:
                continue

            _, action = player.get_action(state_str, last_action=last_action,
                                          random_a=False)
            last_action = action
            print("AI's action ", action)
            huihe += 1
            last_mover = AI
            board = utils.step(
                utils.state_to_board(state_str, config.board_size), action)
            state_str = utils.board_to_state(board)
            # Prune the other branches after the move to save memory.
            player.pruning_tree(board, state_str)
            game_over, value = utils.is_game_over(board, config.goal)
            state = -board
            redraw(state)

        if game_over:
            if end_frames == 0:
                # Announce the result only once.
                print(f"game over, total {(huihe+1)//2} rounds")
                if value == 0.0:
                    # A terminal value of 0 marks a draw.
                    print("game tied!")
                elif last_mover == AI:
                    # A decisive game is won by whoever moved last.
                    print("AI won! You are stupid!")
                else:
                    print("you won!, You niubi")
            end_frames += 1
            capture_frame()
            if end_frames >= 5 and make_gif:
                break

    pygame.quit()
    if make_gif:
        print("game finished, start to write to gif.")
        os.makedirs("tmp", exist_ok=True)  # the output directory may not exist yet
        imageio.mimsave("tmp/five_6960.gif", frames, 'GIF', duration=1.0)
    print("done!")