def test_env():
    """Smoke-test CChessEnv: reset, play two fixed moves, and print the state after each step."""
    from cchess_alphazero.environment.env import CChessEnv

    board_env = CChessEnv()
    board_env.reset()
    print(board_env.observation)
    # Two canned moves; observation is printed after each to eyeball state updates.
    for ucci_move in ('0001', '7770'):
        board_env.step(ucci_move)
        print(board_env.observation)
    board_env.render()
    # Show a slice of the input feature planes (planes 7..9).
    print(board_env.input_planes()[0 + 7:3 + 7])
def self_play_buffer(config, cur) -> (CChessEnv, list): pipes = cur.pop() # borrow env = CChessEnv(config).reset() search_tree = defaultdict(VisitState) red = CChessPlayer(config, search_tree=search_tree, pipes=pipes) black = CChessPlayer(config, search_tree=search_tree, pipes=pipes) history = [] cc = 0 while not env.done: start_time = time() if env.red_to_move: action = red.action(env) else: action = black.action(env) end_time = time() logger.debug( f"Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s" ) env.step(action) history.append(action) if len(history) > 6 and history[-1] == history[-5]: cc = cc + 1 else: cc = 0 if env.num_halfmoves / 2 >= config.play.max_game_length: env.winner = Winner.draw if cc >= 4: if env.red_to_move: env.winner = Winner.black else: env.winner = Winner.red if env.winner == Winner.red: black_win = -1 elif env.winner == Winner.black: black_win = 1 else: black_win = 0 black.finish_game(black_win) red.finish_game(-black_win) data = [] for i in range(len(red.moves)): data.append(red.moves[i]) if i < len(black.moves): data.append(black.moves[i]) cur.append(pipes) return env, data
class ObSelfPlay:
    """Watch-mode self play: the loaded model plays both sides on a console board."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None

    def load_model(self):
        """Load best weights into a fresh model; build from scratch if unavailable or forced new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        """Run one full game, printing the board after every AI move."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
        history = [self.env.get_state()]
        while not self.env.board.is_end():
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                # Position repetition: forbid the actions previously played
                # from this same state to avoid move cycles.
                # NOTE(review): unlike ObSelfPlayUCCI.start, the forbidden
                # action is NOT flipped for black here — confirm intended.
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            history.append(action)
            if action is None:
                # Player resigned (returned no action).
                print("AI投降了!")
                break
            # Record the move notation before flipping to board coordinates.
            move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
            if not self.env.red_to_move:
                # Actions are produced from red's perspective; flip for black.
                action = flip_move(action)
            self.env.step(action)
            history.append(self.env.get_state())
            print(f"AI选择移动 {move}")
            self.env.board.print_to_cl()
            sleep(1)  # slow the display down for a human observer

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
def load_game(self, init, move_list, winner, idx, title, url):
    """Replay one recorded game and append (observation, policy, value) samples to the buffer.

    Args:
        init: initial-position string passed to ``CChessEnv.reset``.
        move_list: flat string of concatenated 4-char "onegreen" moves.
        winner: a ``Winner`` value (red / black / other = unfinished or draw).
        idx, title, url: identifiers of the source record, used only for logging.

    Returns:
        The red-side game value (1 / -1, or a static evaluation for
        unfinished games), or None when the record contains an invalid move.
    """
    turns = 0
    env = CChessEnv(self.config).reset(init)
    red_moves = []
    black_moves = []
    # Slice the flat string into 4-char moves with an explicit stride
    # (clearer than filtering every index with `i % 4 == 0`).
    moves = [move_list[i:i + 4] for i in range(0, len(move_list), 4)]
    for move in moves:
        action = senv.parse_onegreen_move(move)
        try:
            if turns % 2 == 0:
                # Red to move: store the observation with an unflipped policy.
                red_moves.append([
                    env.observation,
                    self.build_policy(action, flip=False)
                ])
            else:
                # Black to move: policy is flipped into red's perspective.
                black_moves.append([
                    env.observation,
                    self.build_policy(action, flip=True)
                ])
            env.step(action)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; a malformed record is logged and discarded.
            logger.error(
                f"Invalid Action: idx = {idx}, action = {action}, turns = {turns}, moves = {moves}, "
                f"winner = {winner}, init = {init}, title: {title}, url: {url}"
            )
            return
        turns += 1

    if winner == Winner.red:
        red_win = 1
    elif winner == Winner.black:
        red_win = -1
    else:
        # Unfinished/drawn record: score the final position statically,
        # from the perspective of the side to move.
        red_win = senv.evaluate(env.get_state())
        if not env.red_to_move:
            red_win = -red_win

    # Append the value target to every sample (negated for black).
    for move in red_moves:
        move += [red_win]
    for move in black_moves:
        move += [-red_win]

    data = []
    # Interleave red/black samples so they stay in play order.
    for i in range(len(red_moves)):
        data.append(red_moves[i])
        if i < len(black_moves):
            data.append(black_moves[i])
    self.buffer += data
    return red_win
def test_light_env():
    """Smoke-test CChessEnv under the 'mini' config: two fixed moves with state printed each step."""
    from cchess_alphazero.environment.env import CChessEnv
    from cchess_alphazero.config import Config

    mini_config = Config('mini')
    board_env = CChessEnv(mini_config)
    board_env.reset()
    print(board_env.observation)
    # Same canned move sequence as test_env, but against the mini config.
    for ucci_move in ('0001', '7770'):
        board_env.step(ucci_move)
        print(board_env.observation)
    board_env.render()
    # Show a slice of the input feature planes (planes 7..9).
    print(board_env.input_planes()[0 + 7:3 + 7])
def load_game(self, red, black, winner, idx):
    """Replay one game given per-side WXF move tables and push training samples to the buffer.

    `red` and `black` are dataframe-like tables with `turn` and `move`
    columns; samples are [observation, policy] pairs, later tagged with the
    game value (red perspective, negated for black).
    """
    env = CChessEnv(self.config).reset()
    red_moves = []
    black_moves = []
    turns = 1
    black_max_turn = black['turn'].max()
    red_max_turn = red['turn'].max()

    while turns < black_max_turn or turns < red_max_turn:
        # Red always acts first within a turn, then black — identical
        # handling for both sides, so drive it with a small table.
        for frame, max_turn, sink, flip in (
                (red, red_max_turn, red_moves, False),
                (black, black_max_turn, black_moves, True)):
            if turns >= max_turn:
                continue
            wxf_move = frame[frame.turn == turns]['move'].item()
            action = env.board.parse_WXF_move(wxf_move)
            try:
                sink.append([env.observation, self.build_policy(action, flip=flip)])
            except Exception:
                # Dump the board and the offending move for debugging.
                for i in range(10):
                    logger.debug(f"{env.board.screen[i]}")
                logger.debug(f"{turns} {wxf_move} {action}")
            env.step(action)
        turns += 1

    if winner == 'red':
        red_win = 1
    elif winner == 'black':
        red_win = -1
    else:
        red_win = 0

    # Attach the value target to every sample (negated for black).
    for sample in red_moves:
        sample += [red_win]
    for sample in black_moves:
        sample += [-red_win]

    data = []
    # Interleave the two sides back into play order.
    for i, sample in enumerate(red_moves):
        data.append(sample)
        if i < len(black_moves):
            data.append(black_moves[i])
    self.buffer += data
def test_onegreen2():
    """Smoke-test parsing of 'onegreen' moves against a custom initial position."""
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.config import Config

    mini_config = Config('mini')
    init = '9999299949999999249999869999999958999999519999999999999999997699'
    board_env = CChessEnv(mini_config)
    board_env.reset(init)
    print(board_env.observation)
    board_env.render()
    # Parse and play two onegreen-encoded moves, rendering after each.
    for onegreen in ('8685', '7666'):
        parsed = senv.parse_onegreen_move(onegreen)
        board_env.step(parsed)
        print(board_env.observation)
        board_env.render()
def load_game(self, init, move_list, winner, idx, title, url):
    """Replay one recorded game and append (observation, policy, value) samples to the buffer.

    Args:
        init: initial-position string passed to ``CChessEnv.reset``.
        move_list: flat string of concatenated 4-char "onegreen" moves.
        winner: a ``Winner`` value (red / black / other = unfinished or draw).
        idx, title, url: identifiers of the source record, used only for logging.

    Returns:
        The red-side game value (1 / -1, or a static evaluation for
        unfinished games), or None when the record contains an invalid move.
    """
    turns = 0
    env = CChessEnv(self.config).reset(init)
    red_moves = []
    black_moves = []
    # Slice the flat string into 4-char moves with an explicit stride
    # (clearer than filtering every index with `i % 4 == 0`).
    moves = [move_list[i:i + 4] for i in range(0, len(move_list), 4)]
    for move in moves:
        action = senv.parse_onegreen_move(move)
        try:
            if turns % 2 == 0:
                # Red to move: store the observation with an unflipped policy.
                red_moves.append([env.observation, self.build_policy(action, flip=False)])
            else:
                # Black to move: policy is flipped into red's perspective.
                black_moves.append([env.observation, self.build_policy(action, flip=True)])
            env.step(action)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; a malformed record is logged and discarded.
            logger.error(f"Invalid Action: idx = {idx}, action = {action}, turns = {turns}, moves = {moves}, "
                         f"winner = {winner}, init = {init}, title: {title}, url: {url}")
            return
        turns += 1

    if winner == Winner.red:
        red_win = 1
    elif winner == Winner.black:
        red_win = -1
    else:
        # Unfinished/drawn record: score the final position statically,
        # from the perspective of the side to move.
        red_win = senv.evaluate(env.get_state())
        if not env.red_to_move:
            red_win = -red_win

    # Append the value target to every sample (negated for black).
    for move in red_moves:
        move += [red_win]
    for move in black_moves:
        move += [-red_win]

    data = []
    # Interleave red/black samples so they stay in play order.
    for i in range(len(red_moves)):
        data.append(red_moves[i])
        if i < len(black_moves):
            data.append(black_moves[i])
    self.buffer += data
    return red_win
class PlayWithHuman:
    """Console game: a human plays against the model, entering moves as board coordinates."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True  # True when the human plays red (moves first)

    def load_model(self):
        """Load best weights into a fresh model; build from scratch if unavailable or forced new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self, human_first=True):
        """Run one interactive game; human turns read coordinates from stdin."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            # Human moves on red's turns iff the human plays first.
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                # Keep prompting until a movable piece of the side to move is chosen.
                while not is_correct_chessman:
                    title = "请输入棋子位置: "
                    input_chessman_pos = input(title)
                    # First two characters are the (x, y) board coordinates.
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print("没有找到此名字的棋子或未轮到此方走子")
                # Keep prompting until a legal destination is entered.
                while not is_correct_position:
                    title = "请输入落子的位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
                # NOTE(review): flip_move is applied before the None (resign)
                # check — if the player can resign, flip_move(None) would run
                # first; confirm flip_move tolerates None.
                if not self.env.red_to_move:
                    action = flip_move(action)
                if action is None:
                    print("AI投降了!")
                    break
                self.env.step(action)
                print(f"AI选择移动 {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
class PlayWithHuman:
    """Console game: a human plays against the model, entering moves as board coordinates."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True  # True when the human plays red (moves first)

    def load_model(self):
        """Load best weights into a fresh model; build from scratch if unavailable or forced new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self, human_first=True):
        """Run one interactive game; human turns read coordinates from stdin."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            # Human moves on red's turns iff the human plays first.
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                # Keep prompting until a movable piece of the side to move is chosen.
                while not is_correct_chessman:
                    title = "请输入棋子位置: "
                    input_chessman_pos = input(title)
                    # First two characters are the (x, y) board coordinates.
                    x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print("没有找到此名字的棋子或未轮到此方走子")
                # Keep prompting until a legal destination is entered.
                while not is_correct_position:
                    title = "请输入落子的位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
                # NOTE(review): flip_move is applied before the None (resign)
                # check — if the player can resign, flip_move(None) would run
                # first; confirm flip_move tolerates None.
                if not self.env.red_to_move:
                    action = flip_move(action)
                if action is None:
                    print("AI投降了!")
                    break
                self.env.step(action)
                print(f"AI选择移动 {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
class PlayWithHuman:
    """Console game (English prompts): a human plays against the model via stdin coordinates."""

    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True  # True when the human plays red (moves first)

    def load_model(self):
        """Load best weights into a fresh model; build from scratch if unavailable or forced new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self, human_first=True):
        """Run one interactive game; human turns read coordinates from stdin."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            # Human moves on red's turns iff the human plays first.
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                # Keep prompting until a movable piece of the side to move is chosen.
                while not is_correct_chessman:
                    title = "Please enter the chess piece position: "
                    input_chessman_pos = input(title)
                    print(input_chessman_pos)
                    # First two characters are the (x, y) board coordinates.
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(
                            f"The current chess piece is {chessman.name},places where you can play:"
                        )
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print(
                            "No chess piece with this name was found or it was not his turn to walk"
                        )
                # Keep prompting until a legal destination is entered.
                while not is_correct_position:
                    title = "Please enter the location of the child: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
                # NOTE(review): flip_move is applied before the None (resign)
                # check — if the player can resign, flip_move(None) would run
                # first; confirm flip_move tolerates None.
                if not self.env.red_to_move:
                    action = flip_move(action)
                if action is None:
                    print("AI surrendered!")
                    break
                self.env.step(action)
                print(f"AI chooses to move {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"The winner is is {self.env.board.winner} !!!")
        self.env.board.print_record()
class ObSelfPlayUCCI:
    """Watch-mode match: AlphaZero model vs. the Eleeye UCCI engine on a console board."""

    def __init__(self, config: Config, ai_move_first=True):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.ai_move_first = ai_move_first  # True: AlphaZero plays red (moves first)

    def load_model(self):
        """Load best weights into a fresh model; build from scratch if unavailable or forced new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        """Run one full AlphaZero-vs-UCCI game, printing the board after every move."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            # Even turns belong to whichever side moves first.
            if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
                # --- AlphaZero's turn ---
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    # Position repetition: forbid actions already played from
                    # this state (flipped into the mover's perspective).
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(
                    int(action[0]), int(action[1]), int(action[2]),
                    int(action[3]))
                print(
                    f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s"
                )
                if not self.env.red_to_move:
                    # Model output is in red's perspective; flip for black.
                    action = flip_move(action)
            else:
                # --- Eleeye (UCCI engine) turn ---
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                # The move record is written from red's perspective.
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(
                    int(rec_action[0]), int(rec_action[1]),
                    int(rec_action[2]), int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)  # slow the display down for a human observer
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        # Play the forced final move (e.g. the mating capture), if any.
        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]), int(final_move[2]), int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def get_ucci_move(self, fen, time=3):
        # NOTE(review): the `time` parameter shadows the module-level time()
        # function inside this method; rename would be safer.
        """Ask the Eleeye engine for a move on `fen` with `time` seconds of thinking.

        Returns a parsed move, or None when the engine reports 'nobestmove'.
        On timeout/failure the engine is killed and the query retried with
        one extra second (unbounded recursion — assumed to terminate).
        """
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = 'setoption randomness small\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except:
            # NOTE(review): bare except — presumably meant to catch
            # subprocess.TimeoutExpired; it also swallows KeyboardInterrupt.
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time + 1)
        print(out)
        lines = out.split('\n')
        # The second-to-last line carries the result: 'nobestmove' or
        # 'bestmove <move> ...'; a trailing 'depth' line shifts the move
        # to a fixed field of the last line.
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)
class ObSelfPlayUCCI:
    """Watch-mode match: AlphaZero model vs. the Eleeye UCCI engine on a console board."""

    def __init__(self, config: Config, ai_move_first=True):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.ai_move_first = ai_move_first  # True: AlphaZero plays red (moves first)

    def load_model(self):
        """Load best weights into a fresh model; build from scratch if unavailable or forced new."""
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        """Run one full AlphaZero-vs-UCCI game, printing the board after every move."""
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState),
                               pipes=self.pipe, enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            # Even turns belong to whichever side moves first.
            if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
                # --- AlphaZero's turn ---
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    # Position repetition: forbid actions already played from
                    # this state (flipped into the mover's perspective).
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
                print(f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s")
                if not self.env.red_to_move:
                    # Model output is in red's perspective; flip for black.
                    action = flip_move(action)
            else:
                # --- Eleeye (UCCI engine) turn ---
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                # The move record is written from red's perspective.
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(int(rec_action[0]), int(rec_action[1]), int(rec_action[2]), int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)  # slow the display down for a human observer
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        # Play the forced final move (e.g. the mating capture), if any.
        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]), int(final_move[2]), int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def get_ucci_move(self, fen, time=3):
        # NOTE(review): the `time` parameter shadows the module-level time()
        # function inside this method; rename would be safer.
        """Ask the Eleeye engine for a move on `fen` with `time` seconds of thinking.

        Returns a parsed move, or None when the engine reports 'nobestmove'.
        On timeout/failure the engine is killed and the query retried with
        one extra second (unbounded recursion — assumed to terminate).
        """
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = 'setoption randomness small\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time+0.5)
        except:
            # NOTE(review): bare except — presumably meant to catch
            # subprocess.TimeoutExpired; it also swallows KeyboardInterrupt.
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time+1)
        print(out)
        lines = out.split('\n')
        # The second-to-last line carries the result: 'nobestmove' or
        # 'bestmove <move> ...'; a trailing 'depth' line shifts the move
        # to a fixed field of the last line.
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)