def self_play_buffer(config, cur) -> "tuple[CChessEnv, list]":
    """Play one complete self-play game and return its environment and moves.

    A set of pipes is borrowed from ``cur`` for the duration of the game and
    is always handed back, even when the game aborts with an exception, so a
    failing game cannot drain the shared pool.

    Args:
        config: Project configuration; ``config.play.max_game_length`` is read
            here and the object is forwarded to ``CChessEnv``/``CChessPlayer``.
        cur: Mutable list used as a pool of pipes. One entry is popped at the
            start and appended back at the end.

    Returns:
        ``(env, data)`` where ``env`` is the finished environment and ``data``
        is the interleaved list ``red.moves[0], black.moves[0], red.moves[1],
        ...``. Black entries beyond ``len(red.moves)`` are not included.

    Raises:
        IndexError: If ``cur`` is empty.
    """
    pipes = cur.pop()  # borrow
    try:
        env = CChessEnv(config).reset()
        # Both players share a single search tree.
        search_tree = defaultdict(VisitState)
        red = CChessPlayer(config, search_tree=search_tree, pipes=pipes)
        black = CChessPlayer(config, search_tree=search_tree, pipes=pipes)

        history = []
        # Number of consecutive plies whose action equals the action played
        # four plies earlier (only counted once more than six plies exist).
        cc = 0
        while not env.done:
            start_time = time()
            if env.red_to_move:
                action = red.action(env)
            else:
                action = black.action(env)
            end_time = time()
            logger.debug(
                f"Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
            )
            env.step(action)
            history.append(action)

            if len(history) > 6 and history[-1] == history[-5]:
                cc = cc + 1
            else:
                cc = 0

            # NOTE(review): the loop presumably ends because ``env.done``
            # reflects ``env.winner`` being set -- confirm against CChessEnv.
            if env.num_halfmoves / 2 >= config.play.max_game_length:
                env.winner = Winner.draw
            # Checked after the length limit, so a repetition verdict
            # overrides a draw assigned on the same ply.
            if cc >= 4:
                if env.red_to_move:
                    env.winner = Winner.black
                else:
                    env.winner = Winner.red

        # Result from black's point of view; red receives the negation.
        if env.winner == Winner.red:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        red.finish_game(-black_win)
    finally:
        # Return the borrowed pipes no matter how the game ended.
        cur.append(pipes)

    # Interleave the recorded moves, red first.
    data = []
    black_moves = black.moves
    for i, red_move in enumerate(red.moves):
        data.append(red_move)
        if i < len(black_moves):
            data.append(black_moves[i])
    return env, data
class SelfPlayWorker:
    """Plays self-play games in an endless loop and persists their data.

    Each game borrows a set of pipes from ``cur_pipes``, lets a red and a
    black ``CChessPlayer`` play against each other, and appends the recorded
    moves to ``buffer``. The buffer is flushed to a file every
    ``config.play_data.nb_game_in_file`` games.
    """

    def __init__(self, config: Config, pipes=None, pid=None):
        """Store the configuration and the worker's shared resources.

        Args:
            config: Project configuration object.
            pipes: List used as a pool of pipes; ``start_game`` pops one
                entry per game and appends it back afterwards.
            pid: Index of this worker, used only in log messages.
        """
        self.config = config
        self.red = None  # red player of the most recent game
        self.black = None  # black player of the most recent game
        self.cur_pipes = pipes
        self.pid = pid
        self.buffer = []  # moves collected since the last file write

    def start(self):
        """Run self-play games forever, logging a summary after each one."""
        logger.debug(
            f"Selfplay#Start Process index = {self.pid}, pid = {os.getpid()}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            env, search_tree = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(
                f"Process{self.pid} play game {idx} time={end_time - start_time} sec, "
                f"turn={env.num_halfmoves / 2}:{env.winner}")
            # Very short games are unusual; dump the first ten board rows.
            if env.num_halfmoves <= 10:
                for i in range(10):
                    logger.debug(f"{env.board.screen[i]}")

            idx += 1

    def start_game(self, idx, search_tree):
        """Play one game, then save its data and prune old data files.

        Args:
            idx: 1-based game counter; drives search-tree resets and the
                periodic saving of records and play data.
            search_tree: Search tree carried over from the previous game. It
                is replaced by a fresh one when tree sharing is disabled or
                when ``idx`` is a multiple of
                ``config.play.reset_mtcs_info_per_game``.

        Returns:
            ``(env, search_tree)``: the finished environment and the tree that
            was used for this game.

        Raises:
            IndexError: If ``cur_pipes`` is empty.
        """
        pipes = self.cur_pipes.pop()
        try:
            env = CChessEnv(self.config).reset()
            play_cfg = self.config.play

            if not play_cfg.share_mtcs_info_in_self_play or \
                    idx % play_cfg.reset_mtcs_info_per_game == 0:
                search_tree = defaultdict(VisitState)

            self.red = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes)
            self.black = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes)

            # NOTE(review): unlike self_play_buffer, no repetition
            # adjudication is applied here. The previous code maintained a
            # repetition counter that was never read; it has been dropped.
            history = []

            while not env.done:
                start_time = time()
                if env.red_to_move:
                    action = self.red.action(env)
                else:
                    action = self.black.action(env)
                end_time = time()
                logger.debug(
                    f"Process{self.pid} Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
                )
                env.step(action)
                history.append(action)

                if env.num_halfmoves / 2 >= play_cfg.max_game_length:
                    env.winner = Winner.draw

            # Result from red's point of view; black receives the negation.
            if env.winner == Winner.red:
                red_win = 1
            elif env.winner == Winner.black:
                red_win = -1
            else:
                red_win = 0

            if env.num_halfmoves <= 10:
                logger.debug(f"History moves: {history}")

            self.red.finish_game(red_win)
            self.black.finish_game(-red_win)
        finally:
            # Hand the pipes back even when the game raised, so a failing
            # game cannot drain the pool.
            self.cur_pipes.append(pipes)

        self.save_record_data(
            env, write=idx % self.config.play_data.nb_game_save_record == 0)
        self.save_play_data(idx)
        self.remove_play_data()
        return env, search_tree

    @staticmethod
    def _new_game_id():
        """Return a timestamp string used to build unique file names."""
        return datetime.now().strftime("%Y%m%d-%H%M%S.%f")

    def save_play_data(self, idx):
        """Buffer the last game's moves and flush the buffer periodically.

        The moves of ``self.red`` and ``self.black`` are interleaved (red
        first) and appended to ``self.buffer``. When ``idx`` is a multiple of
        ``config.play_data.nb_game_in_file`` the buffer is written to a new
        file and cleared.
        """
        black_moves = self.black.moves
        for i, red_move in enumerate(self.red.moves):
            self.buffer.append(red_move)
            if i < len(black_moves):
                self.buffer.append(black_moves[i])

        if idx % self.config.play_data.nb_game_in_file != 0:
            return

        rc = self.config.resource
        path = os.path.join(
            rc.play_data_dir, rc.play_data_filename_tmpl % self._new_game_id())
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def save_record_data(self, env, write=False):
        """Save the game record of ``env`` to a new file when ``write`` is set."""
        if not write:
            return
        rc = self.config.resource
        path = os.path.join(
            rc.play_record_dir, rc.play_record_filename_tmpl % self._new_game_id())
        env.save_records(path)

    def remove_play_data(self):
        """Delete surplus play-data files beyond ``max_file_num``.

        The first ``len(files) - max_file_num`` entries returned by
        ``get_game_data_filenames`` are removed (presumably the oldest ones;
        that depends on the helper's ordering). Removal is best-effort: it
        stops at the first file-system error, which is logged, not raised.
        """
        files = get_game_data_filenames(self.config.resource)
        excess = len(files) - self.config.play_data.max_file_num
        if excess <= 0:
            return
        try:
            for i in range(excess):
                os.remove(files[i])
        except OSError as err:
            # Only file-system errors are tolerated; anything else (including
            # KeyboardInterrupt) propagates.
            logger.debug(
                f"Process {self.pid} failed to remove old play data: {err}")