def start_game(self, idx, search_tree):
    """Play one self-play game and, in most cases, save its move record.

    An entry is popped from ``self.cur_pipes`` and handed to the player as
    its ``pipes``. It is appended back when the game finishes *or* raises,
    and the player is closed first in both cases.

    The game loop stops when any of the following happens:

    * the player returns ``None`` as its action (value ``-1``);
    * ``senv.new_step`` raises (logged, value ``0``);
    * ``no_eat`` was truthy for 120 consecutive plies, or ``turns / 2``
      reaches ``config.play.max_game_length`` (value ``0``);
    * ``senv.done`` reports the game as over;
    * ``senv.has_attack_chessman`` is falsy for the new state (value ``0``);
    * a repeated position is answered three times by a move that neither
      ``senv.will_check_or_catch`` nor ``senv.be_catched`` flags
      (value ``0``).

    Args:
        idx: Index of the game. Used to decide when the search tree is
            reset and passed on to ``self.save_play_data``.
        search_tree: Search tree to reuse. Replaced by a fresh
            ``defaultdict(VisitState)`` when
            ``config.play.share_mtcs_info_in_self_play`` is falsy or
            ``idx`` is a multiple of
            ``config.play.reset_mtcs_info_per_game``.

    Returns:
        ``(v, turns, state, store)``: the final value (negated when the ply
        count is odd), the number of plies played, the last state, and
        whether the record was passed to ``self.save_play_data``.
    """
    pipes = self.cur_pipes.pop()
    try:
        play_cfg = self.config.play
        if not play_cfg.share_mtcs_info_in_self_play or \
                idx % play_cfg.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > play_cfg.enable_resign_rate

        self.player = CChessPlayer(self.config, search_tree=search_tree,
                                   pipes=pipes, enable_resign=enable_resign,
                                   debugging=False,
                                   use_history=self.use_history)
        try:
            state = senv.INIT_STATE
            # history alternates: state, action, state, action, ...
            history = [state]
            value = 0
            turns = 0       # even == red; odd == black
            game_over = False
            final_move = None
            no_eat_count = 0
            no_act = []
            increase_temp = False

            while not game_over:
                action, policy = self.player.action(
                    state, turns, no_act, increase_temp=increase_temp)
                if action is None:
                    logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                history.append(action)
                try:
                    state, no_eat = senv.new_step(state, action)
                except Exception as e:
                    # The move could not be applied: log it and end the
                    # game with value 0.
                    logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                    game_over = True
                    value = 0
                    break
                turns += 1
                no_eat_count = no_eat_count + 1 if no_eat else 0
                history.append(state)

                if no_eat_count >= 120 or turns / 2 >= play_cfg.max_game_length:
                    game_over = True
                    value = 0
                    continue

                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over and not senv.has_attack_chessman(state):
                    logger.error(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0

                # Restrictions are recomputed from scratch after every ply.
                increase_temp = False
                no_act = []
                if game_over or check or state not in history[:-1]:
                    continue

                # The position has occurred before: inspect the move that
                # followed each earlier occurrence.
                idle_repeats = 0
                for past, followup in zip(history, history[1:]):
                    if past == state:
                        if senv.will_check_or_catch(state, followup):
                            no_act.append(followup)
                        elif not senv.be_catched(state, followup):
                            increase_temp = True
                            idle_repeats += 1
                            if idle_repeats >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.error("闲着循环三次,作和棋处理")
                                break

            if final_move:
                # senv.done supplied one more move; apply it so the record
                # contains it as well.
                history.append(final_move)
                state = senv.step(state, final_move)
                turns += 1
                value = -value
                history.append(state)
        finally:
            # Close the player before the pipe goes back to the pool.
            self.player.close()
            del self.player
        del search_tree
        gc.collect()

        if turns % 2 == 1:  # black turn
            value = -value
        v = value

        # Games shorter than 10 plies are kept only when random() > 0.9.
        store = turns >= 10 or random() > 0.9
        if store:
            data = [history[0]]
            for i in range(turns):
                # history[2 * i + 1] is the action of ply i; the value sign
                # alternates from ply to ply.
                data.append([history[2 * i + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.remove_play_data()
        return v, turns, state, store
    finally:
        self.cur_pipes.append(pipes)
def start_game(self, idx):
    """Play one evaluation game between two players and return its record.

    ``self.player1`` is built with a pipe from ``self.pipes_bt`` and
    ``self.hist_base``; ``self.player2`` with a pipe from ``self.pipes_ng``
    and ``self.hist_ng``. For even ``idx`` player1 moves on even plies
    (red) and player2 on odd plies (black); for odd ``idx`` the roles are
    swapped.

    Before the game starts the method sleeps for ``random()`` seconds and
    sets ``config.play.simulation_num_per_move`` to a random multiple of
    100 between 800 and 1200.

    Both pipes are appended back to their pools, and both players are
    closed, whether the game finishes normally or raises. An exception
    from ``senv.new_step`` is not caught here and propagates to the caller.

    Args:
        idx: Index of the game; its parity decides which player is red and
            the order of the two digests at the head of ``data``.

    Returns:
        ``(value, turns, data)``: the final value (negated when the ply
        count is odd), the number of plies played, and the record
        ``[digest, digest, initial_state, [action, v], ...]`` in which the
        sign of ``v`` alternates from ply to ply.
    """
    sleep(random())
    playouts = randint(8, 12) * 100
    self.config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")

    pipe1 = self.pipes_bt.pop()
    pipe2 = self.pipes_ng.pop()
    try:
        self.player1 = CChessPlayer(self.config,
                                    search_tree=defaultdict(VisitState),
                                    pipes=pipe1, debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=defaultdict(VisitState),
                                    pipes=pipe2, debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_ng)
        try:
            # even: bst = red, ng = black; odd: bst = black, ng = red
            if idx % 2 == 0:
                red, black = self.player1, self.player2
                logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
            else:
                red, black = self.player2, self.player1
                logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")

            state = senv.INIT_STATE
            # history alternates: state, action, state, action, ...
            history = [state]
            value = 0
            turns = 0       # even == red; odd == black
            game_over = False
            # Must exist even if the loop ends before senv.done is called;
            # otherwise the check after the loop raises UnboundLocalError.
            final_move = None
            no_eat_count = 0
            increase_temp = False
            no_act = []

            while not game_over:
                mover = red if turns % 2 == 0 else black
                start_time = time()
                action, _ = mover.action(state, turns, no_act=no_act,
                                         increase_temp=increase_temp)
                end_time = time()
                if self.config.opts.log_move:
                    logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                history.append(action)
                state, no_eat = senv.new_step(state, action)
                turns += 1
                no_eat_count = no_eat_count + 1 if no_eat else 0
                history.append(state)

                if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                    game_over = True
                    value = 0
                    continue

                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                # Restrictions are recomputed from scratch after every ply.
                no_act = []
                increase_temp = False
                if not game_over and not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
                if game_over or check or state not in history[:-1]:
                    continue

                # The position has occurred before: inspect the move that
                # followed each earlier occurrence.
                idle_repeats = 0
                for past, followup in zip(history, history[1:]):
                    if past == state:
                        if senv.will_check_or_catch(state, followup):
                            no_act.append(followup)
                        elif not senv.be_catched(state, followup):
                            increase_temp = True
                            idle_repeats += 1
                            if idle_repeats >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

            if final_move:
                # senv.done supplied one more move; apply it so the record
                # contains it as well.
                history.append(final_move)
                state = senv.step(state, final_move)
                turns += 1
                value = - value
                history.append(state)
        finally:
            # Close the players before the pipes go back to the pools.
            self.player1.close()
            self.player2.close()

        base_digest = self.data['base']['digest']
        unchecked_digest = self.data['unchecked']['digest']
        if idx % 2 == 0:
            data = [base_digest, unchecked_digest]
        else:
            data = [unchecked_digest, base_digest]

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            # history[2 * i + 1] is the action of ply i; the value sign
            # alternates from ply to ply.
            data.append([history[2 * i + 1], v])
            v = -v
        return value, turns, data
    finally:
        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    """Play one self-play game and return its result and move record.

    A pipe is popped from ``cur`` for the player and appended back when
    the game finishes or raises; the player is closed first in both cases.
    Progress is printed to stdout after every move.

    The game loop stops when the player returns ``None`` (value ``-1``),
    when ``senv.new_step`` raises (logged, value ``0``), after 120
    consecutive plies with a truthy ``no_eat`` flag, when ``turns / 2``
    reaches ``config.play.max_game_length``, when ``senv.done`` reports the
    game as over, when ``senv.has_attack_chessman`` is falsy, or when a
    repeated position is answered three times by a move that neither
    ``senv.will_check_or_catch`` nor ``senv.be_catched`` flags.

    Args:
        config: Configuration object; ``config.play.enable_resign_rate``
            and ``config.play.max_game_length`` are read here, and the
            whole object is passed to ``CChessPlayer``.
        cur: Pool of pipes supporting ``pop()`` and ``append()``.
        use_history: Forwarded to ``CChessPlayer`` as ``use_history``.

    Returns:
        ``((turns, v), data)``: the number of plies played, the final value
        (negated when the ply count is odd), and the record
        ``[initial_state, [action, value], ...]`` in which the value sign
        alternates from ply to ply.
    """
    pipe = cur.pop()  # borrow
    try:
        enable_resign = random() > config.play.enable_resign_rate
        player = CChessPlayer(config, search_tree=defaultdict(VisitState),
                              pipes=pipe, enable_resign=enable_resign,
                              debugging=False, use_history=use_history)
        try:
            state = senv.INIT_STATE
            # history alternates: state, action, state, action, ...
            history = [state]
            value = 0
            turns = 0
            game_over = False
            final_move = None
            no_eat_count = 0
            # None for the first move; replaced by a list after every ply.
            no_act = None
            increase_temp = False

            while not game_over:
                start_time = time()
                action, policy = player.action(state, turns, no_act,
                                               increase_temp=increase_temp)
                end_time = time()
                if action is None:
                    print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
                    value = -1
                    break
                # Floor division keeps the round number an integer
                # ("回合1", not "回合1.0" or "回合1.5").
                side = '红方走棋' if turns % 2 == 0 else '黑方走棋'
                print(f"博弈中: 回合{turns // 2 + 1} {side}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
                history.append(action)
                try:
                    state, no_eat = senv.new_step(state, action)
                except Exception as e:
                    # The move could not be applied: log it and end the
                    # game with value 0.
                    logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                    game_over = True
                    value = 0
                    break
                turns += 1
                no_eat_count = no_eat_count + 1 if no_eat else 0
                history.append(state)

                if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
                    game_over = True
                    value = 0
                    continue

                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                # Restrictions are recomputed from scratch after every ply.
                no_act = []
                increase_temp = False
                if not game_over and not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
                if game_over or check or state not in history[:-1]:
                    continue

                # The position has occurred before: inspect the move that
                # followed each earlier occurrence.
                idle_repeats = 0
                for past, followup in zip(history, history[1:]):
                    if past == state:
                        if senv.will_check_or_catch(state, followup):
                            no_act.append(followup)
                        elif not senv.be_catched(state, followup):
                            increase_temp = True
                            idle_repeats += 1
                            if idle_repeats >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

            if final_move:
                # senv.done supplied one more move; apply it so the record
                # contains it as well.
                history.append(final_move)
                state = senv.step(state, final_move)
                turns += 1
                value = -value
                history.append(state)
        finally:
            # Close the player before the pipe goes back to the pool.
            player.close()
            del player
        gc.collect()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data = [history[0]]
        for i in range(turns):
            # history[2 * i + 1] is the action of ply i; the value sign
            # alternates from ply to ply.
            data.append([history[2 * i + 1], value])
            value = -value
        return (turns, v), data
    finally:
        cur.append(pipe)
def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data, hist_base, hist_ng) -> (tuple, list):
    """Play one evaluation game between two players and return its record.

    ``player1`` is built with a pipe from ``pipes_bt`` and ``hist_base``;
    ``player2`` with a pipe from ``pipes_ng`` and ``hist_ng``. For even
    ``idx`` player1 moves on even plies (red) and player2 on odd plies
    (black); for odd ``idx`` the roles are swapped.

    Before the game starts the function sleeps for ``random()`` seconds
    and sets ``config.play.simulation_num_per_move`` to a random multiple
    of 100 between 800 and 1200. Progress is printed to stdout after every
    move. Both pipes are appended back to their pools, and both players
    are closed, whether the game finishes normally or raises.

    Args:
        config: Configuration object; mutated as described above and
            passed to both ``CChessPlayer`` instances.
        pipes_bt: Pool of pipes for player1 (``pop()`` / ``append()``).
        pipes_ng: Pool of pipes for player2 (``pop()`` / ``append()``).
        idx: Index of the game; its parity decides which player is red and
            the order of the two digests at the head of ``data``.
        res_data: Mapping providing ``['base']['digest']`` and
            ``['unchecked']['digest']``.
        hist_base: Forwarded to player1 as ``use_history``.
        hist_ng: Forwarded to player2 as ``use_history``.

    Returns:
        ``((turns, v, idx), data)``: the number of plies played, the final
        value (negated when the ply count is odd), the game index, and the
        record ``[digest, digest, initial_state, [action, value], ...]`` in
        which the value sign alternates from ply to ply.
    """
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()
    try:
        player1 = CChessPlayer(config, search_tree=defaultdict(VisitState),
                               pipes=pipe1, enable_resign=False,
                               debugging=False, use_history=hist_base)
        player2 = CChessPlayer(config, search_tree=defaultdict(VisitState),
                               pipes=pipe2, enable_resign=False,
                               debugging=False, use_history=hist_ng)
        try:
            # even: bst = red, ng = black; odd: bst = black, ng = red
            if idx % 2 == 0:
                red, black = player1, player2
                print("基准模型执红,待评测模型执黑")
            else:
                red, black = player2, player1
                print("待评测模型执红,基准模型执黑")

            state = senv.INIT_STATE
            # history alternates: state, action, state, action, ...
            history = [state]
            value = 0
            turns = 0
            game_over = False
            final_move = None
            no_eat_count = 0
            increase_temp = False
            no_act = []

            while not game_over:
                mover = red if turns % 2 == 0 else black
                start_time = time()
                # Bind the policy: the error handler below reports it, and
                # leaving it unbound would raise NameError there.
                action, policy = mover.action(state, turns, no_act=no_act,
                                              increase_temp=increase_temp)
                end_time = time()
                if action is None:
                    print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
                    value = -1
                    break
                # Floor division keeps the round number an integer
                # ("回合1", not "回合1.0" or "回合1.5").
                side = '红方走棋' if turns % 2 == 0 else '黑方走棋'
                print(f"博弈中: 回合{turns // 2 + 1} {side}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
                history.append(action)
                try:
                    state, no_eat = senv.new_step(state, action)
                except Exception as e:
                    # The move could not be applied: log it and end the
                    # game with value 0.
                    logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                    game_over = True
                    value = 0
                    break
                turns += 1
                no_eat_count = no_eat_count + 1 if no_eat else 0
                history.append(state)

                if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
                    game_over = True
                    value = 0
                    continue

                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                # Restrictions are recomputed from scratch after every ply.
                no_act = []
                increase_temp = False
                if not game_over and not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
                if game_over or check or state not in history[:-1]:
                    continue

                # The position has occurred before: inspect the move that
                # followed each earlier occurrence.
                idle_repeats = 0
                for past, followup in zip(history, history[1:]):
                    if past == state:
                        if senv.will_check_or_catch(state, followup):
                            no_act.append(followup)
                        elif not senv.be_catched(state, followup):
                            increase_temp = True
                            idle_repeats += 1
                            if idle_repeats >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

            if final_move:
                # senv.done supplied one more move; apply it so the record
                # contains it as well.
                history.append(final_move)
                state = senv.step(state, final_move)
                turns += 1
                value = -value
                history.append(state)
        finally:
            # Close the players before the pipes go back to the pools.
            player1.close()
            player2.close()
            del player1, player2
        gc.collect()

        base_digest = res_data['base']['digest']
        unchecked_digest = res_data['unchecked']['digest']
        if idx % 2 == 0:
            data = [base_digest, unchecked_digest]
        else:
            data = [unchecked_digest, base_digest]

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            # history[2 * i + 1] is the action of ply i; the value sign
            # alternates from ply to ply.
            data.append([history[2 * i + 1], value])
            value = -value
        return (turns, v, idx), data
    finally:
        pipes_bt.append(pipe1)
        pipes_ng.append(pipe2)