Example #1
 def info_best_move(self, action, value, depth):
     self.end_time = time()
     if not self.is_red_turn:
         value = -value
     score = int(value * 1000)
     duration = self.end_time - self.start_time
     nps = int(depth * 100 / duration) * 1000
     print(f"info depth {depth} score {score} time {int(duration * 1000)} nps {nps}")
     logger.debug(f"info depth {depth} score {score} time {int((self.end_time - self.start_time) * 1000)}")
     sys.stdout.flush()
     # get ponder
     state = senv.step(self.state, action)
     ponder = None
     if state in self.search_tree:
         node = self.search_tree[state]
         cnt = 0
         for mov, action_state in node.a.items():
             if action_state.n > cnt:
                 ponder = mov
                 cnt = action_state.n
     if not self.is_red_turn:
         action = flip_move(action)
     action = senv.to_uci_move(action)
     output = f"bestmove {action}"
     if ponder:
         if self.is_red_turn:
             ponder = flip_move(ponder)
         ponder = senv.to_uci_move(ponder)
         output += f" ponder {ponder}"
     print(output)
     logger.debug(output)
     sys.stdout.flush()
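
All of these examples rely on flip_move from cchess_alphazero.environment.lookup_tables to mirror a move into the other side's perspective. Below is a minimal sketch of the transformation as the examples use it (moves are 4-character strings: source column 0-8, source row 0-9, then destination); the name flip_move_sketch is ours, and the shipped lookup-table version remains authoritative:

def flip_move_sketch(mov: str) -> str:
    # Rotate a move 180 degrees on the 9x10 xiangqi board:
    # column c -> 8 - c, row r -> 9 - r, for both endpoints.
    return "".join([str(8 - int(mov[0])), str(9 - int(mov[1])),
                    str(8 - int(mov[2])), str(9 - int(mov[3]))])

assert flip_move_sketch('0001') == '8988'
assert flip_move_sketch(flip_move_sketch('7770')) == '7770'  # flipping twice restores the move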
Example #2
    def MCTS_search(self, env: CChessEnv, is_root_node=False, tid=0) -> float:
        """
        Monte Carlo Tree Search
        """
        if env.done:
            if env.winner == Winner.draw:
                return 0
            else:
                return -1

        state = self.get_state_key(env)

        with self.node_lock[state]:
            if state not in self.tree:
                # Expand and Evaluate
                leaf_p, leaf_v = self.expand_and_evaluate(env)
                # self.neural_net_out_p, self.neural_net_out_v = leaf_p, leaf_v
                self.tree[state].p = leaf_p
                self.tree[state].legal_moves = self.get_legal_moves(env)
                return leaf_v

            if tid in self.tree[state].visit:  # this thread already passed through this node: a cycle, score as draw
                return 0

            # Select
            self.tree[state].visit.append(tid)
            sel_action = self.select_action_q_and_u(state, is_root_node)

            if sel_action is None:
                return -1

            virtual_loss = self.config.play.virtual_loss
            self.tree[state].sum_n += virtual_loss

            action_state = self.tree[state].a[sel_action]
            action_state.n += virtual_loss
            action_state.w -= virtual_loss
            action_state.q = action_state.w / action_state.n

        if env.red_to_move:
            env.step(sel_action)
        else:
            env.step(flip_move(sel_action))

        leaf_v = self.MCTS_search(env, False, tid)  # descendants are never the root node
        leaf_v = -leaf_v  # the value flips with the side to move

        # Backup
        # update N, W, Q
        with self.node_lock[state]:
            node = self.tree[state]
            node.visit.remove(tid)
            node.sum_n = node.sum_n - virtual_loss + 1

            action_state = node.a[sel_action]
            action_state.n += 1 - virtual_loss
            action_state.w += leaf_v + virtual_loss
            action_state.q = action_state.w / action_state.n

        return leaf_v
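
The Select and Backup phases above implement virtual loss for multi-threaded MCTS: before descending an edge, the thread pretends it lost virtual_loss playouts there, which pushes concurrent threads toward other branches; on backup the pretend losses are reverted and the single real result is added. A toy walk-through of the edge statistics, with made-up numbers:

n, w = 10, 4.0              # edge visits and total value (hypothetical)
virtual_loss = 3

# Select: apply virtual loss so other threads avoid this edge.
n += virtual_loss           # 13
w -= virtual_loss           # 1.0
q = w / n                   # q drops from 0.40 to ~0.08

leaf_v = 0.6                # result of the simulation (made up)

# Backup: revert the virtual loss, record the one real visit.
n += 1 - virtual_loss       # 11
w += leaf_v + virtual_loss  # 4.6
q = w / n                   # 4.6 / 11 ~= 0.42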
Example #3
def get_catch_list(state, moves=None):
    catch_list = set()
    if not moves:
        moves = get_legal_moves(state)
    for mov in moves:
        next_state, no_eat = new_step(state, mov)
        if not no_eat:  # a capture occurred
            # check whether the opponent can recapture (i.e. the piece is defended)
            could_defend = False
            next_moves = get_legal_moves(next_state)
            flipped_move = flip_move(mov)
            dest = flipped_move[2:]
            for nmov in next_moves:
                if nmov[2:] == dest:
                    could_defend = True
                    break
            if not could_defend:
                i = int(mov[1])
                j = int(mov[0])
                black_board = state_to_board(state)
                if black_board[i][j] == 'p' and i <= 4:
                    continue
                m = int(mov[3])
                n = int(mov[2])
                if black_board[m][n] == 'P' and m > 4:
                    continue
                # skip even exchanges (capturing a piece of equal kind)
                if black_board[m][n].upper() == black_board[i][j].upper():
                    continue
                # print(f"Catch: mov = {mov}, chessman = {black_board[i][j]}")
                catch_list.add(
                    (black_board[i][j], i, j, black_board[m][n], m, n))
    return catch_list
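
A possible way to exercise get_catch_list, assuming it lives in the same static_env module as the helpers it calls (get_legal_moves, new_step, state_to_board); from the opening position every capture can be recaptured, so the expected result is an empty set:

import cchess_alphazero.environment.static_env as senv

catches = senv.get_catch_list(senv.INIT_STATE)
print(catches)  # expected: set(), no undefended non-exchange capture exists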
Example #4
def test_static_env():
    from cchess_alphazero.environment.env import CChessEnv
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.static_env import INIT_STATE
    from cchess_alphazero.environment.lookup_tables import flip_move
    env = CChessEnv()
    env.reset()
    print("env:  " + env.observation)
    print("senv: " + INIT_STATE)
    state = INIT_STATE
    env.step('0001')
    state = senv.step(state, '0001')
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.step('7770')
    state = senv.step(state, flip_move('7770'))
    print(senv.evaluate(state))
    print("env:  " + env.observation)
    print("senv: " + state)
    env.render()
    board = senv.state_to_board(state)
    for i in range(9, -1, -1):
        print(board[i])
    print("env: ")
    print(env.input_planes()[0+7:3+7])
    print("senv: ")
    print(senv.state_to_planes(state)[0+7:3+7])
    print(f"env:  {env.board.legal_moves()}" )
    print(f"senv: {senv.get_legal_moves(state)}")
    print(set(env.board.legal_moves()) == set(senv.get_legal_moves(state)))
Example #5
    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                while not is_correct_chessman:
                    title = "请输入棋子位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman is not None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print("没有找到此名字的棋子或未轮到此方走子")
                while not is_correct_position:
                    title = "请输入落子的位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(),
                                                self.env.num_halfmoves)
                if action is None:
                    print("The AI has resigned!")
                    break
                if not self.env.red_to_move:
                    action = flip_move(action)
                self.env.step(action)
                print(f"AI选择移动 {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
Example #6
 def cmd_position(self):
     '''
     position {fen <fenstring> | startpos } [moves <move1> .... <moven>]
     '''
     if not self.is_ready:
         return
     move_idx = -1
     if len(self.args) > 0:
         if self.args[0] == 'fen':
             # init with fen string
             fen = self.args[1]
             try:
                 self.state = senv.fen_to_state(fen)
             except Exception as e:
                 logger.error(f"cmd position error! cmd = {self.args}, {e}")
                 return
             self.history = [self.state]
             turn = self.args[2]
             if turn == 'b':
                 self.state = senv.fliped_state(self.state)
                 self.is_red_turn = False
                 self.turns = (int(self.args[6]) - 1) * 2 + 1
             else:
                 self.is_red_turn = True
                 self.turns = (int(self.args[6]) - 1) * 2
             if len(self.args) > 7 and self.args[7] == 'moves':
                 move_idx = 8
         elif self.args[0] == 'startpos':
             self.state = senv.INIT_STATE
             self.is_red_turn = True
             self.history = [self.state]
             self.turns = 0
             if len(self.args) > 1 and self.args[1] == 'moves':
                 move_idx = 2
         elif self.args[0] == 'moves':
             move_idx = 1
     else:
         self.state = senv.INIT_STATE
         self.is_red_turn = True
         self.history = [self.state]
         self.turns = 0
     logger.debug(f"state = {self.state}")
     # senv.render(self.state)
     # execute moves
     if move_idx != -1:
         for i in range(move_idx, len(self.args)):
             action = senv.parse_ucci_move(self.args[i])
             if not self.is_red_turn:
                 action = flip_move(action)
             self.history.append(action)
             self.state = senv.step(self.state, action)
             self.is_red_turn = not self.is_red_turn
             self.turns += 1
             self.history.append(self.state)
         logger.debug(f"state = {self.state}")
Example #7
    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                              enable_resign=True, debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                while not is_correct_chessman:
                    title = "请输入棋子位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman is not None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print("没有找到此名字的棋子或未轮到此方走子")
                while not is_correct_position:
                    title = "请输入落子的位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
                if action is None:
                    print("The AI has resigned!")
                    break
                if not self.env.red_to_move:
                    action = flip_move(action)
                self.env.step(action)
                print(f"AI选择移动 {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
Example #8
def test_sl():
    from cchess_alphazero.worker import sl
    from cchess_alphazero.config import Config
    from cchess_alphazero.environment.lookup_tables import ActionLabelsRed, flip_policy, flip_move
    c = Config('mini')
    labels_n = len(ActionLabelsRed)
    move_lookup = {move: i for move, i in zip(ActionLabelsRed, range(labels_n))}
    slworker = sl.SupervisedWorker(c)
    p1 = slworker.build_policy('0001', False)
    print(p1[move_lookup['0001']])
    p2 = slworker.build_policy('0001', True)
    print(p2[move_lookup[flip_move('0001')]])
Example #9
def test_onegreen():
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.lookup_tables import flip_move
    init = '9999299949999999249999869999999958999999519999999999999999997699'
    state = senv.init(init)
    print(state)
    senv.render(state)
    move = senv.parse_onegreen_move('8685')
    state = senv.step(state, move)
    print(state)
    senv.render(state)
    move = senv.parse_onegreen_move('7666')
    state = senv.step(state, flip_move(move))
    print(state)
    senv.render(state)
Example #10
def test_ucci():
    import cchess_alphazero.environment.static_env as senv
    from cchess_alphazero.environment.lookup_tables import flip_move
    state = senv.INIT_STATE
    state = senv.step(state, '0001')
    fen = senv.state_to_fen(state, 1)
    print(fen)
    senv.render(state)
    move = 'b7b0'
    move = senv.parse_ucci_move(move)
    print(f'Parsed move {move}')
    move = flip_move(move)
    print(f'flipped move {move}')
    state = senv.step(state, move)
    senv.render(state)
    fen = senv.state_to_fen(state, 2)
    print(fen)
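
A sketch of what parse_ucci_move is assumed to do, judging from the 'b7b0' usage above: UCCI files 'a'-'i' map to columns 0-8 while the rank digits pass through (an illustration, not the library's exact code):

def parse_ucci_move_sketch(mov: str) -> str:
    # 'b7b0' -> '1710': letter files become digit columns, ranks stay digits.
    return (str(ord(mov[0]) - ord('a')) + mov[1] +
            str(ord(mov[2]) - ord('a')) + mov[3])

print(parse_ucci_move_sketch('b7b0'))  # 1710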
Example #11
 def print_depth_info(self, state, turns, start_time, value, no_act):
     '''
     info depth xx pv xxx
     '''
     depth = self.done_tasks // 100
     end_time = time()
     pv = ""
     i = 0
     root = True  # only filter no_act moves at the root of the pv line
     while i < 20:
         node = self.tree[state]
         bestmove = None
         n = 0
         if len(node.a) == 0:
             break
         for mov, action_state in node.a.items():
             if action_state.n >= n:
                 if root and no_act and mov in no_act:
                     continue
                 n = action_state.n
                 bestmove = mov
         if bestmove is None:
             logger.error(
                 f"state = {state}, turns = {turns}, no_act = {no_act}, root = {root}, len(as) = {len(node.a)}"
             )
             break
         state = senv.step(state, bestmove)
         root = False
         if turns % 2 == 1:
             bestmove = flip_move(bestmove)
         bestmove = senv.to_uci_move(bestmove)
         pv += " " + bestmove
         i += 1
         turns += 1
     if state in self.debug:
         _, value = self.debug[state]
         if turns % 2 != self.side:
             value = -value
     score = int(value * 1000)
     duration = end_time - start_time
     nps = int(depth * 100 / duration) * 1000
     output = f"info depth {depth} score {score} time {int(duration * 1000)} pv" + pv + f" nps {nps}"
     print(output)
     logger.debug(output)
     sys.stdout.flush()
Example #12
    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]

        while not self.env.board.is_end():
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            history.append(action)
            if action is None:
                print("AI投降了!")
                break
            move = self.env.board.make_single_record(int(action[0]),
                                                     int(action[1]),
                                                     int(action[2]),
                                                     int(action[3]))
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            history.append(self.env.get_state())
            print(f"AI选择移动 {move}")
            self.env.board.print_to_cl()
            sleep(1)

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
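
The repetition guard above recurs in several of these examples: history alternates states and actions, so whenever the current state has appeared before, the actions previously played from it are banned so the game cannot cycle. Extracted into a hypothetical helper:

def banned_moves(history, state):
    # history = [state0, action0, state1, action1, ...]
    # Returns the actions previously played from `state`,
    # or None if `state` has not occurred before.
    if state not in history[:-1]:
        return None
    return [history[i + 1] for i in range(len(history) - 1)
            if history[i] == state]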
Example #13
 def ai_move(self):
     ai_move_first = not self.human_move_first
     history = [self.env.get_state()]
     no_act = None
     while not self.env.done:
         if ai_move_first == self.env.red_to_move:
             labels = ActionLabelsRed
             labels_n = len(ActionLabelsRed)
             self.ai.search_results = {}
             state = self.env.get_state()
             if state in history[:-1]:
                 no_act = []
                 for i in range(len(history) - 1):
                     if history[i] == state:
                         no_act.append(history[i + 1])
                  if no_act:
                     logger.debug(f"no_act = {no_act}")
             action, policy = self.ai.action(state, self.env.num_halfmoves, no_act)
             if action is None:
                 logger.info("AI has resigned!")
                 return
             history.append(action)
             if not self.env.red_to_move:
                 action = flip_move(action)
             key = self.env.get_state()
             p, v = self.ai.debug[key]
             logger.info(f"NN value = {v:.3f}")
             self.nn_value = v
             logger.info("MCTS results:")
             self.mcts_moves = {}
             for move, action_state in self.ai.search_results.items():
                 move_cn = self.env.board.make_single_record(int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                 logger.info(f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}")
                 self.mcts_moves[move_cn] = action_state
             x0, y0, x1, y1 = int(action[0]), int(action[1]), int(action[2]), int(action[3])
             chessman_sprite = select_sprite_from_group(self.chessmans, x0, y0)
             sprite_dest = select_sprite_from_group(self.chessmans, x1, y1)
             if sprite_dest:
                 self.chessmans.remove(sprite_dest)
                 sprite_dest.kill()
             chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
             history.append(self.env.get_state())
Example #14
    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                              enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]

        while not self.env.board.is_end():
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            history.append(action)
            if action is None:
                print("AI投降了!")
                break
            move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            history.append(self.env.get_state())
            print(f"AI选择移动 {move}")
            self.env.board.print_to_cl()
            sleep(1)

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
Example #15
 def ai_move(self):
     ai_move_first = not self.human_move_first
     self.history = [self.env.get_state()]
     no_act = None
     while not self.env.done:
         if ai_move_first == self.env.red_to_move:
             labels = ActionLabelsRed
             labels_n = len(ActionLabelsRed)
             self.ai.search_results = {}
             state = self.env.get_state()
             logger.info(f"state = {state}")
             _, _, _, check = senv.done(state, need_check=True)
             if not check and state in self.history[:-1]:
                 no_act = []
                 free_move = defaultdict(int)
                 for i in range(len(self.history) - 1):
                     if self.history[i] == state:
                          # if the repeating move gives check or chase, forbid it
                         if senv.will_check_or_catch(
                                 state, self.history[i + 1]):
                             no_act.append(self.history[i + 1])
                          # otherwise treat it as an idle move
                         else:
                             free_move[state] += 1
                             if free_move[state] >= 2:
                                  # score it as a draw
                                 self.env.winner = Winner.draw
                                 self.env.board.winner = Winner.draw
                                 break
                 if no_act:
                     logger.debug(f"no_act = {no_act}")
             action, policy = self.ai.action(state, self.env.num_halfmoves,
                                             no_act)
             if action is None:
                 logger.info("AI has resigned!")
                 return
             self.history.append(action)
             if not self.env.red_to_move:
                 action = flip_move(action)
             key = self.env.get_state()
             p, v = self.ai.debug[key]
             logger.info(f"check = {check}, NN value = {v:.3f}")
             self.nn_value = v
             logger.info("MCTS results:")
             self.mcts_moves = {}
             for move, action_state in self.ai.search_results.items():
                 move_cn = self.env.board.make_single_record(
                     int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                 logger.info(
                     f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                 )
                 self.mcts_moves[move_cn] = action_state
             x0, y0, x1, y1 = int(action[0]), int(action[1]), int(
                 action[2]), int(action[3])
             chessman_sprite = select_sprite_from_group(
                 self.chessmans, x0, y0)
             sprite_dest = select_sprite_from_group(self.chessmans, x1, y1)
             if sprite_dest:
                 self.chessmans.remove(sprite_dest)
                 sprite_dest.kill()
             chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
             self.history.append(self.env.get_state())
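
Example #15 refines that guard with senv.will_check_or_catch: on a repetition, only moves that would give check or chase are banned (perpetual check or chase is forbidden), other repeats count as idle moves, and two idle repetitions are scored as a draw. A hypothetical condensation of the rule:

def handle_repetition(history, state, will_check_or_catch):
    no_act, idle_repeats = [], 0
    for i in range(len(history) - 1):
        if history[i] == state:
            if will_check_or_catch(state, history[i + 1]):
                no_act.append(history[i + 1])  # forbid perpetual check/chase
            else:
                idle_repeats += 1              # idle repetition
    return no_act, idle_repeats >= 2           # True -> treat as a draw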
Example #16
    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                              enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
                print(f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s")
                if not self.env.red_to_move:
                    action = flip_move(action)
            else:
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(int(rec_action[0]), int(rec_action[1]), int(rec_action[2]), int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]), int(final_move[2]), int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
Example #17
    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = (idx % 2 == 0)
        final_move = None
        check = False

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red
                                                     and turns % 2 == 1):
                no_act = None
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black's turn
            value = -value

        v = value
        if turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
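
The store block shows how a finished self-play game becomes training data: history interleaves states and actions, and value is negated every ply so each stored action is labelled from its mover's perspective. A toy illustration with made-up values:

history = ['s0', 'a0', 's1', 'a1', 's2', 'a2']  # states and actions interleaved
turns, value = 3, 1                             # hypothetical final result
data = [history[0]]
for i in range(turns):
    data.append([history[i * 2 + 1], value])
    value = -value
print(data)  # ['s0', ['a0', 1], ['a1', -1], ['a2', 1]]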
Example #18
    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        policys = []
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        is_alpha_red = (idx % 2 == 0)
        final_move = None

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                no_act = None
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, policy = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
                try:
                    policy = self.build_policy(action, False)
                except Exception as e:
                    logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                    value = 0
                    break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black's turn
            value = -value

        v = value
        if v == 0 or turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
Example #19
    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                while not is_correct_chessman:
                    title = "Please enter the chess piece position: "
                    input_chessman_pos = input(title)
                    print(input_chessman_pos)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman is not None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(
                            f"The current piece is {chessman.name}; it can move to:"
                        )
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print(
                            "No piece was found at that position, or it is not that side's turn to move"
                        )
                while not is_correct_position:
                    title = "Please enter the location of the child: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(),
                                                self.env.num_halfmoves)
                if action is None:
                    print("The AI has resigned!")
                    break
                if not self.env.red_to_move:
                    action = flip_move(action)
                self.env.step(action)
                print(f"AI chooses to move {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"The winner is is {self.env.board.winner} !!!")
        self.env.board.print_record()
Example #20
    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            if (self.ai_move_first
                    and turns % 2 == 0) or (not self.ai_move_first
                                            and turns % 2 == 1):
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves,
                                           no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(
                    int(action[0]), int(action[1]), int(action[2]),
                    int(action[3]))
                print(
                    f"AlphaZero chose move {move}, taking {(end_time - start_time):.2f}s"
                )
                if not self.env.red_to_move:
                    action = flip_move(action)
            else:
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(
                    int(rec_action[0]), int(rec_action[1]), int(rec_action[2]),
                    int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]),
                                                     int(final_move[1]),
                                                     int(final_move[2]),
                                                     int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()