Example No. 1
    def find_winning_move_and_score(self, env: ReversiEnv, exactly=True):
        if env.done:
            b, w = env.board.number_of_black_and_white
            return None, b - w
        if time() - self.start_time > self.timeout:
            logger.debug("timeout!")
            raise Timeout()

        turn = env.turn
        key = black, white, next_player = env.board.black, env.board.white, env.next_player
        if key in self.cache:
            return self.cache[key]

        if next_player == Player.black:
            legal_moves = find_correct_moves(black, white)
        else:
            legal_moves = find_correct_moves(white, black)

        action_list = [idx for idx in range(225) if legal_moves & (1 << idx)]
        score_list = np.zeros(len(action_list), dtype=int)
        for i, action in enumerate(action_list):
            # env.update(black, white, next_player)
            env.board.black = black
            env.board.white = white
            env.next_player = next_player
            env.turn = turn
            env.done = False
            env.winner = None
            #
            env.step(action)
            _, score = self.find_winning_move_and_score(env, exactly=exactly)
            score_list[i] = score

            if not exactly:
                # a winning move is enough; no need to find the best-scoring move
                if next_player == Player.black and score > 0:
                    break
                elif next_player == Player.white and score < 0:
                    break

        # print(list(zip(action_list, score_list)))

        if next_player == Player.black:
            best_action = action_list[int(np.argmax(score_list))]
            best_score = np.max(score_list)
        else:
            best_action = action_list[int(np.argmin(score_list))]
            best_score = np.min(score_list)

        self.cache[key] = (best_action, best_score)
        return best_action, best_score
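
A minimal driver sketch for the solver above. The `solver` instance and its settings are hypothetical for illustration; the method itself only relies on the `cache`, `start_time`, and `timeout` attributes it reads.

from time import time

env = ReversiEnv().reset()
solver.cache = {}            # hypothetical instance holding the method above
solver.start_time = time()
solver.timeout = 30          # assumed time budget in seconds
try:
    # pass a copy so the destructive search never touches the live game
    action, score = solver.find_winning_move_and_score(env.copy(), exactly=False)
    print(f"move {action} forces final score (black - white) = {score}")
except Timeout:
    print("search aborted: time budget exhausted")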
Example No. 2
    def play_game(self, best_model, ng_model):
        env = ReversiEnv().reset()

        best_player = ReversiPlayer(self.config,
                                    best_model,
                                    play_config=self.config.eval.play_config)
        ng_player = ReversiPlayer(self.config,
                                  ng_model,
                                  play_config=self.config.eval.play_config)
        best_is_black = random() < 0.5
        if best_is_black:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        observation = env.observation
        while not env.done:
            if env.next_player == Player.black:
                action = black.action(observation.black, observation.white)
            else:
                action = white.action(observation.white, observation.black)
            observation, info = env.step(action)

        ng_win = None
        if env.winner == Winner.black:
            if best_is_black:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.white:
            if best_is_black:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_black, observation.number_of_black_and_white
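
A sketch of how these per-game results are typically aggregated into a replacement decision in AlphaZero-style evaluators. The `evaluator` instance, game count, and threshold below are assumptions, not the repo's exact values.

n_games, threshold = 100, 0.55       # assumed evaluator settings
wins = []
for _ in range(n_games):
    ng_win, best_is_black, piece_counts = evaluator.play_game(best_model, ng_model)
    if ng_win is not None:           # draws return None and are skipped
        wins.append(ng_win)
win_rate = sum(wins) / len(wins) if wins else 0.0
if win_rate >= threshold:
    print(f"challenger wins {win_rate:.1%} of decided games: promote ng_model")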
Example No. 3
    def play_game(self, model_1, model_2):
        env = ReversiEnv().reset()

        def make_sim_env_fn():
            return env.copy()

        p1 = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn, config=self.config,
                            model=model_1, play_config=self.config.eval.play_config)
        p1.prepare(env, dir_noise=False)

        p2 = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn, config=self.config,
                            model=model_2, play_config=self.config.eval.play_config)
        p2.prepare(env, dir_noise=False)

        p1_is_black = random() < 0.5
        if p1_is_black:
            black, white = p1, p2
        else:
            black, white = p2, p1

        while not env.done:
            if env.next_player == Player.black:
                action, _, _ = black.think()
            else:
                action, _, _ = white.think()

            env.step(action)

            black.play(action, env)
            white.play(action, env)

        if env.black_wins:
            p1_win = p1_is_black
        elif env.black_loses:
            p1_win = not p1_is_black
        else:
            p1_win = None

        return p1_win
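
Two details above are easy to miss: `make_sim_env_fn` closes over the live `env`, so each simulation starts from a fresh `env.copy()`, and both players receive every move through `play(action, env)` so that each search tree stays in sync with the real game. An illustrative check of the closure behaviour (not repo code):

sim_env = make_sim_env_fn()     # equivalent to env.copy()
assert sim_env is not env       # simulations can never mutate the real game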
Example No. 4
    async def search_my_move(self, env: ReversiEnv, is_root_node=False):
        """

        Q, V is value for this Player(always black).
        P is value for the player of next_player (black or white)
        :param env:
        :param is_root_node:
        :return:
        """
        if env.done:
            if env.winner == Winner.black:
                return 1
            elif env.winner == Winner.white:
                return -1
            else:
                return 0

        key = self.counter_key(env)
        another_side_key = self.another_side_counter_key(env)

        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env)
            if env.next_player == Player.black:
                return leaf_v  # Value for black
            else:
                return -leaf_v  # Value for white == -Value for black

        virtual_loss = self.config.play.virtual_loss
        virtual_loss_for_w = virtual_loss if env.next_player == Player.black else -virtual_loss

        action_t = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(action_t)

        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss_for_w
        leaf_v = await self.search_my_move(env)  # next move

        # on returning search path
        # update: N, W
        self.var_n[key][action_t] += 1 - virtual_loss
        self.var_w[key][action_t] += virtual_loss_for_w + leaf_v
        # update another side info(flip color and player)
        self.var_n[another_side_key][action_t] += 1
        self.var_w[another_side_key][action_t] -= leaf_v  # must flip the sign.
        return leaf_v
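
The virtual-loss bookkeeping is what lets many coroutines search the same tree concurrently: before descending an edge, its visit count N is inflated and its value W deflated, so sibling coroutines see a temporarily worse Q = W/N and pick other edges; the backup then undoes the adjustment and adds the real result. A self-contained toy run with made-up numbers (black to move, so the sign matches `virtual_loss_for_w` above):

virtual_loss = 3
N, W = 10, 4.0                       # toy edge statistics
print(W / N)                         # 0.40: Q before descending
N += virtual_loss; W -= virtual_loss
print(round(W / N, 2))               # 0.08: edge looks bad while in flight
leaf_v = 0.5                         # value reported back from the leaf
N += 1 - virtual_loss; W += virtual_loss + leaf_v
print(N, W, round(W / N, 2))         # 11 4.5 0.41: one real visit recorded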
Example No. 5
    async def search_my_move(self, env: ReversiEnv, is_root_node=False):
        """

        Q, V is value for this Player(always black).
        P is value for the player of next_player (black or white)
        :param env:
        :param is_root_node:
        :return:
        """
        if env.done:
            if env.winner == Winner.black:
                return 1
            elif env.winner == Winner.white:
                return -1
            else:
                return 0

        key = self.counter_key(env)

        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env)
            if env.next_player == Player.black:
                return leaf_v  # Value for black
            else:
                return -leaf_v  # Value for white == -Value for black

        action_t = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(action_t)

        virtual_loss = self.config.play.virtual_loss
        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss
        leaf_v = await self.search_my_move(env)  # next move

        # on returning search path
        # update: N, W, Q, U
        n = self.var_n[key][action_t] = self.var_n[key][action_t] - virtual_loss + 1
        w = self.var_w[key][action_t] = self.var_w[key][action_t] + virtual_loss + leaf_v
        self.var_q[key][action_t] = w / n
        return leaf_v
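
With `var_q` kept equal to W/N, the selection step can score each edge with the standard AlphaZero PUCT rule. A hedged sketch of what `select_action_q_and_u` presumably computes; the exact constant and masking in the repo may differ.

import numpy as np

def puct_select(p, n, w, legal_mask, c_puct=1.5):        # c_puct value assumed
    q = np.where(n > 0, w / np.maximum(n, 1), 0.0)       # mean value per edge
    u = c_puct * p * np.sqrt(max(n.sum(), 1)) / (1 + n)  # exploration bonus
    score = np.where(legal_mask, q + u, -np.inf)         # never pick illegal moves
    return int(np.argmax(score))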
Example No. 6
    def get_next_key(self, own, enemy, action):
        env = ReversiEnv().update(own, enemy, Player.black)
        env.step(action)
        return self.counter_key(env)
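
Note that the scratch env always replays the move as black (`update(own, enemy, Player.black)`), so callers must pass bitboards from the mover's perspective. A usage sketch with hypothetical names:

next_key = searcher.get_next_key(own_bitboard, enemy_bitboard, action=19)  # hypothetical inputs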
Example No. 7
class NBoardEngine:
    def __init__(self, config: Config):
        self.config = config
        self.reader = NonBlockingStreamReader(sys.stdin)
        self.handler = NBoardProtocolVersion2(config, self)
        self.running = False
        self.nc = self.config.nboard  # shortcut
        #
        self.env = ReversiEnv().reset()
        self.model = load_model(self.config)
        self.play_config = self.config.play
        self.player = self.create_player()
        self.turn_of_nboard = None

    def create_player(self):
        logger.debug("create new ReversiPlayer()")
        return ReversiPlayer(self.config,
                             self.model,
                             self.play_config,
                             enable_resign=False)

    def start(self):
        self.running = True
        self.reader.start(push_callback=self.push_callback)
        while self.running:
            message = self.reader.readline(self.nc.read_stdin_timeout)
            if message is None:
                continue
            message = message.strip()
            logger.debug(f"> {message}")
            self.handler.handle_message(message)

    def push_callback(self, message: str):
        # note: called in another thread
        if message.startswith("ping"):  # interrupt
            self.stop_thinking()

    def stop(self):
        self.running = False

    def reply(self, message):
        logger.debug(f"< {message}")
        sys.stdout.write(message + "\n")
        sys.stdout.flush()

    def stop_thinking(self):
        self.player.stop_thinking()

    def set_depth(self, n):
        try:
            n = int(n)
            self.play_config.simulation_num_per_move = n * self.nc.simulation_num_per_depth_about
            logger.info(
                f"set simulation_num_per_move to {self.play_config.simulation_num_per_move}"
            )
        except ValueError:
            pass

    def reset_state(self):
        self.player = self.create_player()

    def set_game(self, game_state: GameState):
        self.env.reset()
        self.env.update(game_state.black, game_state.white, game_state.player)
        self.turn_of_nboard = game_state.player
        for action in game_state.actions:
            self._change_turn()
            if action is not None:
                self.env.step(action)

    def _change_turn(self):
        if self.turn_of_nboard:
            self.turn_of_nboard = Player.black if self.turn_of_nboard == Player.white else Player.white

    def move(self, action):
        self._change_turn()
        if action is not None:
            self.env.step(action)

    def go(self) -> GoResponse:
        if self.env.next_player != self.turn_of_nboard:
            return GoResponse(None, 0, 0)

        board = self.env.board
        if self.env.next_player == Player.black:
            states = (board.black, board.white)
        else:
            states = (board.white, board.black)
        start_time = time()
        action = self.player.action(*states)
        item = self.player.ask_thought_about(*states)
        evaluation = item.values[action]
        time_took = time() - start_time
        return GoResponse(action, evaluation, time_took)

    def hint(self, n_hint):
        """Search the current position and report up to `n_hint` candidate moves.

        :param n_hint: maximum number of hint moves to report
        """
        board = self.env.board
        if self.env.next_player == Player.black:
            states = (board.black, board.white)
        else:
            states = (board.white, board.black)

        def hint_report_callback(values, visits):
            hint_list = []
            for action, visit in sorted(enumerate(visits), key=lambda x: -x[1])[:n_hint]:
                if visit > 0:
                    hint_list.append(
                        HintResponse(action, values[action], visit))
            self.handler.report_hint(hint_list)

        callback_info = CallbackInMCTS(
            self.config.nboard.hint_callback_per_sim, hint_report_callback)
        self.player.action(*states, callback_in_mtcs=callback_info)
        item = self.player.ask_thought_about(*states)
        hint_report_callback(item.values, item.visit)
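
A minimal launch sketch for the engine (hypothetical wiring; the repo's real entry point builds `Config` elsewhere):

config = Config()                # assumed to carry the nboard/play sections used above
engine = NBoardEngine(config)
try:
    engine.start()               # blocks, handling NBoard commands from stdin
except KeyboardInterrupt:
    engine.stop()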
Example No. 8
class PlayWithHuman:
    def __init__(self, config: Config, model_dir):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model(model_dir)
        self.ai = None  # type: EvaluatePlayer
        self.ai_confidence = None

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()

        def make_sim_env_fn():
            return self.env.copy()

        self.ai = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn,
                                 config=self.config,
                                 model=self.model)
        self.ai.prepare(self.env, dir_noise=False)
        self.ai_confidence = None

    def play_next_turn(self):
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)
        elif np.amax(self.env.legal_moves) == 0:
            # pass
            print('pass move')
            pos = 64
            self.env.step(pos)
            self.ai.play(pos, self.env)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(7,7)"""

        pos = int(py * 8 + px)
        assert 0 <= pos < 64
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.board.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 8 + px)
        if pos < 0 or 64 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 8 + px)
        assert 0 <= pos < 64

        if self.next_player != self.human_color:
            raise Exception('not human\'s turn!')

        self.env.step(pos)

        self.ai.play(pos, self.env)

    def _load_model(self, model_dir):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        model.create_session()
        model.load(model_dir)

        return model

    def move_by_ai(self):
        if self.next_player == self.human_color:
            raise Exception('not AI\'s turn!')

        logger.info('start thinking...')
        action, _, vs = self.ai.think()
        self.ai_confidence = vs
        logger.info('end thinking...')
        self.env.step(action)
        self.ai.play(action, self.env)

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
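
A minimal wiring sketch for a GUI front end (hypothetical names: `config`, `MODEL_DIR`, `redraw`):

game = PlayWithHuman(config, MODEL_DIR)

def on_event(event):
    if event == GameEvent.update:
        redraw(game)                 # repaint via game.stone(px, py)
    elif event == GameEvent.ai_move:
        game.move_by_ai()            # blocks while the AI thinks
        game.play_next_turn()
    elif event == GameEvent.over:
        print(game.number_of_black_and_white)

game.add_observer(on_event)
game.start_game(human_is_black=True)
game.play_next_turn()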
Example No. 9
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(14,14)"""
        pos = int(py * 15 + px)
        assert 0 <= pos < 225
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 15 + px)
        if pos < 0 or 225 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 15 + px)
        assert 0 <= pos < 225

        if self.next_player != self.human_color:
            return False

        self.env.step(pos)

    def _load_model(self):
        return load_model(self.config)

    def move_by_ai(self):
        if self.next_player == self.human_color:
            return False

        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)

        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
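
A click-handler sketch for this 15x15 variant (`px`, `py` are hypothetical board coordinates coming from the GUI):

if game.next_player == game.human_color and game.available(px, py):
    game.move(px, py)        # human plays; env advances
    game.play_next_turn()    # observers fire; GameEvent.ai_move triggers the reply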
Example No. 10
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(7,7)"""
        pos = int(py * 8 + px)
        assert 0 <= pos < 64
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 8 + px)
        if pos < 0 or 64 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 8 + px)
        assert 0 <= pos < 64

        if self.next_player != self.human_color:
            return False

        self.env.step(pos)

    def _load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        if self.config.play.use_newest_next_generation_model:
            loaded = reload_newest_next_generation_model_if_changed(
                model) or load_best_model_weight(model)
        else:
            loaded = load_best_model_weight(
                model) or reload_newest_next_generation_model_if_changed(model)
        if not loaded:
            raise RuntimeError("No models found!")
        return model

    def move_by_ai(self):
        if self.next_player == self.human_color:
            return False

        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)

        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
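
`stone(px, py)` makes console rendering trivial; a sketch for this 8x8 variant (assumes a `game` instance as above):

for py in range(8):
    row = []
    for px in range(8):
        s = game.stone(px, py)
        row.append("B" if s == Player.black else "W" if s == Player.white else ".")
    print(" ".join(row))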