Exemplo n.º 1
0
    def _playout(self, game_state: go.GameState):
        """Perform one simulation: descend from the root to a leaf, evaluate
        that leaf, and back the result up through the visited nodes.

        ``game_state`` is advanced in place by every move taken on the way
        down, so the caller must pass a copy it does not need to keep.
        """
        current = self._root

        # Selection: keep following ``select`` until a leaf node is reached,
        # replaying each chosen action on the game state.
        while not current.is_leaf():
            action, current = current.select(self._c_puct)
            if action == PASS_MOVE:
                game_state.do_move(PASS_MOVE)
                continue
            # Non-pass actions are flat indices; split them into the
            # (index // size, index % size) pair that ``do_move`` receives.
            game_state.do_move(divmod(action, game_state.size))

        # Evaluation: the position is treated as terminal when the game is
        # flagged as ended or when no legal move is reported.
        if game_state.is_end_of_game or len(game_state.get_legal_moves(False)) == 0:
            # Terminal position: use the actual outcome, seen from the side
            # to move, instead of a network estimate.
            winner = game_state.get_winner()
            if winner is None:  # tie
                leaf_value = 0.0
            elif winner == game_state.current_player:
                leaf_value = 1.0
            else:
                leaf_value = -1.0
        else:
            # Non-terminal leaf: ask the network for priors and a value,
            # then grow the tree with those priors.
            action_probs, leaf_value = self._policy_value_net.policy_value_fn(
                game_state
            )
            current.expand(action_probs)

        # Backup: the value is negated before being handed to the leaf's
        # recursive update, which refreshes the nodes of this traversal.
        current.update_recursive(-leaf_value)
Exemplo n.º 2
0
    def _evaluate_rollout(self, game_state: go.GameState, limit=1000):
        """Play the position out with the rollout policy and score the result.

        At most ``limit`` moves are played, always choosing the action with
        the highest rollout probability. The return value is 1.0 when the
        player who was to move on entry wins, -1.0 when that player does not
        win, and 0.0 when ``get_winner()`` reports no winner (a tie).
        ``game_state`` is advanced in place.
        """
        # Remember whose perspective the result is reported from before any
        # move changes the side to move.
        player = game_state.current_player

        stopped_early = False
        for _ in range(limit):
            game_over = game_state.is_end_of_game
            if game_over or len(game_state.get_legal_moves(False)) == 0:
                stopped_early = True
                break

            # Greedy choice: take the (action, probability) entry with the
            # largest probability and keep its action.
            best_entry = max(_rollout_policy_fn(game_state), key=itemgetter(1))
            max_action = best_entry[0]
            if max_action == PASS_MOVE:
                game_state.do_move(PASS_MOVE)
            else:
                # Split the flat action index into the pair ``do_move`` takes.
                game_state.do_move(
                    (max_action // game_state.size, max_action % game_state.size)
                )

        if not stopped_early:
            # Every allowed iteration was used without meeting the stop
            # condition at the top of the loop.
            print("WARNING: rollout reached move limit")

        winner = game_state.get_winner()
        if winner is None:  # tie
            return 0.0
        if winner == player:
            return 1.0
        return -1.0