def test_tic_tac_toe_number_histories(self):
     game = pyspiel.load_game("tic_tac_toe")
     states = get_all_states.get_all_states(
         game,
         depth_limit=-1,
         include_terminals=True,
         include_chance_states=False,
         to_string=lambda s: s.history_str())
     self.assertLen(states, 549946)
     states = get_all_states.get_all_states(game,
                                            depth_limit=-1,
                                            include_terminals=True,
                                            include_chance_states=False,
                                            to_string=str)
     self.assertLen(states, 5478)
def main(_):
    games_list = pyspiel.registered_games()
    print("Registered games:")
    print(games_list)

    print("Creating game: " + FLAGS.game)
    if FLAGS.players is not None:
        # If passing parameters, must use game creator.
        game = pyspiel.load_game(
            FLAGS.game, {"players": pyspiel.GameParameter(FLAGS.players)})
    else:
        # Otherwise can create directly.
        game = pyspiel.load_game(FLAGS.game)

    print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit))
    all_states = get_all_states.get_all_states(game, FLAGS.depth_limit,
                                               FLAGS.include_terminals,
                                               FLAGS.include_chance_states)

    count = 0
    for state in all_states:
        print("")
        print(str(state))
        count += 1

    print("")
    print("Total: {} states.".format(count))
Example #3
    def test_cpp_and_python_implementations_are_identical(self, game_name):
        game = pyspiel.load_game(game_name)

        python_policy = policy.UniformRandomPolicy(game)
        pyspiel_policy = pyspiel.UniformRandomPolicy(game)

        all_states = get_all_states.get_all_states(
            game,
            depth_limit=-1,
            include_terminals=False,
            include_chance_states=False,
            to_string=lambda s: s.information_state_string())

        for current_player in range(game.num_players()):
            python_br = best_response.BestResponsePolicy(
                game, current_player, python_policy)
            cpp_br = pyspiel.TabularBestResponse(
                game, current_player,
                pyspiel_policy).get_best_response_policy()

            for state in all_states.values():
                if state.current_player() != current_player:
                    continue

                # TODO(b/141737795): Decide what to do about this.
                self.assertEqual(
                    python_br.action_probabilities(state), {
                        a: prob
                        for a, prob in cpp_br.action_probabilities(
                            state).items() if prob != 0
                    })
Example #4
 def test_simultaneous_python_game_get_all_state(self):
     game = pyspiel.load_game(
         "python_iterated_prisoners_dilemma(max_game_length=6)")
     states = get_all_states.get_all_states(
         game,
         depth_limit=-1,
         include_terminals=True,
         include_chance_states=False,
         to_string=lambda s: s.history_str())
     self.assertLen(states, 10921)
     states = get_all_states.get_all_states(game,
                                            depth_limit=-1,
                                            include_terminals=True,
                                            include_chance_states=False,
                                            to_string=str)
     self.assertLen(states, 5461)
    def test_simultaneous_game_noisy_policy(self, game_name):
        game = pyspiel.load_game(game_name)

        policy = openspiel_policy.UniformRandomPolicy(game)

        all_states = get_all_states.get_all_states(
            game,
            depth_limit=10,
            include_terminals=False,
            include_chance_states=False,
            to_string=lambda s: s.history_str())

        for current_player in range(game.num_players()):
            noise = noisy_policy.NoisyPolicy(policy,
                                             player_id=current_player,
                                             alpha=0.5,
                                             beta=10.)
            for state in all_states.values():
                if state.current_player() == pyspiel.PlayerId.SIMULTANEOUS:
                    for player_id in range(game.num_players()):
                        if player_id != current_player:
                            self.assertEqual(
                                policy.action_probabilities(state, player_id),
                                noise.action_probabilities(state, player_id))
                        else:
                            self.assertNotEqual(
                                policy.action_probabilities(state, player_id),
                                noise.action_probabilities(state, player_id))
    def test_cpp_and_python_implementations_are_identical(self, game_name):
        game = pyspiel.load_game(game_name)

        policy = openspiel_policy.UniformRandomPolicy(game)

        all_states = get_all_states.get_all_states(
            game,
            depth_limit=-1,
            include_terminals=False,
            include_chance_states=False,
            to_string=lambda s: s.information_state_string())

        for current_player in range(game.num_players()):
            noise = noisy_policy.NoisyPolicy(policy,
                                             player_id=current_player,
                                             alpha=0.5,
                                             beta=10.)
            for state in all_states.values():
                if state.current_player() < 0:
                    continue

                if state.current_player() != current_player:
                    self.assertEqual(policy.action_probabilities(state),
                                     noise.action_probabilities(state))
                else:
                    self.assertNotEqual(policy.action_probabilities(state),
                                        noise.action_probabilities(state))
Example #7
def main(_):
  games_list = pyspiel.registered_games()
  print("Registered games:")
  for game in games_list:
    print(" ", game.short_name)
  print()

  print("Creating game:", FLAGS.game)
  params = {}
  if FLAGS.players is not None:
    params["players"] = FLAGS.players
  game = pyspiel.load_game(FLAGS.game, params)

  print("Getting all states; depth_limit = {}".format(FLAGS.depth_limit))
  all_states = get_all_states.get_all_states(game, FLAGS.depth_limit,
                                             FLAGS.include_terminals,
                                             FLAGS.include_chance_states)

  count = 0
  for state in all_states:
    print(state)
    count += 1

  print()
  print("Total: {} states.".format(count))
def summarize_infostates(game, num_player=2, num_actions=2):
    info_states = [[] for _ in range(num_player)]
    init_states = []  # All possible states reached right after the chance nodes have dealt.
    states = get_all_states.get_all_states(game,
                                           depth_limit=-1,
                                           include_terminals=True,
                                           include_chance_states=False,
                                           to_string=lambda s: s.history_str())
    # Extract all information states.
    for his, state in states.items():
        if not state.is_player_node():
            continue
        cur_p = state.current_player()
        info_states[cur_p].append(state.information_state_string())

        if len(his.split(' ')) == num_player:
            init_states.append(state)

    info_states = [list(set(ele)) for ele in info_states]
    print('info states for players', info_states)

    # Generate strategies for the info states: index every pure mapping from
    # info states to actions.
    strategies = [
        list(range(0, math.floor(math.pow(num_actions, len(ele)))))
        for ele in info_states
    ]
    return info_states, strategies, init_states
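A small usage sketch for summarize_infostates, assuming OpenSpiel's kuhn_poker and the imports the function itself needs (math, pyspiel, get_all_states); the printed counts are simply whatever the function computes, nothing below is part of the original snippet:

import math  # needed by summarize_infostates above

import pyspiel
from open_spiel.python.algorithms import get_all_states

game = pyspiel.load_game("kuhn_poker")
info_states, strategies, init_states = summarize_infostates(
    game, num_player=2, num_actions=2)
print("player 0 info states:", len(info_states[0]))
print("player 0 pure strategies:", len(strategies[0]))
print("states right after the deal:", len(init_states))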
Example #9
    def test_legal_actions_returns_empty_list_on_opponent(self, game_name):
        game = pyspiel.load_game(game_name)

        some_states = get_all_states.get_all_states(game,
                                                    depth_limit=5,
                                                    include_terminals=True,
                                                    include_chance_states=True)
        # We check we have some non-terminal non-random states
        self.assertTrue(
            any(not s.is_terminal() and not s.is_chance_node()
                for s in some_states.values()))

        for state in some_states.values():
            if not state.is_terminal():
                self.assertNotEqual(state.get_type(),
                                    pyspiel.StateType.TERMINAL)
                current_player = state.current_player()
                for player in range(game.num_players()):
                    if player != current_player:
                        msg = (
                            "The game {!r} does not return an empty list on "
                            "legal_actions(<not current player>)"
                        ).format(game_name)
                        # It is illegal to call legal_actions(player) on a chance node for
                        # a non chance player.
                        if not (state.is_chance_node()
                                and player != current_player):
                            self.assertEmpty(state.legal_actions(player),
                                             msg=msg)
            else:
                self.assertEqual(state.get_type(), pyspiel.StateType.TERMINAL)
Example #10
def compute_states_and_info_states_if_none(game,
                                           all_states=None,
                                           state_to_information_state=None):
    """Returns all_states and/or state_to_information_state for the game.

  To recompute everything, pass in None for both all_states and
  state_to_information_state. Otherwise, this function will use the passed in
  values to reconstruct either of them.

  Args:
    game: The open_spiel game.
    all_states: The result of calling get_all_states.get_all_states. Cached for
      improved performance.
    state_to_information_state: A dict mapping str(state) to
      state.information_state for every state in the game. Cached for improved
      performance.
  """
    if all_states is None:
        all_states = get_all_states.get_all_states(game,
                                                   depth_limit=-1,
                                                   include_terminals=False,
                                                   include_chance_states=False)

    if state_to_information_state is None:
        state_to_information_state = {
            state: all_states[state].information_state_string()
            for state in all_states
        }

    return all_states, state_to_information_state
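A sketch of the caching pattern this helper is written for: compute the maps once, then pass them back in on later calls so nothing is recomputed (kuhn_poker is only an illustrative choice):

import pyspiel

game = pyspiel.load_game("kuhn_poker")

# First call builds both maps from scratch.
all_states, state_to_info = compute_states_and_info_states_if_none(game)

# Subsequent calls reuse the cached values and return them unchanged.
all_states, state_to_info = compute_states_and_info_states_if_none(
    game, all_states=all_states, state_to_information_state=state_to_info)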
Example #11
    def setUpClass(cls):
        super(EnforceAPIOnFullTreeBase, cls).setUpClass()

        cls.all_states = set(
            get_all_states.get_all_states(cls.game,
                                          depth_limit=-1,
                                          include_terminals=True,
                                          include_chance_states=True).values())
Example #12
 def test_simultaneous_game_get_all_state(self):
     game = pyspiel.load_game("goofspiel", {"num_cards": 3})
     states = get_all_states.get_all_states(
         game,
         depth_limit=-1,
         include_terminals=True,
         include_chance_states=False,
         to_string=lambda s: s.history_str())
     self.assertLen(states, 273)
Example #13
    def __init__(self,
                 game,
                 players=None,
                 to_string=lambda s: s.history_str(),
                 states=None):
        """Initializes a uniform random policy for all players in the game."""
        players = sorted(players or range(game.num_players()))
        super().__init__(game, players)
        self.game_type = game.get_type()

        # Get all states in the game at which players have to make decisions unless
        # they are explicitly specified.
        states = states or get_all_states.get_all_states(
            game,
            depth_limit=-1,
            include_terminals=False,
            include_chance_states=False,
            include_mean_field_states=False,
            to_string=to_string)

        # Assemble legal actions for every valid (state, player) pair, keyed by
        # information state string.
        self.state_lookup = {}
        self.states_per_player = [[] for _ in range(game.num_players())]
        self.states = []
        legal_actions_list = []
        state_in_list = []
        for player in players:
            # States are ordered by their history.
            for _, state in sorted(states.items(), key=lambda pair: pair[0]):
                if state.is_simultaneous_node(
                ) or player == state.current_player():
                    legal_actions = state.legal_actions_mask(player)
                    if any(legal_actions):
                        key = self._state_key(state, player)
                        if key not in self.state_lookup:
                            state_index = len(legal_actions_list)
                            self.state_lookup[key] = state_index
                            legal_actions_list.append(legal_actions)
                            self.states_per_player[player].append(key)
                            self.states.append(state)
                            if self.game_type.provides_information_state_tensor:
                                state_in_list.append(
                                    state.information_state_tensor(player))
                            elif self.game_type.provides_observation_tensor:
                                state_in_list.append(
                                    state.observation_tensor(player))

        # Put legal action masks in a numpy array and create the uniform random
        # policy.
        self.state_in = None
        if state_in_list:
            self.state_in = np.array(state_in_list)
        self.legal_actions_mask = np.array(legal_actions_list)
        self.action_probability_array = (
            self.legal_actions_mask /
            np.sum(self.legal_actions_mask, axis=-1, keepdims=True))
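Assuming the __init__ above belongs to policy.TabularPolicy (as in OpenSpiel), a quick sanity check that the arrays it builds describe a uniform random policy over legal actions:

import numpy as np
import pyspiel
from open_spiel.python import policy

game = pyspiel.load_game("kuhn_poker")
tabular = policy.TabularPolicy(game)

# One row per information state; every row sums to 1 over the legal actions.
print(tabular.action_probability_array.shape)
np.testing.assert_allclose(tabular.action_probability_array.sum(axis=-1), 1.0)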
Example #14
 def test_consistent(self):
     """Checks the Python and C++ game implementations are the same."""
     py_game = pyspiel.load_game("python_tic_tac_toe")
     cc_game = pyspiel.load_game("tic_tac_toe")
     py_obs = make_observation(py_game)
     cc_obs = make_observation(cc_game)
     py_states = get_all_states(py_game, to_string=str)
     cc_states = get_all_states(cc_game, to_string=str)
     self.assertCountEqual(list(cc_states), list(py_states))
     for key, cc_state in cc_states.items():
         py_state = py_states[key]
         np.testing.assert_array_equal(py_state.history(),
                                       cc_state.history())
         np.testing.assert_array_equal(py_state.returns(),
                                       cc_state.returns())
         py_obs.set_from(py_state, 0)
         cc_obs.set_from(cc_state, 0)
         np.testing.assert_array_equal(py_obs.tensor, cc_obs.tensor)
Example #15
def value_iteration(game, depth_limit, threshold):
    """Solves for the optimal value function of a game.

  For small games only! Solves the game using value iteration,
  with the maximum error for the value function less than threshold.
  This algorithm works for sequential 1-player games or 2-player zero-sum
  games, with or without chance nodes.

  Arguments:
    game: The game to analyze, as returned by `load_game`.
    depth_limit: How deeply to analyze the game tree. Negative means no limit, 0
      means root-only, etc.
    threshold: Maximum error for state values.

  Returns:
    A `dict` with string keys and float values, mapping string encoding of
    states to the values of those states.
  """
    if game.num_players() not in (1, 2):
        raise ValueError("Game must be a 1-player or 2-player game")
    if (game.num_players() == 2
            and game.get_type().utility != pyspiel.GameType.Utility.ZERO_SUM):
        raise ValueError("2-player games must be zero sum games")
    # We expect Value Iteration to be used with perfect information games, in
    # which `str` is assumed to display the state of the game.
    states = get_all_states.get_all_states(game,
                                           depth_limit,
                                           True,
                                           False,
                                           to_string=str)
    values = {}
    transitions = {}

    _initialize_maps(states, values, transitions)
    error = threshold + 1  # A value larger than threshold
    min_utility = game.min_utility()
    while error > threshold:
        error = 0
        for key, state in states.items():
            if state.is_terminal():
                continue
            player = state.current_player()
            value = min_utility if player == 0 else -min_utility
            for action in state.legal_actions():
                next_states = transitions[(key, action)]
                q_value = sum(p * values[next_state]
                              for next_state, p in next_states)
                if player == 0:
                    value = max(value, q_value)
                else:
                    value = min(value, q_value)
            error = max(abs(values[key] - value), error)
            values[key] = value

    return values
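A hedged usage sketch: running this value iteration on tic-tac-toe and reading off the root value, assuming the function lives in open_spiel.python.algorithms.value_iteration; under optimal play tic-tac-toe is a draw, so the root value should be 0.

import pyspiel
from open_spiel.python.algorithms import value_iteration

game = pyspiel.load_game("tic_tac_toe")
values = value_iteration.value_iteration(game, depth_limit=-1, threshold=0.01)

root = str(game.new_initial_state())
print("root value:", values[root])  # expected: 0.0 (draw under optimal play)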
Example #16
 def test_consistent(self):
     """Checks the Python and C++ game implementations are the same."""
     py_game = pyspiel.load_game("python_kuhn_poker")
     cc_game = pyspiel.load_game("kuhn_poker")
     obs_types = [None, pyspiel.IIGObservationType(perfect_recall=True)]
     py_observations = [make_observation(py_game, o) for o in obs_types]
     cc_observations = [make_observation(cc_game, o) for o in obs_types]
     py_states = get_all_states(py_game)
     cc_states = get_all_states(cc_game)
     self.assertCountEqual(list(cc_states), list(py_states))
     for key, cc_state in cc_states.items():
         py_state = py_states[key]
         np.testing.assert_array_equal(py_state.history(),
                                       cc_state.history())
         np.testing.assert_array_equal(py_state.returns(),
                                       cc_state.returns())
         for py_obs, cc_obs in zip(py_observations, cc_observations):
             for player in (0, 1):
                 py_obs.set_from(py_state, player)
                 cc_obs.set_from(cc_state, player)
                 np.testing.assert_array_equal(py_obs.tensor, cc_obs.tensor)
Example #17
 def test_has_at_least_an_action(self, game_name):
     """Check that all population's state have at least one action."""
     game = pyspiel.load_game(game_name)
     to_string = lambda s: s.observation_string(pyspiel.PlayerId.
                                                DEFAULT_PLAYER_ID)
     states = get_all_states.get_all_states(game,
                                            depth_limit=-1,
                                            include_terminals=False,
                                            include_chance_states=False,
                                            include_mean_field_states=False,
                                            to_string=to_string)
     for state in states.values():
         self.assertNotEmpty(state.legal_actions())
Example #18
def self_train():
    env = rl_environment.Environment("kuhn_poker")
    num_actions = env.action_spec()["num_actions"]

    player1 = QLearner(0, num_actions)
    player2 = QLearner(1, num_actions)
    state_size = env.observation_spec()["info_state"][0]
    # Keep the session open for the rest of the function; the DQN agents use it
    # when acting and learning in the loop below.
    sess = tf.Session()
    player1 = DQN(sess,
                  0,
                  state_representation_size=state_size,
                  num_actions=num_actions)
    player2 = DQN(sess,
                  1,
                  state_representation_size=state_size,
                  num_actions=num_actions)
    sess.run(tf.global_variables_initializer())

    players = [player1, player2]

    iterations = 1000000
    for episode in range(iterations):
        if episode % 1000 == 0:
            print("Curr_episode", str(episode))

        time_step = env.reset()
        while not time_step.last():
            curr_player_id = time_step.current_player()
            agent_output = players[curr_player_id].step(time_step)
            time_step = env.step([agent_output.action])

        for player in players:
            player.step(time_step)

    # Tabular Q-values only exist if the QLearner agents are used.
    if hasattr(player1, "_q_values"):
        print(player1._q_values)

    game = pyspiel.load_game("kuhn_poker")
    all_states = get_all_states.get_all_states(
        game,
        depth_limit=-1,
        include_terminals=False,
        include_chance_states=False,
        to_string=lambda s: s.information_state_string())

    # Initialized to uniform for each state
    tabular_policy = TabularPolicy(game)

    for state in all_states:
        state_policy = tabular_policy.policy_for_key(state)
        print("State: {}, state_policy: {}".format(state, state_policy))
Example #19
 def test_compression_binary(self):
   # All infostates for leduc are binary, so we can compress them effectively.
   game = pyspiel.load_game("leduc_poker")
   obs1 = make_observation(game, INFO_STATE_OBS_TYPE)
   obs2 = make_observation(game, INFO_STATE_OBS_TYPE)
   self.assertLen(obs1.tensor, 30)  # 30 floats = 120 bytes
   for state in get_all_states.get_all_states(game).values():
     for player in range(game.num_players()):
       obs1.set_from(state, player)
       compressed = obs1.compress()
       self.assertEqual(type(compressed), bytes)
       self.assertLen(compressed, 5)
       obs2.decompress(compressed)
       np.testing.assert_array_equal(obs1.tensor, obs2.tensor)
Example #20
def policy_to_dict_but_we_can_actually_use_it(player_policy,
                                              game,
                                              all_states=None,
                                              state_to_information_state=None,
                                              player_id: Optional[int] = None):
    """Converts a Policy instance into a tabular policy represented as a dict.

    This is compatible with the C++ TabularExploitability code (i.e.
    pyspiel.exploitability, pyspiel.TabularBestResponse, etc.).

    While you do not have to pass the all_states and state_to_information_state
    arguments, creating them outside of this function will speed your code up
    dramatically.

    Args:
      player_policy: The policy you want to convert to a dict.
      game: The game the policy is for.
      all_states: The result of calling get_all_states.get_all_states. Can be
        cached for improved performance.
      state_to_information_state: A dict mapping str(state) to
        state.information_state for every state in the game. Can be cached for
        improved performance.
      player_id: If set, only include states at which this player acts.

    Returns:
      A dictionary version of player_policy that can be passed to the C++
      TabularBestResponse, Exploitability, and BestResponse functions/classes.
    """
    if all_states is None:
        all_states = get_all_states.get_all_states(game,
                                                   depth_limit=-1,
                                                   include_terminals=False,
                                                   include_chance_states=False)
        state_to_information_state = {
            state: str(
                np.asarray(
                    all_states[state].information_state_as_normalized_vector(),
                    dtype=np.float32).tolist())
            for state in all_states
        }
    tabular_policy = dict()
    for state in all_states:
        if player_id is not None and all_states[state].current_player(
        ) != player_id:
            continue

        information_state = state_to_information_state[state]
        tabular_policy[information_state] = list(
            player_policy.action_probabilities(all_states[state]).items())
    return tabular_policy
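A sketch of the "precompute the caches" pattern the docstring recommends; the keys below use information_state_string() rather than the normalized-vector strings built by the fallback branch, which is an assumption for illustration and not part of the original:

import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import get_all_states

game = pyspiel.load_game("kuhn_poker")
all_states = get_all_states.get_all_states(
    game, depth_limit=-1, include_terminals=False, include_chance_states=False)
state_to_information_state = {
    s: all_states[s].information_state_string() for s in all_states
}

uniform = policy.UniformRandomPolicy(game)
tabular = policy_to_dict_but_we_can_actually_use_it(
    uniform, game, all_states, state_to_information_state, player_id=0)
print(len(tabular), "player-0 information states in the dict")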
def test_tabular_policy_to_csv(tmpdir):
    # Setup game and policy
    game = pyspiel.load_game("kuhn_poker")
    tabular_policy = policy.TabularPolicy(game)
    # Save policy as CSV
    output = os.path.join(tmpdir, 'policy.csv')
    policy_to_csv(game, tabular_policy, output)
    assert list(tmpdir.listdir()) == [output]
    # Check created CSV
    csv = pd.read_csv(output, index_col=0)
    # Get all states in the game at which players have to make decisions.
    states = get_all_states.get_all_states(game,
                                           depth_limit=-1,
                                           include_terminals=False,
                                           include_chance_states=False)
    assert set(csv.index.values) <= set(states.keys())
    assert len(csv.columns) == game.num_distinct_actions()
Example #22
def test_policy_on_game(self, game, policy_object):
    """Checks the policy conforms to the conventions.

  Checks that Policy.action_probabilities contains only legal actions (but not
  necessarily all of them).
  Checks that the probabilities are positive and sum to 1.

  Args:
    self: The test class. This method is intended to be used as a utility
      function to test policies.
    game: A `pyspiel.Game`, the same one the policy was built for.
    policy_object: A `policy.Policy` object on `game` to test.
  """

    all_states = get_all_states.get_all_states(
        game,
        depth_limit=-1,
        include_terminals=False,
        include_chance_states=False,
        to_string=lambda s: s.information_state_string())

    for state in all_states.values():
        legal_actions = set(state.legal_actions())
        action_probabilities = policy_object.action_probabilities(state)

        for action in action_probabilities.keys():
            # We want a clearer error message to be able to debug.
            actions_missing = set(legal_actions) - set(
                action_probabilities.keys())
            illegal_actions = set(
                action_probabilities.keys()) - set(legal_actions)
            self.assertIn(
                action,
                legal_actions,
                msg="The action {} is present in the policy but is not a legal "
                "actions (these are {})\n"
                "Legal actions missing from policy: {}\n"
                "Illegal actions present in policy: {}".format(
                    action, legal_actions, actions_missing, illegal_actions))

        sum_ = 0
        for prob in action_probabilities.values():
            sum_ += prob
            self.assertGreaterEqual(prob, 0)
        self.assertAlmostEqual(1, sum_)
Example #23
def get_tabular_policy_states(game):
    """Returns the states of the game for a tabular policy."""
    if game.get_type().dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD:
        # TODO(perolat): We use s.observation_string(DEFAULT_MFG_PLAYER) here as the
        # number of histories is exponential in the depth of the MFG. What we really
        # need is a representation of the state. For many-player mean-field games,
        # the state will be (x0, x1, x2, ..., xn) and observation_string(0) will
        # output the string of x0. In that case we would need something like
        # str([observation_string(i) for i in range(num_player)]).
        to_string = lambda s: s.observation_string(pyspiel.PlayerId.
                                                   DEFAULT_PLAYER_ID)
    else:
        to_string = lambda s: s.history_str()
    return get_all_states.get_all_states(game,
                                         depth_limit=-1,
                                         include_terminals=False,
                                         include_chance_states=False,
                                         include_mean_field_states=False,
                                         to_string=to_string)
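A brief usage sketch on a non-mean-field game, where states are keyed by history string (for MEAN_FIELD games the keys would be observation strings instead); kuhn_poker here is only an example choice:

import pyspiel

game = pyspiel.load_game("kuhn_poker")
states = get_tabular_policy_states(game)
print(len(states), "decision states (terminals and chance nodes excluded)")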
def test_callable_policy_to_csv(tmpdir):
    def _uniform_policy(state):
        actions = state.legal_actions()
        p = 1.0 / len(actions)
        return [(a, p) for a in actions]

    # Setup game and policy
    game = pyspiel.load_game("kuhn_poker")
    callable_policy = policy.PolicyFromCallable(game, _uniform_policy)
    # Save policy as CSV
    output = os.path.join(tmpdir, 'policy.csv')
    policy_to_csv(game, callable_policy, output)
    assert list(tmpdir.listdir()) == [output]
    # Check created CSV
    csv = pd.read_csv(output, index_col=0)
    # Get all states in the game at which players have to make decisions.
    states = get_all_states.get_all_states(game,
                                           depth_limit=-1,
                                           include_terminals=False,
                                           include_chance_states=False)
    assert set(csv.index.values) <= set(states.keys())
Example #25
 def test_compression_none(self):
   # Most observations for leduc have non-binary data, so we can't
   # currently compress them.
   game = pyspiel.load_game("leduc_poker")
   obs1 = make_observation(game)
   obs2 = make_observation(game)
   self.assertLen(obs1.tensor, 16)  # 16 floats = 64 bytes
   freq = collections.Counter()
   for state in get_all_states.get_all_states(game).values():
     for player in range(game.num_players()):
       obs1.set_from(state, player)
       compressed = obs1.compress()
       self.assertEqual(type(compressed), bytes)
       freq[len(compressed)] += 1
       obs2.decompress(compressed)
       np.testing.assert_array_equal(obs1.tensor, obs2.tensor)
   expected_freq = {
       3: 840,     # Compressible states take 3 bytes
       65: 17760,  # Uncompressible states take 65 bytes
   }
   self.assertEqual(freq, expected_freq)
Example #26
def print_policy_analysis(policies, game, verbose=False):
    """Function printing policy diversity within game's known policies.

  Warning : only works with deterministic policies.
  Args:
    policies: List of list of policies (One list per game player)
    game: OpenSpiel game object.
    verbose: Whether to print policy diversity information. (True : print)

  Returns:
    List of list of unique policies (One list per player)
  """
    states_dict = get_all_states.get_all_states(game, -1, False, False)
    unique_policies = []
    for player in range(len(policies)):
        cur_policies = policies[player]
        cur_set = set()
        for pol in cur_policies:
            cur_str = ""
            for state_str in states_dict:
                if states_dict[state_str].current_player() == player:
                    pol_action_dict = pol(states_dict[state_str])
                    max_prob = max(list(pol_action_dict.values()))
                    max_prob_actions = [
                        a for a in pol_action_dict
                        if pol_action_dict[a] == max_prob
                    ]
                    cur_str += "__" + state_str
                    for a in max_prob_actions:
                        cur_str += "-" + str(a)
            cur_set.add(cur_str)

        unique_policies.append(cur_set)
    if verbose:
        print("\n---------------------------\nPolicy Diversity :")
        for player, cur_set in enumerate(unique_policies):
            print("Player {} : {} unique policies.".format(
                player, len(cur_set)))
    print("")
    return unique_policies
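A toy usage sketch: print_policy_analysis only needs each policy to be a callable mapping a state to an {action: probability} dict, so a hand-rolled deterministic callable is enough to exercise it (all names below are illustrative):

import pyspiel

def always_first_action(state):
    # Deterministic: put all probability mass on the lowest legal action.
    legal = state.legal_actions()
    return {a: float(a == legal[0]) for a in legal}

game = pyspiel.load_game("kuhn_poker")
policies = [[always_first_action, always_first_action] for _ in range(2)]
unique = print_policy_analysis(policies, game, verbose=True)
# Both copies per player are identical, so each player reports 1 unique policy.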
Example #27
  def __call__(self, player, player_policy, info_states):
    """Computes action values per state for the player.

    Args:
      player: The id of the player (0 <= player < game.num_players()). This
        player will play `player_policy`, while the opponent will play a best
        response.
      player_policy: A `policy.Policy` object.
      info_states: A list of info state strings.

    Returns:
      A `_CalculatorReturn` nametuple. See its docstring for the documentation.
    """
    self.player = player
    opponent = 1 - player

    def best_response_policy(state):
      infostate = state.information_state_string(opponent)
      action = best_response_actions[infostate]
      return [(action, 1.0)]

    # If the policy is a TabularPolicy, we can directly copy the infostate
    # strings & values from the class. This is significantly faster than having
    # to create the infostate strings.
    if isinstance(player_policy, policy.TabularPolicy):
      tabular_policy = {
          key: _tuples_from_policy(player_policy.policy_for_key(key))
          for key in player_policy.state_lookup
      }
    # Otherwise, we have to calculate all the infostate strings everytime. This
    # is ~2x slower.
    else:
      # We cache these as they are expensive to compute & do not change.
      if self._all_states is None:
        self._all_states = get_all_states.get_all_states(
            self.game,
            depth_limit=-1,
            include_terminals=False,
            include_chance_states=False)
        self._state_to_information_state = {
            state: self._all_states[state].information_state_string()
            for state in self._all_states
        }
      tabular_policy = policy_utils.policy_to_dict(
          player_policy, self.game, self._all_states,
          self._state_to_information_state)

    # When constructed, TabularBestResponse does a lot of work; we can save that
    # work by caching it.
    if self._best_responder[player] is None:
      self._best_responder[player] = pyspiel.TabularBestResponse(
          self.game, opponent, tabular_policy)
    else:
      self._best_responder[player].set_policy(tabular_policy)

    # Computing the value at the root calculates best responses everywhere.
    history = str(self.game.new_initial_state())
    best_response_value = self._best_responder[player].value(history)
    best_response_actions = self._best_responder[
        player].get_best_response_actions()

    # Compute action values
    self._action_value_calculator.compute_all_states_action_values({
        player: player_policy,
        opponent: policy.PolicyFromCallable(self.game, best_response_policy),
    })
    obj = self._action_value_calculator._get_tabular_statistics(  # pylint: disable=protected-access
        ((player, s) for s in info_states))

    # Return values
    return _CalculatorReturn(
        exploitability=best_response_value,
        values_vs_br=obj.action_values,
        counterfactual_reach_probs_vs_br=obj.counterfactual_reach_probs,
        player_reach_probs_vs_br=obj.player_reach_probs)
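Assuming the __call__ above belongs to action_value_vs_best_response.Calculator (its OpenSpiel home), a hedged usage sketch on Kuhn poker, where player 0's initial information states are just the card strings:

import pyspiel
from open_spiel.python import policy
from open_spiel.python.algorithms import action_value_vs_best_response

game = pyspiel.load_game("kuhn_poker")
calculator = action_value_vs_best_response.Calculator(game)

# Action values for player 0's uniform policy against a best-responding opponent.
result = calculator(0, policy.UniformRandomPolicy(game), ["0", "1", "2"])
print("best-response value:", result.exploitability)
print("action values vs BR:", result.values_vs_br)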
Example #28
def value_iteration(game, depth_limit, threshold, cyclic_game=False):
    """Solves for the optimal value function of a game.

  For small games only! Solves the game using value iteration,
  with the maximum error for the value function less than threshold.
  This algorithm works for sequential 1-player games or 2-player zero-sum
  games, with or without chance nodes.

  Arguments:
    game: The game to analyze, as returned by `load_game`.
    depth_limit: How deeply to analyze the game tree. Negative means no limit, 0
      means root-only, etc.
    threshold: Maximum error for state values.
    cyclic_game: set to True if the game has cycles (from state A we can get to
      state B, and from state B we can get back to state A).


  Returns:
    A `dict` with string keys and float values, mapping string encoding of
    states to the values of those states.
  """
    assert game.num_players() in (1, 2), (
        "Game must be a 1-player or 2-player game")
    if game.num_players() == 2:
        assert game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM, (
            "2-player games must be zero sum games")

    # Must be perfect information or one-shot (not imperfect information).
    assert (game.get_type().information
            == pyspiel.GameType.Information.ONE_SHOT
            or game.get_type().information
            == pyspiel.GameType.Information.PERFECT_INFORMATION)

    # We expect Value Iteration to be used with perfect information games, in
    # which `str` is assumed to display the state of the game.
    states = get_all_states.get_all_states(game,
                                           depth_limit,
                                           True,
                                           False,
                                           to_string=str,
                                           stop_if_encountered=cyclic_game)
    values = {}
    transitions = {}

    _initialize_maps(states, values, transitions)
    error = threshold + 1  # A value larger than threshold
    min_utility = game.min_utility()
    while error > threshold:
        error = 0
        for key, state in states.items():
            if state.is_terminal():
                continue
            elif state.is_simultaneous_node():
                # Simultaneous node. Assemble a matrix game from the child
                # utilities and solve it with a matrix-game LP solver.
                p0_utils = []  # row player
                p1_utils = []  # col player
                row = 0
                for p0action in state.legal_actions(0):
                    # new row
                    p0_utils.append([])
                    p1_utils.append([])
                    for p1action in state.legal_actions(1):
                        # loop from left-to-right of columns
                        next_states = transitions[(key, p0action, p1action)]
                        joint_q_value = sum(p * values[next_state]
                                            for next_state, p in next_states)
                        p0_utils[row].append(joint_q_value)
                        p1_utils[row].append(-joint_q_value)
                    row += 1
                stage_game = pyspiel.create_matrix_game(p0_utils, p1_utils)
                solution = lp_solver.solve_zero_sum_matrix_game(stage_game)
                value = solution[2]
            else:
                # Regular decision node
                player = state.current_player()
                value = min_utility if player == 0 else -min_utility
                for action in state.legal_actions():
                    next_states = transitions[(key, action)]
                    q_value = sum(p * values[next_state]
                                  for next_state, p in next_states)
                    if player == 0:
                        value = max(value, q_value)
                    else:
                        value = min(value, q_value)
            error = max(abs(values[key] - value), error)
            values[key] = value

    return values
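This variant also handles simultaneous-move nodes, so a one-shot matrix game is a convenient smoke test. A hedged sketch using OpenSpiel's matching pennies (game value 0), again assuming the open_spiel.python.algorithms.value_iteration module path:

import pyspiel
from open_spiel.python.algorithms import value_iteration

game = pyspiel.load_game("matrix_mp")  # matching pennies: one-shot, zero-sum
values = value_iteration.value_iteration(game, depth_limit=-1, threshold=0.01)
print("root value:", values[str(game.new_initial_state())])  # expected: ~0.0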
Example #29
    def __call__(self, player, player_policy, info_states):
        """Computes action values per state for the player.

    Args:
      player: The id of the player (0 <= player < game.num_players()).
      player_policy: A `policy.Policy` object.
      info_states: A list of info state strings.

    Returns:
      A `_CalculatorReturn` nametuple. See its docstring for the documentation.
    """
        self.player = player
        opponent = 1 - player

        def best_response_policy(state):
            infostate = state.information_state_string(opponent)
            action = best_response_actions[infostate]
            return [(action, 1.0)]

        # If the policy is a TabularPolicy, we can directly copy the infostate
        # strings & values from the class. This is significantly faster than having
        # to create the infostate strings.
        if isinstance(player_policy, policy.TabularPolicy):
            tabular_policy = {
                key: _tuples_from_policy(player_policy.policy_for_key(key))
                for key in player_policy.state_lookup
            }
        # Otherwise, we have to calculate all the infostate strings everytime. This
        # is ~2x slower.
        else:
            # We cache these as they are expensive to compute & do not change.
            if self._all_states is None:
                self._all_states = get_all_states.get_all_states(
                    self.game,
                    depth_limit=-1,
                    include_terminals=False,
                    include_chance_states=False)
                self._state_to_information_state = {
                    state: self._all_states[state].information_state_string()
                    for state in self._all_states
                }
            tabular_policy = policy_utils.policy_to_dict(
                player_policy, self.game, self._all_states,
                self._state_to_information_state)

        # When constructed, TabularBestResponse does a lot of work; we can save that
        # work by caching it.
        if self._best_responder[player] is None:
            self._best_responder[player] = pyspiel.TabularBestResponse(
                self.game, opponent, tabular_policy)
        else:
            self._best_responder[player].set_policy(tabular_policy)

        # Computing the value at the root calculates best responses everywhere.
        history = str(self.game.new_initial_state())
        best_response_value = self._best_responder[player].value(history)
        best_response_actions = self._best_responder[
            player].get_best_response_actions()

        # Compute action values
        self.action_values = collections.defaultdict(
            lambda: collections.defaultdict(lambda: np.zeros(2)))
        self.info_state_prob = collections.defaultdict(float)
        self.info_state_player_prob = collections.defaultdict(float)
        self.info_state_cf_prob = collections.defaultdict(float)
        self.info_state_chance_prob = collections.defaultdict(float)
        self.get_action_values(
            self.game.new_initial_state(), {
                player:
                player_policy,
                opponent:
                policy.PolicyFromCallable(self.game, best_response_policy),
            })

        # Collect normalized action values for each information state
        rv = []
        cfrp = []
        player_reach_probs_vs_br = []
        for info_state in info_states:
            key = (player, info_state)
            av = self.action_values[key]
            norm_prob = self.info_state_prob[key]
            rv.append([(av[a][player] / norm_prob) if
                       (a in av and norm_prob > 0) else 0
                       for a in range(self.num_actions)])
            cfrp.append(self.info_state_cf_prob[key])
            player_reach_probs_vs_br.append(self.info_state_player_prob[key])

        # Return values
        return _CalculatorReturn(
            exploitability=best_response_value,
            values_vs_br=rv,
            counterfactual_reach_probs_vs_br=cfrp,
            player_reach_probs_vs_br=player_reach_probs_vs_br)