Пример #1
0
 def serialize_deserialize(self, game, state):
     """Asserts that `game` and `state` survive serialization round trips.

     Two round trips are checked, both by comparing `str()` representations:
     the game and state through pyspiel's serialize/deserialize pair, and the
     state alone through `pickle`.

     Args:
       game: the game object to serialize together with `state`.
       state: the state object to serialize and to pickle.
     """
     # Round trip through pyspiel's own serializer.
     restored_game, restored_state = pyspiel.deserialize_game_and_state(
         pyspiel.serialize_game_and_state(game, state))
     self.assertEqual(str(game), str(restored_game))
     self.assertEqual(str(state), str(restored_state))

     # Round trip of the state alone through pickle.
     state_copy = pickle.loads(pickle.dumps(state))
     self.assertEqual(str(state), str(state_copy))
Пример #2
0
 def test_pickle(self):
     """Checks that a mid-game state survives serialization and pickling.

     Loads "python_tic_tac_toe", applies four actions to a fresh initial
     state, then verifies by comparing `str()` representations that the
     game and state round-trip through pyspiel's serializer and that the
     state round-trips through `pickle`.
     """
     game = pyspiel.load_game("python_tic_tac_toe")
     state = game.new_initial_state()
     for action in (4, 2, 3, 7):
         state.apply_action(action)

     # pyspiel serialization of the game together with the state.
     serialized = pyspiel.serialize_game_and_state(game, state)
     restored_game, restored_state = pyspiel.deserialize_game_and_state(
         serialized)
     self.assertEqual(str(game), str(restored_game))
     self.assertEqual(str(state), str(restored_state))

     # pickle round trip of the state only.
     state_copy = pickle.loads(pickle.dumps(state))
     self.assertEqual(str(state), str(state_copy))
Пример #3
0
  def get_time_step(self, actions):
    """Builds a `TimeStep` describing the current state.

    No action is applied to the state here; the only attribute this method
    assigns is `self._should_reset`, which is set to whether the state is
    terminal.

    Args:
      actions: sequence indexed by player id; `actions[player_id]` is stored
        as that player's entry in `observations["last_action"]`.

    Returns:
      A `TimeStep` built with:
        observations: dict with per-player lists under "info_state",
          "legal_actions" and "last_action", the state's current player under
          "current_player", and under "serialized_state" the serialized game
          and state when `self._include_full_state` is set (an empty list
          otherwise).
        rewards: list holding the state's current reward for each player.
        discounts: `self._discounts`, or a list of zeros with one entry per
          discount when the state is terminal.
        step_type: `StepType.LAST` when the state is terminal, otherwise
          `StepType.MID`.
    """
    step_type = StepType.LAST if self._state.is_terminal() else StepType.MID
    is_last = step_type == StepType.LAST
    self._should_reset = is_last

    info_states = []
    legal_actions = []
    last_actions = []
    rewards = []
    state_rewards = self._state.rewards()
    for player in range(self.num_players):
      rewards.append(state_rewards[player])
      # Either the observation tensor or the information-state tensor is
      # exposed under "info_state", depending on the environment setting.
      if self._use_observation:
        tensor = self._state.observation_tensor(player)
      else:
        tensor = self._state.information_state_tensor(player)
      info_states.append(tensor)
      legal_actions.append(self._state.legal_actions(player))
      last_actions.append(actions[player])

    observations = {
        "info_state": info_states,
        "legal_actions": legal_actions,
        "current_player": self._state.current_player(),
        "serialized_state": [],
        "last_action": last_actions,
    }

    if is_last:
      # When the game is in a terminal state set the discount to 0.
      discounts = [0. for _ in self._discounts]
    else:
      discounts = self._discounts

    if self._include_full_state:
      serialized = pyspiel.serialize_game_and_state(self._game, self._state)
      observations["serialized_state"] = serialized

    return TimeStep(
        observations=observations,
        rewards=rewards,
        discounts=discounts,
        step_type=step_type)
Пример #4
0
    def reset(self):
        """Starts a new sequence and returns the first `TimeStep` of it.

        Clears `self._should_reset`, replaces `self._state` with a fresh
        initial state and calls `self._sample_external_events()` before the
        observations are collected. When the game's dynamics are
        `MEAN_FIELD` and `self._num_players` is greater than 1, the initial
        state is created for the population `self._mfg_population`.

        Returns:
          A `TimeStep` built with:
            observations: dict with per-player lists under "info_state" and
              "legal_actions", the state's current player under
              "current_player", and under "serialized_state" the serialized
              game and state when `self._include_full_state` is set (an
              empty list otherwise).
            rewards: None.
            discounts: None.
            step_type: `StepType.FIRST`.
        """
        self._should_reset = False

        is_mean_field = (self._game.get_type().dynamics ==
                         pyspiel.GameType.Dynamics.MEAN_FIELD)
        if is_mean_field and self._num_players > 1:
            self._state = self._game.new_initial_state_for_population(
                self._mfg_population)
        else:
            self._state = self._game.new_initial_state()
        self._sample_external_events()

        info_states = []
        legal_actions = []
        for player in range(self.num_players):
            # Either the observation tensor or the information-state tensor
            # is exposed under "info_state", depending on the setting.
            if self._use_observation:
                tensor = self._state.observation_tensor(player)
            else:
                tensor = self._state.information_state_tensor(player)
            info_states.append(tensor)
            legal_actions.append(self._state.legal_actions(player))

        observations = {
            "info_state": info_states,
            "legal_actions": legal_actions,
            "current_player": self._state.current_player(),
            "serialized_state": [],
        }

        if self._include_full_state:
            serialized = pyspiel.serialize_game_and_state(
                self._game, self._state)
            observations["serialized_state"] = serialized

        return TimeStep(
            observations=observations,
            rewards=None,
            discounts=None,
            step_type=StepType.FIRST,
        )
Пример #5
0
 def serialize_deserialize(self, game, state):
     """Asserts that `game` and `state` survive a pyspiel round trip.

     The pair is serialized with `pyspiel.serialize_game_and_state`,
     deserialized again, and each restored object is compared to its
     original by `str()` representation.

     Args:
       game: the game object to serialize together with `state`.
       state: the state object to serialize.
     """
     serialized = pyspiel.serialize_game_and_state(game, state)
     restored_game, restored_state = pyspiel.deserialize_game_and_state(
         serialized)
     # Game first, then state, each compared by string representation.
     for original, restored in ((game, restored_game),
                                (state, restored_state)):
         self.assertEqual(str(original), str(restored))