Пример #1
0
def create_games(origin,
                 destination,
                 num_vehicles,
                 graph,
                 max_time_step,
                 time_step_length=1.0,
                 departure_time=None):
    """Build three variants of one routing scenario on the same network.

    Every vehicle travels from `origin` to `destination`. The same
    `max_num_time_step` / `time_step_length` parameters are given to both the
    N-player game and the mean field game.

    Args:
        origin: origin passed to every `Vehicle` and to the demand entry.
        destination: destination passed to every `Vehicle` and to the demand
            entry.
        num_vehicles: number of `Vehicle` objects created; also passed as the
            last argument of the single `OriginDestinationDemand`.
        graph: network object handed to both games as `network`.
        max_time_step: value used for the "max_num_time_step" parameter.
        time_step_length: value used for the "time_step_length" parameter.
        departure_time: must be left as None; any other value is rejected.

    Returns:
        A tuple `(game, seq_game, mfg_game)`: the `DynamicRoutingGame`, its
        turn-based conversion, and the `MeanFieldRoutingGame`.

    Raises:
        NotImplementedError: if `departure_time` is not None.
    """
    if departure_time is not None:
        raise NotImplementedError("To do.")

    def _time_params():
        # A fresh dict on every call, so the two games never share one
        # parameter object.
        return {
            "max_num_time_step": max_time_step,
            "time_step_length": time_step_length,
        }

    vehicles = []
    for _ in range(num_vehicles):
        vehicles.append(dynamic_routing_utils.Vehicle(origin, destination))

    n_player_game = dynamic_routing.DynamicRoutingGame(
        _time_params(), network=graph, vehicles=vehicles)
    turn_based_game = pyspiel.convert_to_turn_based(n_player_game)

    # A single demand entry; its third positional argument is fixed at 0.
    demand = [
        dynamic_routing_utils.OriginDestinationDemand(
            origin, destination, 0, num_vehicles)
    ]
    mean_field_game = mean_field_routing_game.MeanFieldRoutingGame(
        _time_params(), network=graph, od_demand=demand)

    return n_player_game, turn_based_game, mean_field_game
Пример #2
0
def nfsp_measure_exploitability_nonlstm(rllib_policies: List[Policy],
                                        poker_game_version: str,
                                        open_spiel_env_config: dict = None):
    """Compute the exploitability of a set of per-player RLlib policies.

    Each RLlib policy is converted with
    `openspiel_policy_from_nonlstm_rllib_policy`, the results are combined
    into a `JointPlayerPolicy`, and that joint policy is passed to
    `exploitability`.

    Args:
        rllib_policies: RLlib policies, converted in the order given.
        poker_game_version: game name passed to `pyspiel.load_game`.
        open_spiel_env_config: optional game parameters. When None,
            "kuhn_poker" and "leduc_poker" get `{"players": 2}` and every
            other game gets no parameters. Values that are not already
            `pyspiel.GameParameter` instances are wrapped in one.

    Returns:
        Whatever `exploitability` returns for the joint policy.
    """
    if open_spiel_env_config is None:
        uses_two_player_default = poker_game_version in ["kuhn_poker", "leduc_poker"]
        if uses_two_player_default:
            open_spiel_env_config = {"players": pyspiel.GameParameter(2)}
        else:
            open_spiel_env_config = {}

    # Normalise the config so every value is a pyspiel.GameParameter.
    wrapped_config = {}
    for key, value in open_spiel_env_config.items():
        if isinstance(value, pyspiel.GameParameter):
            wrapped_config[key] = value
        else:
            wrapped_config[key] = pyspiel.GameParameter(value)
    open_spiel_env_config = wrapped_config

    openspiel_game = pyspiel.load_game(poker_game_version, open_spiel_env_config)
    # oshi_zumo is the one game this function converts to turn-based form.
    if poker_game_version == "oshi_zumo":
        openspiel_game = pyspiel.convert_to_turn_based(openspiel_game)

    converted_policies = [
        openspiel_policy_from_nonlstm_rllib_policy(
            openspiel_game=openspiel_game,
            rllib_policy=policy,
            game_version=poker_game_version,
            game_parameters=open_spiel_env_config,
        )
        for policy in rllib_policies
    ]
    joint_policy = JointPlayerPolicy(game=openspiel_game, policies=converted_policies)

    # Exploitability is NashConv / num_players
    if poker_game_version == "universal_poker":
        print("Measuring exploitability for universal_poker policy. This will take a while...")
    return exploitability(game=openspiel_game, policy=joint_policy)
Пример #3
0
def _rollout_until_timeout(game_name,
                           time_limit,
                           give_up_after,
                           if_simultaneous_convert_to_turn_based=False):
  """Play uniformly random rollouts of a game until a time budget is spent.

  A new rollout is started whenever the elapsed time has not yet exceeded
  `time_limit`; a rollout that is already running is never interrupted by
  the clock.

  Args:
    game_name: string passed to `pyspiel.load_game`.
    time_limit: time budget in seconds.
    give_up_after: a rollout is abandoned (and counted as a give-up) once its
      history is longer than this many entries.
    if_simultaneous_convert_to_turn_based: when true and the game has
      simultaneous dynamics, the game is converted to a turn-based game
      before the rollouts start.

  Returns:
    A dict with keys `game_name`, `ms_per_rollouts`, `ms_per_moves`,
    `giveups_per_rollout` and `time_elapsed`.

  Raises:
    NotImplementedError: if the game has mean field dynamics.
  """
  game = pyspiel.load_game(game_name)
  dynamics = game.get_type().dynamics
  if dynamics == pyspiel.GameType.Dynamics.MEAN_FIELD:
    raise NotImplementedError(
        "Benchmark on mean field games is not available yet.")
  if (if_simultaneous_convert_to_turn_based and
      dynamics == pyspiel.GameType.Dynamics.SIMULTANEOUS):
    game = pyspiel.convert_to_turn_based(game)

  def _pick(legal):
    # A player with no legal action at a simultaneous node is given action 0.
    return random.choice(legal) if legal else 0

  rollouts = 0
  giveups = 0
  moves = 0
  started_at = time.time()
  while not (time.time() - started_at > time_limit):
    state = game.new_initial_state()
    while not state.is_terminal():
      if len(state.history()) > give_up_after:
        giveups += 1
        break
      if state.is_simultaneous_node():
        # One action per player, applied jointly.
        joint_action = [
            _pick(state.legal_actions(player))
            for player in range(state.num_players())
        ]
        state.apply_actions(joint_action)
      else:
        mover = state.current_player()
        state.apply_action(random.choice(state.legal_actions(mover)))
      moves += 1
    # Abandoned rollouts are counted as rollouts too.
    rollouts += 1
  elapsed = time.time() - started_at

  return {
      "game_name": game_name,
      "ms_per_rollouts": elapsed / rollouts * 1000,
      "ms_per_moves": elapsed / moves * 1000,
      "giveups_per_rollout": giveups / rollouts,
      "time_elapsed": elapsed,
  }
 def test_game_as_turn_based(self):
     """Run random simulations on the turn-based form of the iterated PD game.

     Loads "python_iterated_prisoners_dilemma", converts it with
     `pyspiel.convert_to_turn_based`, and runs `pyspiel.random_sim_test`
     for 10 simulations without serialization.
     """
     simultaneous_game = pyspiel.load_game("python_iterated_prisoners_dilemma")
     sim_options = {"num_sims": 10, "serialize": False, "verbose": True}
     pyspiel.random_sim_test(
         pyspiel.convert_to_turn_based(simultaneous_game), **sim_options)
Пример #5
0
 def test_int_mccfr_on_turn_based_game_with_exploitability(self):
   """Run outcome-sampling MCCFR on the turn-based routing game.

   The solver is iterated `_NUM_ITERATION_CFR_TEST` times, then NashConv is
   computed for its average policy. The NashConv value itself is not
   asserted; the test passes if nothing raises.
   """
   game_string = "python_dynamic_routing(max_num_time_step=5,time_step_length=1.0)"
   turn_based_game = pyspiel.convert_to_turn_based(
       pyspiel.load_game(game_string))
   solver = outcome_mccfr.OutcomeSamplingSolver(turn_based_game)
   for _ in range(_NUM_ITERATION_CFR_TEST):
     solver.iteration()
   average_policy = solver.average_policy()
   exploitability.nash_conv(turn_based_game, average_policy)
Пример #6
0
 def test_cfr_on_turn_based_game_with_exploitability(self):
   """Run CFR on the turn-based routing game and compute NashConv.

   The solver is updated `_NUM_ITERATION_CFR_TEST` times, then NashConv is
   computed for its average policy. The NashConv value itself is not
   asserted; the test passes if nothing raises.
   """
   game_string = "python_dynamic_routing(max_num_time_step=5,time_step_length=1.0)"
   turn_based_game = pyspiel.convert_to_turn_based(
       pyspiel.load_game(game_string))
   solver = cfr.CFRSolver(turn_based_game)
   for _ in range(_NUM_ITERATION_CFR_TEST):
     solver.evaluate_and_update_policy()
   average_policy = solver.average_policy()
   exploitability.nash_conv(turn_based_game, average_policy)
Пример #7
0
 def test_action_consistency_convert_to_turn_based(self):
   """Compare initial legal actions of the game and its turn-based form.

   At the initial state, the legal actions the original game reports for the
   turn-based game's current player must equal the legal actions the
   turn-based state reports itself.
   """
   simultaneous_game = pyspiel.load_game("python_dynamic_routing")
   turn_based_game = pyspiel.convert_to_turn_based(simultaneous_game)
   simultaneous_state = simultaneous_game.new_initial_state()
   turn_based_state = turn_based_game.new_initial_state()

   mover = turn_based_state.current_player()
   expected_actions = simultaneous_state.legal_actions(mover)
   actual_actions = turn_based_state.legal_actions()
   self.assertEqual(
       expected_actions,
       actual_actions,
       msg="The sequential actions are not correct.")
Пример #8
0
def psro_measure_exploitability_nonlstm(
        br_checkpoint_path_tuple_list: List[Tuple[str, str]],
        metanash_weights: List[Tuple[float, float]],
        set_policy_weights_fn: Callable,
        rllib_policies: List[Policy],
        poker_game_version: str,
        open_spiel_env_config: dict = None):
    """Compute the exploitability of a weighted mixture of checkpoint policies.

    For every tuple of checkpoint paths, each player's RLlib policy is loaded
    with the matching checkpoint and converted to an OpenSpiel policy. The
    per-tuple policies are averaged by
    `tabular_policies_from_weighted_policies` using `metanash_weights`, and
    the exploitability of the resulting joint policy is returned.

    Args:
        br_checkpoint_path_tuple_list: one tuple per mixture entry, indexed
            by player to obtain that player's checkpoint path.
        metanash_weights: passed as `weights` to
            `tabular_policies_from_weighted_policies`.
        set_policy_weights_fn: called as
            `set_policy_weights_fn(policy, checkpoint_path=...)` before a
            policy is converted.
        rllib_policies: one RLlib policy per player; the list position is
            used as the player index.
        poker_game_version: game name passed to `pyspiel.load_game`.
        open_spiel_env_config: optional game parameters. When None,
            "kuhn_poker" and "leduc_poker" get `{"players": 2}` and every
            other game gets no parameters. Values that are not already
            `pyspiel.GameParameter` instances are wrapped in one.

    Returns:
        Whatever `exploitability` returns for the averaged joint policy.
    """
    if open_spiel_env_config is None:
        uses_two_player_default = poker_game_version in ["kuhn_poker", "leduc_poker"]
        if uses_two_player_default:
            open_spiel_env_config = {"players": pyspiel.GameParameter(2)}
        else:
            open_spiel_env_config = {}

    # Normalise the config so every value is a pyspiel.GameParameter.
    wrapped_config = {}
    for key, value in open_spiel_env_config.items():
        if isinstance(value, pyspiel.GameParameter):
            wrapped_config[key] = value
        else:
            wrapped_config[key] = pyspiel.GameParameter(value)
    open_spiel_env_config = wrapped_config

    openspiel_game = pyspiel.load_game(poker_game_version,
                                       open_spiel_env_config)
    # oshi_zumo is the one game this function converts to turn-based form.
    if poker_game_version == "oshi_zumo":
        openspiel_game = pyspiel.convert_to_turn_based(openspiel_game)

    def _tabular_policy_for(rllib_policy, checkpoint_path):
        # The cache is keyed by checkpoint path only; on a hit the RLlib
        # policy's weights are left untouched.
        if checkpoint_path in _psro_tabular_policies_cache:
            return _psro_tabular_policies_cache[checkpoint_path]

        set_policy_weights_fn(rllib_policy, checkpoint_path=checkpoint_path)
        tabular_policy = openspiel_policy_from_nonlstm_rllib_policy(
            openspiel_game=openspiel_game,
            rllib_policy=rllib_policy,
            game_version=poker_game_version,
            game_parameters=open_spiel_env_config,
        )
        if CACHE_PSRO_TABULAR_POLICIES:
            _psro_tabular_policies_cache[checkpoint_path] = tabular_policy
        return tabular_policy

    def policy_iterable():
        # Lazily yields one list of per-player policies per checkpoint tuple.
        for checkpoint_paths in br_checkpoint_path_tuple_list:
            yield [
                _tabular_policy_for(rllib_policy, checkpoint_paths[player])
                for player, rllib_policy in enumerate(rllib_policies)
            ]

    averaged_policies = tabular_policies_from_weighted_policies(
        game=openspiel_game,
        policy_iterable=policy_iterable(),
        weights=metanash_weights)

    joint_policy = JointPlayerPolicy(game=openspiel_game,
                                     policies=averaged_policies)

    # Exploitability is NashConv / num_players
    if poker_game_version == "universal_poker":
        print(
            "Measuring exploitability for universal_poker policy. This will take a while..."
        )
    return exploitability(game=openspiel_game, policy=joint_policy)
Пример #9
0
 def test_game_as_turn_based(self):
   """Run random simulations on the turn-based form of the routing game.

   Loads "python_dynamic_routing", converts it with
   `pyspiel.convert_to_turn_based`, and runs `pyspiel.random_sim_test`
   for 10 simulations without serialization.
   """
   simultaneous_game = pyspiel.load_game("python_dynamic_routing")
   sim_options = {"num_sims": 10, "serialize": False, "verbose": True}
   pyspiel.random_sim_test(
       pyspiel.convert_to_turn_based(simultaneous_game), **sim_options)
Пример #10
0
 def test_creation_of_rl_environment(self):
   """Construct an RL environment from the turn-based routing game.

   Nothing is asserted on the environment; the test passes if construction
   does not raise.
   """
   turn_based_game = pyspiel.convert_to_turn_based(
       pyspiel.load_game("python_dynamic_routing"))
   rl_environment.Environment(turn_based_game)