def main(_):
  """Plays a single fcpa universal_poker game and prints every step.

  Two agents are created with `LoadAgent` from the `player0` / `player1` flags.
  Chance nodes are resolved by sampling from the state's chance outcomes with a
  NumPy random state seeded from the `seed` flag; decision nodes are delegated
  to the agent of the current player. The state is printed after every applied
  action, and each player's utility is printed once the game is terminal.

  Args:
    _: Unused positional argument.
  """
  rng = np.random.RandomState(FLAGS.seed)

  # Make sure poker is compiled into the library, as it requires an optional
  # dependency: the ACPC poker code. To ensure it is compiled in, prepend both
  # the install.sh and build commands with OPEN_SPIEL_BUILD_WITH_ACPC=ON.
  # See here:
  # https://github.com/deepmind/open_spiel/blob/master/docs/install.md#configuration-conditional-dependencies
  # for more details on optional dependencies.
  registered = pyspiel.registered_names()
  assert "universal_poker" in registered

  game_string = pyspiel.hunl_game_string("fcpa")
  print("Creating game: {}".format(game_string))
  game = pyspiel.load_game(game_string)

  first_agent = LoadAgent(FLAGS.player0, game, 0, rng)
  second_agent = LoadAgent(FLAGS.player1, game, 1, rng)
  agents = [first_agent, second_agent]

  state = game.new_initial_state()

  # Show the starting position before any action is applied.
  print("INITIAL STATE")
  print(state)

  while not state.is_terminal():
    # A non-terminal state is handled either as a chance node or as a decision
    # node for the single current player.
    player = state.current_player()
    if not state.is_chance_node():
      # Decision node: list the legal moves, then let the acting agent choose.
      for legal_action in state.legal_actions():
        legal_text = state.action_to_string(player, legal_action)
        print("Legal action: {} ({})".format(legal_text, legal_action))
      chosen = agents[player].step(state)
      chosen_text = state.action_to_string(player, chosen)
      print("Player ", player, ", chose action: ", chosen_text)
      state.apply_action(chosen)
    else:
      # Chance node: sample one outcome according to its probability.
      chance_outcomes = state.chance_outcomes()
      print("Chance node with " + str(len(chance_outcomes)) + " outcomes")
      candidates, weights = zip(*chance_outcomes)
      sampled = rng.choice(candidates, p=weights)
      print("Sampled outcome: ",
            state.action_to_string(state.current_player(), sampled))
      state.apply_action(sampled)

    print("")
    print("NEXT STATE:")
    print(state)

  # Game is now done. Print utilities for each player.
  utilities = state.returns()
  for pid in range(game.num_players()):
    print("Utility for player {} is {}".format(pid, utilities[pid]))
def test_registered_names(self):
  """Checks that pyspiel registers exactly the expected game names.

  The optional games "hanabi" and "universal_poker" are expected only when the
  BUILD_WITH_HANABI / BUILD_WITH_ACPC environment variables are set to "ON".
  """
  registered = pyspiel.registered_names()

  # Specify game names in alphabetical order, to make the test easier to read.
  expected_games = {
      "backgammon",
      "blotto",
      "breakthrough",
      "bridge",
      "bridge_uncontested_bidding",
      "catch",
      "chess",
      "cliff_walking",
      "coin_game",
      "connect_four",
      "coop_box_pushing",
      "coop_to_1p",
      "deep_sea",
      "first_sealed_auction",
      "go",
      "goofspiel",
      "havannah",
      "hex",
      "kuhn_poker",
      "laser_tag",
      "leduc_poker",
      "liars_dice",
      "markov_soccer",
      "matching_pennies_3p",
      "matrix_cd",
      "matrix_coordination",
      "matrix_mp",
      "matrix_pd",
      "matrix_rps",
      "matrix_rpsw",
      "matrix_sh",
      "matrix_shapleys_game",
      "misere",
      "negotiation",
      "normal_form_extensive_game",
      "oshi_zumo",
      "oware",
      "pentago",
      "phantom_ttt",
      "pig",
      "quoridor",
      "tic_tac_toe",
      "tiny_bridge_2p",
      "tiny_bridge_4p",
      "tiny_hanabi",
      "turn_based_simultaneous_game",
      "y",
  }

  # Each optional game is expected only if its build flag reads "ON".
  optional_games = (
      ("BUILD_WITH_HANABI", "hanabi"),
      ("BUILD_WITH_ACPC", "universal_poker"),
  )
  for env_flag, optional_name in optional_games:
    if os.environ.get(env_flag, "OFF") == "ON":
      expected_games.add(optional_name)

  self.assertCountEqual(registered, sorted(expected_games))
def test_registered_names(self):
  """Checks that pyspiel registers exactly the expected game names.

  The baseline comes from the module-level EXPECTED_GAMES (defined outside this
  block). The optional games "hanabi" and "universal_poker" are added to the
  expectation only when the BUILD_WITH_HANABI / BUILD_WITH_ACPC environment
  variables are set to "ON".
  """
  game_names = pyspiel.registered_names()

  # Work on a private copy: adding the optional games directly to
  # EXPECTED_GAMES would mutate the shared module-level constant for every
  # other user of it (and would fail outright if it were a frozenset).
  expected = set(EXPECTED_GAMES)
  if os.environ.get("BUILD_WITH_HANABI", "OFF") == "ON":
    expected.add("hanabi")
  if os.environ.get("BUILD_WITH_ACPC", "OFF") == "ON":
    expected.add("universal_poker")

  self.assertCountEqual(game_names, sorted(expected))
def test_registered_names(self):
  """Checks that pyspiel registers exactly the expected game names."""
  registered = pyspiel.registered_names()

  # Specify game names in alphabetical order, to make the test easier to read.
  expected_games = [
      "backgammon", "blotto", "breakthrough", "bridge_uncontested_bidding",
      "catch", "chess", "coin_game", "connect_four", "coop_box_pushing",
      "first_sealed_auction",
      "go", "goofspiel",
      "havannah", "hex",
      "kuhn_poker",
      "laser_tag", "leduc_poker", "liars_dice",
      "markov_soccer", "matching_pennies_3p", "matrix_cd",
      "matrix_coordination", "matrix_mp", "matrix_pd", "matrix_rps",
      "matrix_rpsw", "matrix_sh", "matrix_shapleys_game", "misere",
      "negotiation",
      "oshi_zumo", "oware",
      "pentago", "phantom_ttt", "pig",
      "quoridor",
      "tic_tac_toe", "tiny_bridge_2p", "tiny_bridge_4p", "tiny_hanabi",
      "turn_based_simultaneous_game",
      "y",
  ]

  self.assertCountEqual(registered, expected_games)
def test_run_hanabi(self):
  """Runs one episode of a small Hanabi game with policy-gradient agents.

  The test returns immediately when "hanabi" is not among the registered games.
  """
  # Hanabi is an optional game, so check we have it before running the test.
  game = "hanabi"
  if game not in pyspiel.registered_names():
    return

  num_players = 3
  env = rl_environment.Environment(
      game,
      players=num_players,
      max_life_tokens=1,
      colors=2,
      ranks=3,
      hand_size=2,
      max_information_tokens=3,
      discount=0.)
  info_state_size = env.observation_spec()["info_state"][0]
  num_actions = env.action_spec()["num_actions"]

  with self.session() as sess:
    # One agent per seat, all sharing the same session.
    agents = []
    for player_id in range(num_players):
      agents.append(
          policy_gradient.PolicyGradient(
              sess,
              player_id=player_id,
              info_state_size=info_state_size,
              num_actions=num_actions,
              hidden_layers_sizes=[8, 8],
              batch_size=16,
              entropy_cost=0.001,
              critic_learning_rate=0.01,
              pi_learning_rate=0.01,
              num_critic_before_pi=4))
    sess.run(tf.global_variables_initializer())

    time_step = env.reset()
    while not time_step.last():
      current_player = time_step.observations["current_player"]
      # Every agent steps on each time step; only the current player's action
      # is sent to the environment.
      step_outputs = [agent.step(time_step) for agent in agents]
      chosen_action = step_outputs[current_player].action
      time_step = env.step([chosen_action])

    # Let every agent step once more on the final time step.
    for agent in agents:
      agent.step(time_step)
def test_run_landlord(self):
  """Runs one episode of the landlord game with DQN agents.

  The test returns immediately when "landlord" is not among the registered
  games.
  """
  # landlord is an optional game, so check we have it before running the test.
  game = "landlord"
  if game not in pyspiel.registered_names():
    return

  num_players = 3
  # No game parameters are overridden.
  env_configs = {}
  env = rl_environment.Environment(game, **env_configs)
  state_size = env.observation_spec()["info_state"][0]
  num_actions = env.action_spec()["num_actions"]

  with self.session() as sess:
    # One agent per seat, all sharing the same session.
    agents = []
    for player_id in range(num_players):
      agents.append(
          dqn.DQN(
              sess,
              player_id,
              state_representation_size=state_size,
              num_actions=num_actions,
              hidden_layers_sizes=[16],
              replay_buffer_capacity=10,
              batch_size=5))
    sess.run(tf.global_variables_initializer())

    time_step = env.reset()
    while not time_step.last():
      current_player = time_step.observations["current_player"]
      # In a turn-based environment only the current player's agent steps;
      # otherwise every agent steps and contributes one action.
      if not env.is_turn_based:
        all_outputs = [agent.step(time_step) for agent in agents]
        action_list = [output.action for output in all_outputs]
      else:
        acting_output = agents[current_player].step(time_step)
        action_list = [acting_output.action]
      print_iteration(time_step, current_player, action_list)
      time_step = env.step(action_list)

    # Let every agent step once more on the final time step.
    for agent in agents:
      agent.step(time_step)
def test_run_hanabi(self):
  """Runs one episode of a small Hanabi game with DQN agents.

  The test returns immediately when "hanabi" is not among the registered games.
  """
  # Hanabi is an optional game, so check we have it before running the test.
  game = "hanabi"
  if game not in pyspiel.registered_names():
    return

  num_players = 3
  env = rl_environment.Environment(
      game,
      players=num_players,
      max_life_tokens=1,
      colors=2,
      ranks=3,
      hand_size=2,
      max_information_tokens=3,
      discount=0.)
  state_size = env.observation_spec()["info_state"][0]
  num_actions = env.action_spec()["num_actions"]

  with self.session() as sess:
    # One agent per seat, all sharing the same session.
    agents = []
    for player_id in range(num_players):
      agents.append(
          dqn.DQN(
              sess,
              player_id,
              state_representation_size=state_size,
              num_actions=num_actions,
              hidden_layers_sizes=[16],
              replay_buffer_capacity=10,
              batch_size=5))
    sess.run(tf.global_variables_initializer())

    time_step = env.reset()
    while not time_step.last():
      current_player = time_step.observations["current_player"]
      # Every agent steps on each time step; only the current player's action
      # is sent to the environment.
      step_outputs = [agent.step(time_step) for agent in agents]
      chosen_action = step_outputs[current_player].action
      time_step = env.step([chosen_action])

    # Let every agent step once more on the final time step.
    for agent in agents:
      agent.step(time_step)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from absl import logging
from absl.testing import absltest

from open_spiel.python.algorithms import generate_playthrough
import pyspiel

_DATA_DIR = "open_spiel/integration_tests/playthroughs/"

_OPTIONAL_GAMES = frozenset(["hanabi", "universal_poker"])
_AVAILABLE_GAMES = set(pyspiel.registered_names())


def _is_optional_game(basename):
  """Tells whether a file basename belongs to one of the optional games.

  Args:
    basename: The basename of the file. It is assumed it starts with the game
      name.

  Returns:
    A `(True, game_name)` tuple when `basename` starts with the name of a game
    in `_OPTIONAL_GAMES`, and `(False, None)` otherwise.
  """
  # First optional game whose name is a prefix of the basename, if any.
  matched_game = next(
      (name for name in _OPTIONAL_GAMES if basename.startswith(name)), None)
  return matched_game is not None, matched_game
import unittest from absl.testing import absltest from absl.testing import parameterized import numpy as np from open_spiel.python.algorithms import get_all_states import pyspiel _MANDATORY_PARAMETERS_GAMES = [ "misere", "turn_based_simultaneous_game", "normal_form_extensive_game" ] _GAMES_TO_TEST = list( set(pyspiel.registered_names()) - set(_MANDATORY_PARAMETERS_GAMES)) # The list of game instances to test on the full tree as tuples # (name to display, string to pass to load_game). _GAMES_FULL_TREE_TRAVERSAL_TESTS = [ ("catch", "catch(rows=6,columns=3)"), ("cliff_walking", "cliff_walking(horizon=7)"), ("deep_sea", "deep_sea(size=3)"), ("kuhn_poker", "kuhn_poker"), ("leduc_poker", "leduc_poker"), ("iigoofspiel4", "turn_based_simultaneous_game(game=goofspiel(" "imp_info=True,num_cards=4,points_order=descending))"), ("kuhn_poker3p", "kuhn_poker(players=3)"), ("first_sealed_auction", "first_sealed_auction(max_value=2)"), ("tiny_hanabi", "tiny_hanabi"), ("nf_auction", "turn_based_simultaneous_game(game="
# Times out (to investigate) "backgammon", # Likely too large for depth limit 5 (huge branching factor). "breakthrough", "bridge_uncontested_bidding", "havannah", "hex", "chess", "go", "pentago", # Mandatory parameters "misere", "turn_based_simultaneous_game", "y", ] _GAMES_TO_TEST = list(set(pyspiel.registered_names()) - set(_EXCLUDED_GAMES)) # The list of game instances to test on the full tree as tuples # (name to display, string to pass to load_game). _GAMES_FULL_TREE_TRAVERSAL_TESTS = [ ("catch", "catch"), ("kuhn_poker", "kuhn_poker"), ("leduc_poker", "leduc_poker"), # Disabled as this slows down the test significantly. (12s to 150s). # Enable it to check the game when you modify it. # ("liars_dice", "liars_dice"), ("iigoofspiel4", "turn_based_simultaneous_game(game=goofspiel(" "imp_info=True,num_cards=4,points_order=descending))"), ("kuhn_poker3p", "kuhn_poker(players=3)"), ("first_sealed_auction", "first_sealed_auction(max_value=2)"), ]