def test_net_vs_random(policy_fn, game_name, **kwargs):
    """Play the network bot against a uniform random bot from both seats.

    Two games are played: one with the ``NeuralNetBot`` seated as player 0
    and one with it seated as player 1. Each game gets a freshly created
    random opponent seeded with ``np.random.randint(0, 1000)``.

    Args:
        policy_fn: Policy callable forwarded to ``NeuralNetBot``.
        game_name: Game identifier passed to ``pyspiel.load_game``.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        A ``(score1, score2)`` tuple. ``score1`` is the ``play_game`` result
        with the network bot as player 0; ``score2`` is the negated
        ``play_game`` result with the network bot as player 1.
    """
    game = pyspiel.load_game(game_name)

    scores = []
    for net_seat in (0, 1):
        random_seat = 1 - net_seat

        # Construction order (net bot, then seed draw) matches both games.
        net_bot = NeuralNetBot(game, net_seat, policy_fn)
        random_bot = pyspiel.make_uniform_random_bot(
            game, random_seat, np.random.randint(0, 1000))

        # Arrange the bots by seat so play_game receives (player 0, player 1).
        seated = [None, None]
        seated[net_seat] = net_bot
        seated[random_seat] = random_bot
        outcome = play_game(game, seated[0], seated[1])

        # The result is negated when the network bot sits in seat 1.
        scores.append(outcome if net_seat == 0 else -outcome)

    return scores[0], scores[1]
def test_zero_vs_random(policy_fn, game_name='connect_four'):
    """Play the AlphaZero bot against a uniform random bot from both seats.

    Two games are played: one with the ``AlphaZeroBot`` seated as player 0
    and one with it seated as player 1. The bot is always created with
    ``use_dirichlet=False``. Each game gets a freshly created random opponent
    seeded with ``np.random.randint(0, 1000)``.

    Args:
        policy_fn: Policy callable forwarded to ``AlphaZeroBot``.
        game_name: Game identifier passed to ``pyspiel.load_game``. Defaults
            to ``'connect_four'``, the game this function previously
            hard-coded, so existing single-argument callers are unaffected.

    Returns:
        A ``(score1, score2, None)`` tuple. ``score1`` is the ``play_game``
        result with the AlphaZero bot as player 0; ``score2`` is the negated
        ``play_game`` result with the AlphaZero bot as player 1. The third
        element is always ``None``.
    """
    game = pyspiel.load_game(game_name)

    # Game 1: AlphaZero bot moves first (seat 0), random bot in seat 1.
    zero_bot = AlphaZeroBot(game, 0, policy_fn=policy_fn, use_dirichlet=False)
    random_bot = pyspiel.make_uniform_random_bot(
        game, 1, np.random.randint(0, 1000))
    score1 = play_game(game, zero_bot, random_bot)

    # Game 2: random bot moves first (seat 0), AlphaZero bot in seat 1.
    zero_bot = AlphaZeroBot(game, 1, policy_fn=policy_fn, use_dirichlet=False)
    random_bot = pyspiel.make_uniform_random_bot(
        game, 0, np.random.randint(0, 1000))
    # Negated so the value is taken from the seat-1 bot's side.
    score2 = -play_game(game, random_bot, zero_bot)

    return score1, score2, None
def main(unused_argv):
    """Play one game of ``FLAGS.game`` between an MCTS bot and a random bot.

    The MCTS bot takes seat ``FLAGS.mcts_player`` and a uniform random bot
    (seed 123) takes the other seat. Chance nodes are resolved by sampling
    from the reported outcome distribution. Progress and the final returns
    are printed to stdout.

    Args:
        unused_argv: Ignored command-line arguments.

    Raises:
        ValueError: If the game is neither a 1-player game nor a 2-player
            zero-sum game, or if a simultaneous-move node is reached.
    """
    uct_c = 2
    game = pyspiel.load_game(FLAGS.game)
    state = game.new_initial_state()
    print("Initial state: ", str(state))

    # The MCTS implementation only supports 1-player games and 2-player
    # zero-sum games; reject everything else up front.
    player_count = game.num_players()
    unsupported = player_count not in (1, 2) or (
        player_count == 2
        and game.get_type().utility != pyspiel.GameType.Utility.ZERO_SUM)
    if unsupported:
        raise ValueError("Game must be a 1-player game or 2-player zero-sum game")

    # MCTS bot with random rollouts, and its uniform random opponent.
    rollout_evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
    searcher = mcts.MCTSBot(game, FLAGS.mcts_player, uct_c,
                            FLAGS.max_search_nodes, rollout_evaluator)
    opponent = pyspiel.make_uniform_random_bot(game, 1 - FLAGS.mcts_player, 123)

    # Index the bots by seat.
    bots = [searcher, opponent] if FLAGS.mcts_player == 0 else [opponent, searcher]

    while not state.is_terminal():
        # A non-terminal state is a chance node, a simultaneous node,
        # or a single-player decision node.
        if state.is_chance_node():
            # Sample one outcome according to its probability.
            outcomes = state.chance_outcomes()
            print("Chance node, got " + str(len(outcomes)) + " outcomes")
            action_list, prob_list = zip(*outcomes)
            action = np.random.choice(action_list, p=prob_list)
            print("Sampled outcome: ",
                  state.action_to_string(state.current_player(), action))
            state.apply_action(action)
        elif state.is_simultaneous_node():
            raise ValueError("Game cannot have simultaneous nodes.")
        else:
            # Ask the bot sitting in the current seat for its move.
            mover = state.current_player()
            _, action = bots[mover].step(state)
            print("Player ", mover, ", randomly sampled action: ",
                  state.action_to_string(mover, action))
            state.apply_action(action)

        print("Next state: ", str(state))

    # The game is over: report each player's return.
    returns = state.returns()
    for pid in range(game.num_players()):
        print("Return for player {} is {}".format(pid, returns[pid]))
def test_python_and_cpp_bot(self):
    """Check average returns when a pyspiel bot plays a Python bot.

    Seat 0 is ``pyspiel.make_uniform_random_bot`` (presumably the C++ bot,
    going by the test name) and seat 1 is the Python ``UniformRandomBot``.
    They play 10000 evaluations of Kuhn poker; the loop index is passed as
    the third argument of ``pyspiel.evaluate_bots``. The mean result per
    player must be within 0.1 of ``[0.125, -0.125]``.
    """
    game = pyspiel.load_game("kuhn_poker")

    pyspiel_bot = pyspiel.make_uniform_random_bot(0, 1234)
    python_bot = uniform_random.UniformRandomBot(1, np.random.RandomState(4321))
    bots = [pyspiel_bot, python_bot]

    # One row of per-player results for every evaluation.
    outcomes = []
    for iteration in range(10000):
        outcomes.append(
            pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration))
    results = np.array(outcomes)

    average_results = np.mean(results, axis=0)
    np.testing.assert_allclose(average_results, [0.125, -0.125], atol=0.1)
class EvaluateBotsTest(parameterized.TestCase):
    """Tests for ``evaluate_bots.evaluate_bots`` with several bot pairings."""

    @parameterized.parameters([
        (
            [
                pyspiel.make_uniform_random_bot(0, 1234),
                uniform_random.UniformRandomBot(1, np.random.RandomState(4321)),
            ],
        ),
        (policy_bots(),),
    ])
    def test_cpp_vs_python(self, bots):
        """Average returns over 10000 games on ``GAME`` are near +/-0.125.

        Args:
            bots: The pair of bots to evaluate, one per seat.
        """
        # One row of per-player results for every evaluation.
        outcomes = []
        for _ in range(10000):
            outcomes.append(
                evaluate_bots.evaluate_bots(
                    GAME.new_initial_state(), bots, np.random))
        results = np.array(outcomes)

        average_results = np.mean(results, axis=0)
        np.testing.assert_allclose(average_results, [0.125, -0.125], atol=0.1)

    def test_random_vs_stateful(self):
        """Run 1000 tic-tac-toe evaluations of a stateful bot vs a random bot.

        Nothing is asserted about the results; the test only requires that
        the evaluations complete without raising.
        """
        game = pyspiel.load_game("tic_tac_toe")

        stateful_bot = pyspiel.make_stateful_random_bot(game, 0, 1234)
        uniform_bot = uniform_random.UniformRandomBot(
            1, np.random.RandomState(4321))
        bots = [stateful_bot, uniform_bot]

        for _ in range(1000):
            evaluate_bots.evaluate_bots(game.new_initial_state(), bots, np.random)