def test_net_vs_random(policy_fn, game_name, **kwargs):
    """Pit a ``NeuralNetBot`` against a uniform random bot from both seats.

    Two matches are played via ``play_game``: one where the network bot is
    player 0 and one where it is player 1. Each random bot is seeded with a
    fresh draw from ``np.random.randint(0, 1000)``.

    Args:
        policy_fn: Forwarded to ``NeuralNetBot`` for both matches.
        game_name: Name handed to ``pyspiel.load_game``.
        **kwargs: Accepted but not used by this function.

    Returns:
        A 2-tuple ``(score1, score2)``. ``score1`` is the ``play_game`` result
        with the network bot passed first; ``score2`` is the negated
        ``play_game`` result with the random bot passed first (presumably so
        both scores are from the network bot's point of view -- confirm
        against ``play_game``).
    """
    game = pyspiel.load_game(game_name)

    scores = []
    for net_seat in (0, 1):
        # Build the network bot first, then draw the seed, so the order of
        # constructor calls and random draws matches a straight-line version.
        net_bot = NeuralNetBot(game, net_seat, policy_fn)
        seed = np.random.randint(0, 1000)
        opponent = pyspiel.make_uniform_random_bot(game, 1 - net_seat, seed)

        # Bots are passed to play_game in seat order.
        seats = [None, None]
        seats[net_seat] = net_bot
        seats[1 - net_seat] = opponent
        outcome = play_game(game, seats[0], seats[1])

        # The result is negated only when the network bot sits second.
        scores.append(outcome if net_seat == 0 else -outcome)

    return scores[0], scores[1]
def test_zero_vs_random(policy_fn):
    """Pit an ``AlphaZeroBot`` against a uniform random bot on connect_four.

    Two matches are played via ``play_game``, one per seat assignment. The
    AlphaZero bot is always built with ``use_dirichlet=False`` and each random
    bot is seeded with a fresh draw from ``np.random.randint(0, 1000)``.

    Args:
        policy_fn: Passed to ``AlphaZeroBot`` as its ``policy_fn`` keyword.

    Returns:
        A 3-tuple ``(score1, score2, None)``. ``score1`` is the ``play_game``
        result with the AlphaZero bot passed first; ``score2`` is the negated
        ``play_game`` result with the random bot passed first. The third
        element is always ``None``.
    """
    game = pyspiel.load_game('connect_four')

    # Match 1: AlphaZero bot is player 0, random bot is player 1.
    az_as_first = AlphaZeroBot(
        game, 0, policy_fn=policy_fn, use_dirichlet=False)
    first_seed = np.random.randint(0, 1000)
    random_as_second = pyspiel.make_uniform_random_bot(game, 1, first_seed)
    az_first_score = play_game(game, az_as_first, random_as_second)

    # Match 2: seats swapped; the result is negated.
    az_as_second = AlphaZeroBot(
        game, 1, policy_fn=policy_fn, use_dirichlet=False)
    second_seed = np.random.randint(0, 1000)
    random_as_first = pyspiel.make_uniform_random_bot(game, 0, second_seed)
    az_second_score = -play_game(game, random_as_first, az_as_second)

    return az_first_score, az_second_score, None
示例#3
0
def main(unused_argv):
  """Play one game of ``FLAGS.game`` between an MCTS bot and a random bot.

  Prints the initial state, every sampled chance outcome or chosen action,
  each successor state and, once the game is over, the return of every
  player.

  Args:
    unused_argv: Ignored.

  Raises:
    ValueError: If the game is neither a 1-player game nor a 2-player
      zero-sum game, or if a simultaneous-move node is reached during play.
  """
  exploration_constant = 2
  game = pyspiel.load_game(FLAGS.game)
  state = game.new_initial_state()
  print("Initial state: ", str(state))

  # The implemented MCTS algorithm only supports single-player games and
  # two-player zero-sum games; reject anything else before building bots.
  player_count = game.num_players()
  is_two_player_zero_sum = (
      player_count == 2 and
      game.get_type().utility == pyspiel.GameType.Utility.ZERO_SUM)
  if player_count != 1 and not is_two_player_zero_sum:
    raise ValueError("Game must be a 1-player game or 2-player zero-sum game")

  # MCTS bot backed by random rollouts.
  rollout_evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
  mcts_bot = mcts.MCTSBot(game, FLAGS.mcts_player, exploration_constant,
                          FLAGS.max_search_nodes, rollout_evaluator)

  # Uniform random opponent in the other seat, with a fixed seed.
  random_bot = pyspiel.make_uniform_random_bot(game, 1 - FLAGS.mcts_player, 123)

  # Index bots by player id.
  bots = ([mcts_bot, random_bot] if FLAGS.mcts_player == 0
          else [random_bot, mcts_bot])

  while not state.is_terminal():
    # A non-terminal state is a chance node, a simultaneous node (not
    # supported here) or a decision node for a single player.
    if state.is_chance_node():
      # Sample an outcome according to the chance distribution.
      outcomes = state.chance_outcomes()
      print("Chance node, got " + str(len(outcomes)) + " outcomes")
      candidate_actions, probabilities = zip(*outcomes)
      action = np.random.choice(candidate_actions, p=probabilities)
      print("Sampled outcome: ",
            state.action_to_string(state.current_player(), action))
    elif state.is_simultaneous_node():
      raise ValueError("Game cannot have simultaneous nodes.")
    else:
      # Ask the bot that owns the current seat for its action.
      player = state.current_player()
      _, action = bots[player].step(state)
      print("Player ", player, ", randomly sampled action: ",
            state.action_to_string(player, action))

    # Both surviving branches end by applying the chosen action.
    state.apply_action(action)
    print("Next state: ", str(state))

  # Game over: report each player's return.
  final_returns = state.returns()
  for pid in range(player_count):
    print("Return for player {} is {}".format(pid, final_returns[pid]))
示例#4
0
 def test_python_and_cpp_bot(self):
     """Check average returns when a pyspiel bot plays a Python random bot.

     Runs 10000 games of kuhn_poker through ``pyspiel.evaluate_bots``,
     passing the loop index as the third argument (presumably the seed --
     confirm against the pyspiel API), and asserts that the mean returns
     are within 0.1 of ``[0.125, -0.125]``.
     """
     game = pyspiel.load_game("kuhn_poker")
     pyspiel_bot = pyspiel.make_uniform_random_bot(0, 1234)
     python_bot = uniform_random.UniformRandomBot(
         1, np.random.RandomState(4321))
     bots = [pyspiel_bot, python_bot]

     # Collect one returns vector per game.
     per_game_returns = []
     for iteration in range(10000):
         per_game_returns.append(
             pyspiel.evaluate_bots(game.new_initial_state(), bots, iteration))

     # Average over games (axis 0) to get one mean return per player.
     mean_returns = np.mean(np.array(per_game_returns), axis=0)
     np.testing.assert_allclose(mean_returns, [0.125, -0.125], atol=0.1)
示例#5
0
class EvaluateBotsTest(parameterized.TestCase):
    """Exercises ``evaluate_bots.evaluate_bots`` with different bot pairings."""

    @parameterized.parameters([
        (
            [
                pyspiel.make_uniform_random_bot(0, 1234),
                uniform_random.UniformRandomBot(
                    1, np.random.RandomState(4321)),
            ],
        ),
        (policy_bots(),),
    ])
    def test_cpp_vs_python(self, bots):
        # Play 10000 games from GAME's initial state with the given pair of
        # bots, then check that the mean return per player is within 0.1 of
        # [0.125, -0.125].
        per_game_returns = []
        for _ in range(10000):
            per_game_returns.append(
                evaluate_bots.evaluate_bots(GAME.new_initial_state(), bots,
                                            np.random))
        mean_returns = np.mean(np.array(per_game_returns), axis=0)
        np.testing.assert_allclose(mean_returns, [0.125, -0.125], atol=0.1)

    def test_random_vs_stateful(self):
        # Plays 1000 games of tic_tac_toe between a pyspiel stateful random
        # bot and the Python uniform random bot. Nothing is asserted, so the
        # test passes as long as no call raises.
        game = pyspiel.load_game("tic_tac_toe")
        stateful_bot = pyspiel.make_stateful_random_bot(game, 0, 1234)
        python_bot = uniform_random.UniformRandomBot(
            1, np.random.RandomState(4321))
        bots = [stateful_bot, python_bot]
        for _ in range(1000):
            evaluate_bots.evaluate_bots(game.new_initial_state(), bots,
                                        np.random)