def test_zero_vs_mcts(policy_fn, max_search_nodes, game_name, **kwargs):
    """Pit an AlphaZero bot against a random-rollout MCTS bot, once per seat.

    Args:
        policy_fn: Passed to ``AlphaZeroBot`` as its ``policy_fn``.
        max_search_nodes: Search budget passed to each ``mcts.MCTSBot``.
        game_name: Game string handed to ``pyspiel.load_game``.
        **kwargs: Additional keyword arguments forwarded to ``AlphaZeroBot``.

    Returns:
        ``(score1, score2, None)``. ``score1`` is the ``play_game`` result of
        the match in which the AlphaZero bot is player 0; ``score2`` is the
        negated ``play_game`` result of the match in which it is player 1.
    """
    game = pyspiel.load_game(game_name)

    def _alphazero_as(player_id):
        return AlphaZeroBot(
            game, player_id, policy_fn=policy_fn, use_dirichlet=False,
            **kwargs)

    def _mcts_as(player_id):
        return mcts.MCTSBot(
            game, player_id, 1, max_search_nodes,
            mcts.RandomRolloutEvaluator(1))

    # First match: AlphaZero bot is player 0, MCTS bot is player 1.
    az_first = _alphazero_as(0)
    mcts_second = _mcts_as(1)
    score1 = play_game(game, az_first, mcts_second)

    # Second match: MCTS bot is player 0, AlphaZero bot is player 1.
    # The result is negated -- presumably so both scores are expressed from
    # the AlphaZero bot's point of view (confirm against play_game).
    az_second = _alphazero_as(1)
    mcts_first = _mcts_as(0)
    score2 = -play_game(game, mcts_first, az_second)

    return score1, score2, None
def test_net_vs_mcts(policy_fn, max_search_nodes, game_name, **kwargs):
    """Pit a raw neural-net bot against a random-rollout MCTS bot, once per seat.

    Args:
        policy_fn: Passed positionally to ``NeuralNetBot``.
        max_search_nodes: Search budget passed to each ``mcts.MCTSBot``.
        game_name: Game string handed to ``pyspiel.load_game``.
        **kwargs: Accepted but not used anywhere in this function.

    Returns:
        ``(score1, score2, None)``. ``score1`` is the ``play_game`` result of
        the match in which the net bot is player 0; ``score2`` is the negated
        ``play_game`` result of the match in which it is player 1.
    """
    game = pyspiel.load_game(game_name)

    def _mcts_as(player_id):
        return mcts.MCTSBot(
            game, player_id, 1, max_search_nodes,
            mcts.RandomRolloutEvaluator(1))

    # First match: net bot is player 0, MCTS bot is player 1.
    net_first = NeuralNetBot(game, 0, policy_fn)
    mcts_second = _mcts_as(1)
    score1 = play_game(game, net_first, mcts_second)

    # Second match: MCTS bot is player 0, net bot is player 1.
    # The result is negated -- presumably so both scores are expressed from
    # the net bot's point of view (confirm against play_game).
    net_second = NeuralNetBot(game, 1, policy_fn)
    mcts_first = _mcts_as(0)
    score2 = -play_game(game, mcts_first, net_second)

    return score1, score2, None
def _init_bot(bot_type, game, player_id):
    """Construct the bot selected by ``bot_type``.

    Args:
        bot_type: One of ``"mcts"``, ``"az"``, ``"random"``, ``"human"`` or
            ``"gtp"``.
        game: Game object handed to the MCTS, AlphaZero and GTP bots.
        player_id: Only used by the ``"random"`` bot.

    Returns:
        The newly constructed bot.

    Raises:
        ValueError: If ``bot_type`` is not one of the names listed above.
    """
    # A fresh RandomState seeded from FLAGS.seed is created on every call.
    rng = np.random.RandomState(FLAGS.seed)

    if bot_type == "mcts":
        rollout_evaluator = mcts.RandomRolloutEvaluator(
            FLAGS.rollout_count, rng)
        bot = mcts.MCTSBot(
            game,
            FLAGS.uct_c,
            FLAGS.max_simulations,
            rollout_evaluator,
            random_state=rng,
            solve=FLAGS.solve,
            verbose=FLAGS.verbose)
    elif bot_type == "az":
        # Same search as "mcts", but guided by a checkpointed model and
        # using PUCT child selection.
        checkpoint_model = az_model.Model.from_checkpoint(FLAGS.az_path)
        model_evaluator = az_evaluator.AlphaZeroEvaluator(
            game, checkpoint_model)
        bot = mcts.MCTSBot(
            game,
            FLAGS.uct_c,
            FLAGS.max_simulations,
            model_evaluator,
            random_state=rng,
            child_selection_fn=mcts.SearchNode.puct_value,
            solve=FLAGS.solve,
            verbose=FLAGS.verbose)
    elif bot_type == "random":
        bot = uniform_random.UniformRandomBot(player_id, rng)
    elif bot_type == "human":
        bot = human.HumanBot()
    elif bot_type == "gtp":
        bot = gtp.GTPBot(game, FLAGS.gtp_path)
        # Send every configured GTP command to the bot, in flag order.
        for gtp_command in FLAGS.gtp_cmd:
            bot.gtp_cmd(gtp_command)
    else:
        raise ValueError("Invalid bot type: %s" % bot_type)

    return bot
def test_can_play_single_player(self):
    """A lone MCTS bot playing "catch" must end with a positive return."""
    catch = pyspiel.load_game("catch")
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)
    solo_bot = mcts.MCTSBot(catch, UCT_C, 100, rollout_evaluator)

    returns = evaluate_bots.evaluate_bots(
        catch.new_initial_state(), [solo_bot], np.random)

    self.assertGreater(returns[0], 0)
def test_can_play_both_sides(self):
    """One MCTS bot instance fills both tic-tac-toe seats; returns sum to 0."""
    game = pyspiel.load_game("tic_tac_toe")
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)
    shared_bot = mcts.MCTSBot(
        game, UCT_C, max_simulations=100, evaluator=rollout_evaluator)

    # The very same bot object is used for both players.
    returns = evaluate_bots.evaluate_bots(
        game.new_initial_state(), [shared_bot] * 2, np.random)

    self.assertEqual(returns[0] + returns[1], 0)
def test_throws_on_simultaneous_game(self):
    """Constructing an MCTSBot for "matrix_mp" must raise ValueError."""
    simultaneous_game = pyspiel.load_game("matrix_mp")
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)

    # Callable form of assertRaises: the constructor is invoked by the
    # test framework with exactly these arguments.
    self.assertRaises(
        ValueError,
        mcts.MCTSBot,
        simultaneous_game,
        0,
        UCT_C,
        max_simulations=100,
        evaluator=rollout_evaluator)
def main(unused_argv):
    """Play one game of ``FLAGS.game`` between an MCTS bot and a random bot.

    The MCTS bot takes the seat given by ``FLAGS.mcts_player``; the uniform
    random bot takes the other one. Each step and the final per-player
    returns are printed.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If the game is neither 1-player nor 2-player zero-sum,
            or if a simultaneous-move node is reached during play.
    """
    uct_c = 2
    game = pyspiel.load_game(FLAGS.game)
    state = game.new_initial_state()
    print("Initial state: ", str(state))

    # The MCTS implementation used here only handles 1-player games and
    # 2-player zero-sum games.
    num_players = game.num_players()
    two_player_not_zero_sum = (
        num_players == 2
        and game.get_type().utility != pyspiel.GameType.Utility.ZERO_SUM)
    if num_players not in (1, 2) or two_player_not_zero_sum:
        raise ValueError("Game must be a 1-player game or 2-player zero-sum game")

    # Build the two bots and seat them according to FLAGS.mcts_player.
    rollout_evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
    mcts_bot = mcts.MCTSBot(game, FLAGS.mcts_player, uct_c,
                            FLAGS.max_search_nodes, rollout_evaluator)
    random_bot = pyspiel.make_uniform_random_bot(
        game, 1 - FLAGS.mcts_player, 123)
    bots = ([mcts_bot, random_bot] if FLAGS.mcts_player == 0
            else [random_bot, mcts_bot])

    while not state.is_terminal():
        # A non-terminal state is a chance node, a simultaneous node or a
        # single-player decision node.
        if state.is_chance_node():
            # Sample one outcome according to the chance distribution.
            outcomes = state.chance_outcomes()
            print("Chance node, got " + str(len(outcomes)) + " outcomes")
            action_list, prob_list = zip(*outcomes)
            action = np.random.choice(action_list, p=prob_list)
            print("Sampled outcome: ",
                  state.action_to_string(state.current_player(), action))
            state.apply_action(action)
        elif state.is_simultaneous_node():
            raise ValueError("Game cannot have simultaneous nodes.")
        else:
            # Ask the bot seated at the current player for its move.
            # NOTE(review): the message says "randomly sampled" even when the
            # acting bot is the MCTS bot; wording kept as-is.
            player = state.current_player()
            _, action = bots[player].step(state)
            print("Player ", player, ", randomly sampled action: ",
                  state.action_to_string(player, action))
            state.apply_action(action)

        print("Next state: ", str(state))

    # The game is over: report each player's return.
    returns = state.returns()
    for pid in range(num_players):
        print("Return for player {} is {}".format(pid, returns[pid]))
def local_random_vs_mcts():
    """Run ``print_games_per_second`` for random vs. MCTS on local tic-tac-toe.

    The uniform random bot is passed first and the MCTS bot second, with a
    3-second time limit.
    """
    print("local_random_vs_mcts")
    game = pyspiel.load_game("tic_tac_toe")
    random_bot = uniform_random.UniformRandomBot(0, np.random.RandomState())

    # Author's note: MCTS starts beating the random bot at ~3 sims, 1 rollout.
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=2)
    mcts_bot = mcts.MCTSBot(
        game,
        uct_c=math.sqrt(2),
        max_simulations=3,
        evaluator=rollout_evaluator)

    print_games_per_second(game, random_bot, mcts_bot, time_limit_s=3)
def _init_bot(bot_type, game, player_id):
    """Construct the bot selected by ``bot_type`` for seat ``player_id``.

    Args:
        bot_type: One of ``"mcts"``, ``"random"`` or ``"human"``.
        game: Game object passed to every bot constructor.
        player_id: Player id passed to every bot constructor.

    Returns:
        The newly constructed bot.

    Raises:
        ValueError: If ``bot_type`` is not one of the names listed above.
    """
    if bot_type == "mcts":
        rollout_evaluator = mcts.RandomRolloutEvaluator(FLAGS.rollout_count)
        bot = mcts.MCTSBot(game, player_id, FLAGS.uct_c,
                           FLAGS.max_search_nodes, rollout_evaluator)
    elif bot_type == "random":
        bot = uniform_random.UniformRandomBot(game, player_id, np.random)
    elif bot_type == "human":
        bot = human.HumanBot(game, player_id)
    else:
        raise ValueError("Invalid bot type: %s" % bot_type)
    return bot
def test_can_play_three_player_stochastic_games(self):
    """Three MCTS bots play 3-player pig; the returns must sum to zero."""
    game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)")
    simulation_budget = 100
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5)

    # Three distinct bot instances that share one evaluator.
    bots = [
        mcts.MCTSBot(game, UCT_C, simulation_budget, rollout_evaluator)
        for _ in range(3)
    ]

    returns = evaluate_bots.evaluate_bots(
        game.new_initial_state(), bots, np.random)
    self.assertEqual(sum(returns), 0)
def __init__(self, rollout_count=5, max_simulations=4000):
    """Set up a "geodesic_y" game, its initial state and an MCTS agent.

    Args:
        rollout_count: First argument to ``mcts.RandomRolloutEvaluator``.
        max_simulations: Simulation budget passed to ``mcts.MCTSBot``.
    """
    # Unseeded generator, shared by the evaluator and the search itself.
    shared_rng = np.random.RandomState(seed=None)
    rollout_evaluator = mcts.RandomRolloutEvaluator(rollout_count, shared_rng)

    self.game = pyspiel.load_game("geodesic_y")
    self.state = self.game.new_initial_state()
    self.agent = mcts.MCTSBot(
        self.game,
        2,
        max_simulations,
        rollout_evaluator,
        random_state=shared_rng,
        solve=True,
        verbose=False)
def search_tic_tac_toe_state(initial_actions):
    """Replay moves on a tic-tac-toe board, then run an MCTS search from there.

    Args:
        initial_actions: Single-space-separated action strings; each one is
            resolved with ``_get_action`` and applied in order.

    Returns:
        ``(search_result, state)``: the value returned by ``mcts_search`` and
        the state the search was started from.
    """
    game = pyspiel.load_game("tic_tac_toe")
    state = game.new_initial_state()
    for token in initial_actions.split(" "):
        move = _get_action(state, token)
        state.apply_action(move)

    # The bot is created for whichever player is to move after the replay.
    searcher = mcts.MCTSBot(
        game,
        player=state.current_player(),
        uct_c=math.sqrt(2),
        max_simulations=10000,
        solve=True,
        evaluator=mcts.RandomRolloutEvaluator(n_rollouts=20))
    search_result = searcher.mcts_search(state)
    return search_result, state
def test_can_play_tic_tac_toe(self):
    """Two MCTS bots play tic-tac-toe; their returns must sum to zero."""
    game = pyspiel.load_game("tic_tac_toe")
    exploration = math.sqrt(2)
    simulation_budget = 100
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=20)

    # One bot per seat (player ids 0 and 1), sharing a single evaluator.
    bots = [
        mcts.MCTSBot(game, player_id, exploration, simulation_budget,
                     rollout_evaluator)
        for player_id in range(2)
    ]

    returns = evaluate_bots.evaluate_bots(
        game.new_initial_state(), bots, np.random)
    self.assertEqual(returns[0] + returns[1], 0)
def test_can_play_three_player_game(self):
    """Three MCTS bots play 3-player pig; the returns must sum to zero."""
    game = pyspiel.load_game("pig(players=3,winscore=20,horizon=30)")
    exploration = math.sqrt(2)
    node_budget = 100
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=5)

    # One bot per seat (player ids 0, 1 and 2), sharing a single evaluator.
    bots = [
        mcts.MCTSBot(game, player_id, exploration, node_budget,
                     rollout_evaluator)
        for player_id in range(3)
    ]

    returns = evaluate_bots.evaluate_bots(
        game.new_initial_state(), bots, np.random)
    self.assertEqual(sum(returns), 0)
def search_tic_tac_toe_state(initial_actions):
    """Replay moves on a tic-tac-toe board, then run a seeded MCTS search.

    Args:
        initial_actions: Single-space-separated action strings; each one is
            resolved with ``_get_action`` and applied in order.

    Returns:
        ``(search_result, state)``: the value returned by ``mcts_search`` and
        the state the search was started from.
    """
    game = pyspiel.load_game("tic_tac_toe")
    state = game.new_initial_state()
    for token in initial_actions.split(" "):
        move = _get_action(state, token)
        state.apply_action(move)

    # A single RandomState seeded with 42 drives both the search and the
    # rollout evaluator.
    seeded_rng = np.random.RandomState(42)
    searcher = mcts.MCTSBot(
        game,
        UCT_C,
        max_simulations=10000,
        solve=True,
        random_state=seeded_rng,
        evaluator=mcts.RandomRolloutEvaluator(
            n_rollouts=20, random_state=seeded_rng))
    search_result = searcher.mcts_search(state)
    return search_result, state
def test_mcts_vs_random_game(self):
    """Play one game between an MCTS bot and a random bot over a NetworkGame.

    The game server is started on ``tcp://*:5555`` and the client connects
    to ``tcp://localhost:5555``. Shutdown (``game.exit()`` followed by
    ``server.join()``) runs in a ``finally`` block, so a failure while
    building the bots or playing the game no longer skips it and leaves the
    server running on the fixed port.
    """
    server = self._start_game_server("tcp://*:5555")
    game = NetworkGame("tcp://localhost:5555")
    try:
        mcts_bot = mcts.MCTSBot(
            game,
            uct_c=math.sqrt(2),
            max_simulations=2,
            evaluator=mcts.RandomRolloutEvaluator(n_rollouts=1))
        random_bot = uniform_random.UniformRandomBot(
            0, np.random.RandomState())
        self._play_one_game(game, mcts_bot, random_bot)
    finally:
        # Same shutdown order as on the success path: exit the client side
        # first, then wait for the server to finish.
        game.exit()
        server.join()
def random_vs_remote_mcts():
    """Run ``print_games_per_second`` for random vs. MCTS over a NetworkGame.

    A game server is started on ``tcp://*:5555`` and the client connects to
    ``tcp://localhost:5555``. The uniform random bot is passed first and the
    MCTS bot second, with a 3-second time limit. Shutdown (``game.exit()``
    followed by ``server.join()``) runs in a ``finally`` block, so an error
    during the run no longer skips it and leaves the server running.
    """
    print("random_vs_remote_mcts")
    server = start_game_server("tcp://*:5555")
    game = NetworkGame("tcp://localhost:5555")
    try:
        random_bot = uniform_random.UniformRandomBot(
            0, np.random.RandomState())
        # Author's note: MCTS starts beating the random bot at ~3 sims,
        # 1 rollout.
        mcts_bot = mcts.MCTSBot(
            game,
            uct_c=math.sqrt(2),
            max_simulations=3,
            evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
        print_games_per_second(game, random_bot, mcts_bot, time_limit_s=3)
    finally:
        # Same shutdown order as on the success path: exit the client side
        # first, then wait for the server to finish.
        game.exit()
        server.join()
def main():
    """Play one game against ``tcp://localhost:5555`` with a single MCTS bot.

    The same bot picks the action at every non-terminal state. Each chosen
    action is printed, and after the game ``"done"`` and the final state are
    printed. ``game.exit()`` runs in a ``finally`` block, so it is no longer
    skipped when the play loop raises.
    """
    game = NetworkGame("tcp://localhost:5555")
    try:
        # A uniform_random.UniformRandomBot(0, np.random.RandomState()) can
        # be substituted here to play random moves instead.
        bot = mcts.MCTSBot(
            game,
            uct_c=math.sqrt(2),
            max_simulations=3,
            evaluator=mcts.RandomRolloutEvaluator(n_rollouts=2))
        state = game.new_initial_state()
        while not state.is_terminal():
            action = bot.step(state)
            print('bot action:', action)
            state.apply_action(action)
    finally:
        game.exit()
    print("done")
    print(state)
def evaluator(*, game, config, logger, checkpoint, queue):
    """Evaluation worker: AlphaZero bot vs. random-rollout MCTS, indefinitely.

    The AlphaZero seat alternates every game and the MCTS simulation budget
    cycles through ``config.eval_levels`` difficulty levels, advancing one
    level every two games.

    Args:
        game: Game object used for the evaluator, the bots and play.
        config: Read for ``evaluation_window``, ``path``, ``eval_levels``,
            ``max_simulations`` and ``uct_c``; also passed to the model and
            bot initialisers.
        logger: Object whose ``print`` method receives progress messages.
        checkpoint: ``None`` to build a new model from ``config``; otherwise
            passed to ``_init_model_from_checkpoint``.
        queue: Passed to ``update_checkpoint``; also receives one
            ``(difficulty, az_return)`` tuple per game.

    Returns:
        ``None``, once ``update_checkpoint`` returns a falsy value.
    """
    results = Buffer(config.evaluation_window)

    logger.print("Initializing model")
    model = (
        _init_model_from_config(config)
        if checkpoint is None
        else _init_model_from_checkpoint(checkpoint, config.path))

    logger.print("Initializing bots")
    az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
    random_evaluator = mcts.RandomRolloutEvaluator()

    for game_num in itertools.count():
        if not update_checkpoint(logger, queue, model, az_evaluator):
            return

        # Even games seat AlphaZero as player 0, odd games as player 1;
        # each consecutive pair of games shares one difficulty level.
        pair_index, az_player = divmod(game_num, 2)
        difficulty = pair_index % config.eval_levels
        max_simulations = int(config.max_simulations * (10**(difficulty / 2)))

        az_bot = _init_bot(config, game, az_evaluator, True)
        mcts_bot = mcts.MCTSBot(
            game,
            config.uct_c,
            max_simulations,
            random_evaluator,
            solve=True,
            verbose=False)
        bots = [mcts_bot, az_bot] if az_player == 1 else [az_bot, mcts_bot]

        trajectory = _play_game(
            logger, game_num, game, bots, temperature=1, temperature_drop=0)
        az_return = trajectory.returns[az_player]
        mcts_return = trajectory.returns[1 - az_player]

        results.append(az_return)
        queue.put((difficulty, az_return))

        logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format(
            az_return, mcts_return, len(results), np.mean(results.data)))
def _init_bot(bot_type, game, player_id):
    """Construct the bot selected by ``bot_type``.

    Args:
        bot_type: One of ``"mcts"``, ``"random"`` or ``"human"``.
        game: Game object handed to the MCTS bot.
        player_id: Only used by the ``"random"`` bot.

    Returns:
        The newly constructed bot.

    Raises:
        ValueError: If ``bot_type`` is not one of the names listed above.
    """
    # A fresh RandomState seeded from FLAGS.seed is created on every call.
    rng = np.random.RandomState(FLAGS.seed)

    if bot_type == "mcts":
        rollout_evaluator = mcts.RandomRolloutEvaluator(
            FLAGS.rollout_count, rng)
        bot = mcts.MCTSBot(
            game,
            FLAGS.uct_c,
            FLAGS.max_simulations,
            rollout_evaluator,
            random_state=rng,
            solve=FLAGS.solve,
            verbose=FLAGS.verbose)
    elif bot_type == "random":
        bot = uniform_random.UniformRandomBot(player_id, rng)
    elif bot_type == "human":
        bot = human.HumanBot()
    else:
        raise ValueError("Invalid bot type: %s" % bot_type)

    return bot
def mcts_incremental_vs_rando():
    """Sweep MCTS strength against a uniform random bot, one game per setting.

    Simulation counts run from 2 to 10 and rollout counts from 1 to 10. For
    every combination one game is played with the MCTS bot listed first, and
    the elapsed time and outcome are printed. The original author notes that
    this takes a very long time.
    """
    for num_sims in range(2, 11):
        for num_rollouts in range(1, 11):
            start = datetime.now()
            print(f'sims: {num_sims}, rollouts: {num_rollouts}')

            rollout_evaluator = mcts.RandomRolloutEvaluator(
                n_rollouts=num_rollouts)
            mcts_bot = new_mcts_bot(game, num_sims, rollout_evaluator)
            random_bot = uniform_random.UniformRandomBot(
                1, np.random.RandomState())
            state = play_one_game([mcts_bot, random_bot])

            # Index 0 is the MCTS bot; a None winner is reported as a draw.
            no_winner = winner_idx(state) is None
            mcts_is_winner = winner_idx(state) == 0
            if no_winner:
                result = 'draw'
            elif mcts_is_winner:
                result = 'mcts: win'
            else:
                result = 'mcts: lose'

            print(f'game over in {datetime.now() - start}, result: {result}',
                  flush=True)
def evaluator(*, game, config, logger, num, queue):
    """Evaluation worker: AlphaZero bot vs. random-rollout MCTS, indefinitely.

    The MCTS opponent's simulation budget is ``config.max_simulations``
    scaled by ``3**num``. The two bots are built once and swap seats after
    every game.

    Args:
        game: Game object used for the evaluator, the bots and play.
        config: Read for ``max_simulations``, ``evaluation_window`` and
            ``uct_c``; also passed to the model and bot initialisers.
        logger: Object whose ``print`` method receives progress messages.
        num: Exponent applied to 3 when scaling the simulation budget.
        queue: Passed to ``update_checkpoint``.

    Returns:
        ``None``, once ``update_checkpoint`` returns a falsy value.
    """
    max_simulations = config.max_simulations * (3**num)
    logger.print("Running MCTS with", max_simulations, "simulations")

    results = Buffer(config.evaluation_window)

    logger.print("Initializing model")
    model = _init_model_from_config(config)

    logger.print("Initializing bots")
    az_evaluator = evaluator_lib.AlphaZeroEvaluator(game, model)
    random_evaluator = mcts.RandomRolloutEvaluator()

    # AlphaZero starts as player 0.
    az_player = 0
    az_bot = _init_bot(config, game, az_evaluator, True)
    mcts_bot = mcts.MCTSBot(
        game,
        config.uct_c,
        max_simulations,
        random_evaluator,
        solve=True,
        verbose=False)
    bots = [az_bot, mcts_bot]

    for game_num in itertools.count():
        if not update_checkpoint(logger, queue, model, az_evaluator):
            return

        trajectory = _play_game(
            logger, game_num, game, bots, temperature=1, temperature_drop=0)
        az_return = trajectory.returns[az_player]
        mcts_return = trajectory.returns[1 - az_player]

        results.append(az_return)
        logger.print("AZ: {}, MCTS: {}, AZ avg/{}: {:.3f}".format(
            az_return, mcts_return, len(results), np.mean(results.data)))

        # Exchange seats before the next game.
        bots = bots[::-1]
        az_player = 1 - az_player
def mcts_vs_random():
    """Play and report one game: an MCTS bot first, a uniform random bot second."""
    rollout_evaluator = mcts.RandomRolloutEvaluator(n_rollouts=1)
    mcts_bot = new_mcts_bot(game, 2, rollout_evaluator)
    random_bot = uniform_random.UniformRandomBot(1, np.random.RandomState())
    play_one_game_and_print_results([mcts_bot, random_bot])