def main(unused_argv):
    """Run the Generalized PSRO solver on the flag-selected game and log results.

    Loads `FLAGS.game` with `FLAGS.num_players` players, builds an
    `EvolutionaryStrategyOracle` and a `GenPSROSolver` from the command-line
    flags, runs `FLAGS.gen_psro_iterations` solver iterations (logging the
    meta-strategies after each one), and finally logs the resulting
    meta-strategies and the meta game.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    game = pyspiel.load_game(
        FLAGS.game,
        {"players": pyspiel.GameParameter(FLAGS.num_players)},
    )

    # Oracle hyper-parameters are taken verbatim from the flags.
    oracle_settings = {
        "n_evolution_tests": FLAGS.n_evolution_tests,
        "number_policies_sampled": FLAGS.number_policies_sampled,
        "number_episodes_sampled": FLAGS.number_episodes_sampled,
        "alpha": FLAGS.alpha,
        "beta": FLAGS.beta,
    }
    oracle = optimization_oracle.EvolutionaryStrategyOracle(**oracle_settings)

    # The meta-strategy method is fixed to 'nash' here rather than read from
    # a flag.
    solver_settings = {
        "sims_per_entry": FLAGS.sims_per_entry,
        "meta_strategy_method": 'nash',
        "rectify_training": FLAGS.rectify_training,
    }
    g_psro_solver = generalized_psro.GenPSROSolver(
        game, oracle, **solver_settings)

    for iteration_index in range(FLAGS.gen_psro_iterations):
        g_psro_solver.iteration()
        current_probabilities = g_psro_solver.get_and_update_meta_strategies()
        # Progress is reported 1-based: "<done> / <total>".
        logging.info(
            "%s / %s", iteration_index + 1, FLAGS.gen_psro_iterations)
        logging.info(current_probabilities)

    # NOTE(review): `get_meta_game` is read as an attribute (no call), which
    # is only correct if the solver exposes it as a property -- confirm
    # against the installed GenPSROSolver.
    final_meta_game = g_psro_solver.get_meta_game
    final_probabilities = g_psro_solver.get_and_update_meta_strategies()

    # Each report section is: a header naming the game, the payload, then an
    # empty line as a separator.
    report_sections = (
        ("%s meta probabilities", final_probabilities),
        ("%s Meta Game Values", final_meta_game),
    )
    for header_format, payload in report_sections:
        logging.info(header_format, FLAGS.game)
        logging.info(payload)
        logging.info("")
def test_gpsro(self, game_name, rnr_iterations, sims_per_entry,
               number_players, rectify_training, training_strategy_selector,
               meta_strategy_method):
    """Smoke-run the Generalized PSRO solver and log what it produced.

    Makes no assertions: the test succeeds as long as building the solver,
    iterating it and querying its meta-strategies / meta game do not raise.

    Args:
        game_name: Name of the game passed to `pyspiel.load_game`.
        rnr_iterations: Number of times `iteration()` is called on the solver.
        sims_per_entry: Forwarded to `GenPSROSolver` as `sims_per_entry`.
        number_players: Value for the game's "players" parameter.
        rectify_training: Forwarded to `GenPSROSolver` as `rectify_training`.
        training_strategy_selector: Forwarded to `GenPSROSolver` as
            `training_strategy_selector`.
        meta_strategy_method: Forwarded to `GenPSROSolver` as
            `meta_strategy_method`; also included in the log header.
    """
    players_parameter = pyspiel.GameParameter(number_players)
    game = pyspiel.load_game(game_name, {"players": players_parameter})

    # Small sample counts are hard-coded for the oracle in this test.
    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        number_policies_sampled=2,
        number_episodes_sampled=2,
    )

    solver_options = {
        "sims_per_entry": sims_per_entry,
        "rectify_training": rectify_training,
        "training_strategy_selector": training_strategy_selector,
        "meta_strategy_method": meta_strategy_method,
    }
    g_psro_solver = generalized_psro.GenPSROSolver(
        game, oracle, **solver_options)

    for _ in range(rnr_iterations):
        g_psro_solver.iteration()

    # NOTE(review): `get_meta_game` is read as an attribute (no call), which
    # is only correct if the solver exposes it as a property -- confirm
    # against the installed GenPSROSolver.
    resulting_meta_game = g_psro_solver.get_meta_game
    resulting_probabilities = g_psro_solver.get_and_update_meta_strategies()

    # Header line: "<game> <players>P - <meta strategy method>".
    players_label = str(number_players)
    logging.info("%s %sP - %s", game_name, players_label,
                 meta_strategy_method)

    logging.info("Meta Strategies")
    logging.info(resulting_probabilities)
    logging.info("")

    logging.info("Meta Game Values")
    logging.info(resulting_meta_game)
    logging.info("")