示例#1
0
def main(unused_argv):
    """Run generalized PSRO on the flag-selected game and log the outcome.

    Loads ``FLAGS.game`` with ``FLAGS.num_players`` players, builds an
    ``EvolutionaryStrategyOracle`` from the oracle-related flags, and runs
    ``FLAGS.gen_psro_iterations`` solver iterations using the 'nash'
    meta-strategy method. The meta-strategies are logged after every
    iteration; the final meta-strategies and the meta-game are logged once
    the loop is done.

    Args:
        unused_argv: Not read by this function.
    """
    game_name = FLAGS.game
    game_params = {"players": pyspiel.GameParameter(FLAGS.num_players)}
    game = pyspiel.load_game(game_name, game_params)

    # Collect the oracle configuration first so the constructor call stays
    # short; keys mirror the constructor's keyword arguments.
    oracle_kwargs = {
        "n_evolution_tests": FLAGS.n_evolution_tests,
        "number_policies_sampled": FLAGS.number_policies_sampled,
        "number_episodes_sampled": FLAGS.number_episodes_sampled,
        "alpha": FLAGS.alpha,
        "beta": FLAGS.beta,
    }
    oracle = optimization_oracle.EvolutionaryStrategyOracle(**oracle_kwargs)

    solver_kwargs = {
        "sims_per_entry": FLAGS.sims_per_entry,
        "meta_strategy_method": "nash",
        "rectify_training": FLAGS.rectify_training,
    }
    g_psro_solver = generalized_psro.GenPSROSolver(
        game, oracle, **solver_kwargs)

    total_iterations = FLAGS.gen_psro_iterations
    for step in range(1, total_iterations + 1):
        g_psro_solver.iteration()
        current_probabilities = (
            g_psro_solver.get_and_update_meta_strategies())
        # Progress line ("done / total") followed by the current strategies.
        logging.info("%s / %s", step, total_iterations)
        logging.info(current_probabilities)

    # NOTE(review): get_meta_game is read as an attribute, not called.
    # Confirm it is a property on GenPSROSolver; if it is a plain method,
    # the bound method object (not the payoff data) is what gets logged.
    meta_game = g_psro_solver.get_meta_game
    meta_probabilities = g_psro_solver.get_and_update_meta_strategies()

    # Each report is a header, the payload, and an empty separator line.
    reports = (
        ("%s meta probabilities", meta_probabilities),
        ("%s Meta Game Values", meta_game),
    )
    for header_format, payload in reports:
        logging.info(header_format, game_name)
        logging.info(payload)
        logging.info("")
  def test_gpsro(self, game_name, rnr_iterations, sims_per_entry,
                 number_players, rectify_training,
                 training_strategy_selector, meta_strategy_method):
    """Smoke-run the generalized PSRO solver and log what it produces.

    Builds the game and a small evolutionary-strategy oracle (2 policies
    and 2 episodes sampled), runs `rnr_iterations` solver iterations, and
    logs the resulting meta-strategies and meta-game. No assertions are
    made; the test only checks that the pipeline runs without raising.

    Args:
      game_name: Name passed to `pyspiel.load_game`.
      rnr_iterations: Number of times `iteration()` is called.
      sims_per_entry: Forwarded to `GenPSROSolver`.
      number_players: Value of the game's "players" parameter.
      rectify_training: Forwarded to `GenPSROSolver`.
      training_strategy_selector: Forwarded to `GenPSROSolver`.
      meta_strategy_method: Forwarded to `GenPSROSolver`; also logged.
    """
    game_params = {"players": pyspiel.GameParameter(number_players)}
    game = pyspiel.load_game(game_name, game_params)

    # Deliberately tiny sampling budget.
    oracle = optimization_oracle.EvolutionaryStrategyOracle(
        number_policies_sampled=2,
        number_episodes_sampled=2)

    solver_kwargs = {
        "sims_per_entry": sims_per_entry,
        "rectify_training": rectify_training,
        "training_strategy_selector": training_strategy_selector,
        "meta_strategy_method": meta_strategy_method,
    }
    g_psro_solver = generalized_psro.GenPSROSolver(
        game, oracle, **solver_kwargs)

    for _unused_step in range(rnr_iterations):
      g_psro_solver.iteration()

    # NOTE(review): get_meta_game is read as an attribute, not called.
    # Confirm it is a property on GenPSROSolver; if it is a plain method,
    # the bound method object is what ends up in the log.
    meta_game = g_psro_solver.get_meta_game
    meta_probabilities = g_psro_solver.get_and_update_meta_strategies()

    logging.info("%s %sP - %s", game_name, str(number_players),
                 meta_strategy_method)

    # Each report is a header, the payload, and an empty separator line.
    reports = (
        ("Meta Strategies", meta_probabilities),
        ("Meta Game Values", meta_game),
    )
    for header, payload in reports:
      logging.info(header)
      logging.info(payload)
      logging.info("")