Exemplo n.º 1
0
 def test_deep_cfr_runs(self, game_name):
     """Smoke test: build a small Deep CFR solver for a game and run it.

     The test passes as long as construction and `solve()` complete without
     raising; no property of the result is asserted.

     Args:
       game_name: Name handed to `pyspiel.load_game` (presumably supplied by
         a parameterized-test decorator outside this view).
     """
     game = pyspiel.load_game(game_name)
     deep_cfr_solver = deep_cfr.DeepCFRSolver(
         game,
         # Deliberately tiny networks and iteration counts keep the run fast.
         policy_network_layers=(8, 4),
         advantage_network_layers=(4, 2),
         num_iterations=2,
         num_traversals=2,
         learning_rate=1e-3,
         batch_size_advantage=None,
         batch_size_strategy=None,
         # A capacity is a count, so pass an integer rather than the float
         # literal 1e7.
         memory_capacity=int(1e7))
     deep_cfr_solver.solve()
Exemplo n.º 2
0
 def test_matching_pennies_3p(self):
     """Run Deep CFR on three-player matching pennies and log its NashConv.

     The game is loaded through `load_game_as_turn_based`, solved with a small
     solver configuration, and the resulting `action_probabilities` callable
     is converted to a tabular policy whose NashConv is logged. The NashConv
     value is only reported, not asserted.
     """
     game = pyspiel.load_game_as_turn_based('matching_pennies_3p')
     deep_cfr_solver = deep_cfr.DeepCFRSolver(
         game,
         policy_network_layers=(16, 8),
         advantage_network_layers=(32, 16),
         num_iterations=2,
         num_traversals=2,
         learning_rate=1e-3,
         batch_size_advantage=None,
         batch_size_strategy=None,
         # A capacity is a count, so pass an integer rather than the float
         # literal 1e7.
         memory_capacity=int(1e7))
     deep_cfr_solver.solve()

     # Tabulate the learned policy, then convert it to the pyspiel policy
     # type that `pyspiel.nash_conv` accepts.
     tabular_policy = policy.tabular_policy_from_callable(
         game, deep_cfr_solver.action_probabilities)
     conv = pyspiel.nash_conv(
         game, policy.python_policy_to_pyspiel_policy(tabular_policy))
     logging.info('Deep CFR in Matching Pennies 3p. NashConv: %.2f', conv)
def main(unused_argv):
    """Train Deep CFR on the game named by `FLAGS.game_name` and log results.

    Steps, in order:
      1. Load the game and build a `DeepCFRSolver` whose iteration and
         traversal counts come from flags.
      2. Run `solve()` and log, per player, the head and tail of the advantage
         losses and the advantage buffer size, followed by the strategy
         buffer size and the final policy loss.
      3. Convert the solver's `action_probabilities` into a tabular policy and
         log its NashConv.
      4. Log every player's expected value when all players follow that
         policy.

    Args:
      unused_argv: Positional command-line arguments; ignored.
    """
    logging.info("Loading %s", FLAGS.game_name)
    game = pyspiel.load_game(FLAGS.game_name)

    deep_cfr_solver = deep_cfr.DeepCFRSolver(
        game,
        policy_network_layers=(32, 32),
        advantage_network_layers=(16, 16),
        num_iterations=FLAGS.num_iterations,
        num_traversals=FLAGS.num_traversals,
        learning_rate=1e-3,
        batch_size_advantage=None,
        batch_size_strategy=None,
        memory_capacity=int(1e7))

    _, advantage_losses, policy_loss = deep_cfr_solver.solve()
    for player, losses in six.iteritems(advantage_losses):
        # Only the first and last two entries are shown to keep the log short.
        logging.info("Advantage for player %d: %s", player,
                     losses[:2] + ["..."] + losses[-2:])
        logging.info("Advantage Buffer Size for player %s: '%s'", player,
                     len(deep_cfr_solver.advantage_buffers[player]))
    logging.info("Strategy Buffer Size: '%s'",
                 len(deep_cfr_solver.strategy_buffer))
    logging.info("Final policy loss: '%s'", policy_loss)

    average_policy = policy.tabular_policy_from_callable(
        game, deep_cfr_solver.action_probabilities)
    pyspiel_policy = policy.python_policy_to_pyspiel_policy(average_policy)
    conv = pyspiel.nash_conv(game, pyspiel_policy)
    logging.info("Deep CFR in '%s' - NashConv: %s", FLAGS.game_name, conv)

    # Supply one copy of the learned policy per player. The game is chosen by
    # a flag, so the player count must not be assumed to be two.
    num_players = game.num_players()
    average_policy_values = expected_game_score.policy_value(
        game.new_initial_state(), [average_policy] * num_players)

    # -1/18 and 1/18 are the reference values for two-player Kuhn poker only,
    # so they are reported just for that game. Float literals keep the values
    # independent of integer-division semantics.
    kuhn_reference_values = (-1.0 / 18, 1.0 / 18)
    is_two_player_kuhn = (
        game.get_type().short_name == "kuhn_poker" and num_players == 2)
    for player in range(num_players):
        if is_two_player_kuhn:
            logging.info("Computed player %d value: %.2f (expected: %.2f).",
                         player, average_policy_values[player],
                         kuhn_reference_values[player])
        else:
            logging.info("Computed player %d value: %.2f.", player,
                         average_policy_values[player])