Exemplo n.º 1
0
def test_best_response_cfr_one_card_poker():
    """Run CFR on One Card Poker and check the strategy's exploitability.

    Creates a One Card Poker game with ``n_cards=4``, runs 10 iterations of
    ``cfr`` with chance sampling disabled, then computes the exploitability
    of the returned strategy and asserts that it is strictly positive.
    """
    poker_game = OneCardPoker.create_game(n_cards=4)

    # cfr returns three values; only the first (the strategy) is used here.
    final_strategy, _, _ = cfr(
        poker_game,
        num_iters=10,
        use_chance_sampling=False,
    )

    result = compute_exploitability(poker_game, final_strategy)

    message = "Exploitability: {}".format(result)
    print(message)
    assert result > 0.0
Exemplo n.º 2
0
def test_best_response_cfr():
    """Run CFR on Leduc and check the strategy's exploitability.

    Builds a deck of ``Card(value, suit)`` for 3 values and 2 suits, creates
    a ``Leduc`` game from it, runs 10 iterations of ``cfr`` with chance
    sampling disabled, then computes the exploitability of the returned
    strategy and asserts that it is strictly positive.
    """
    # Values vary in the outer loop, suits in the inner loop.
    deck = [
        Card(card_value, card_suit)
        for card_value in range(3)
        for card_suit in range(2)
    ]
    leduc_game = Leduc(deck)

    # cfr returns three values; only the first (the strategy) is used here.
    final_strategy, _, _ = cfr(
        leduc_game,
        num_iters=10,
        use_chance_sampling=False,
    )

    result = compute_exploitability(leduc_game, final_strategy)

    message = "Exploitability: {}".format(result)
    print(message)
    assert result > 0.0
Exemplo n.º 3
0
        default=1e-4,
        help='The number of epochs to train the neural network for.')
    parser.add_argument('--dropout_rate',
                        default=None,
                        help='The dropout rate to use.')
    args = parser.parse_args()

    dropout_rate = None
    if args.dropout_rate:
        dropout_rate = float(args.dropout_rate)

    cards = get_deck(num_values=args.num_values, num_suits=args.num_suits)
    game = Leduc(cards)

    strategy, exploitabilities, strategies = cfr(
        game,
        num_iters=args.cfr_iters,
        use_chance_sampling=args.use_chance_sampling)

    exploitability = compute_exploitability(game, strategy)
    print("Exploitability of final strategy: {}".format(exploitability))

    leduc_nfsp = LeducNFSP(cards)
    state_vectors = leduc_nfsp._state_vectors
    state_dim = leduc_nfsp.state_dim
    action_dim = leduc_nfsp.action_dim

    # Now build a network.
    layer_dims = [64, 64, 64]
    network = build_network(state_dim,
                            action_dim,
                            layer_dims,