Example #1
def test_best_response_rps():
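    # A constant strategy that always plays the first action (Rock); the best
    # response for either player is then pure Paper, with expected value 1.0.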
    bart_simpson_strategy = ConstStrategy((1, 0, 0))
    game = RockPaperScissors()
    for p in [0, 1]:
        strategy = BestResponse(game, p, [bart_simpson_strategy] * 2)
        assert tuple(strategy.best_responses.values())[0] == pytest.approx(
            (0.0, 1.0, 0.0))
        assert strategy.value == pytest.approx(1.0)
Example #2
def test_rps():
    g = RockPaperScissors()
    us = UniformStrategy()
    rng = get_rng(seed=3)
    params = rng.rand(3, 3) - 0.5
    vs = LinearValueStore(params, fix_mean=0.0, regularize_l1=6.0)
    infosampler = InformationSetSampler(g, us)
    val = SparseSGDLinearValueLearning(g, matrix_zerosum_features, vs, infosampler, seed=44)
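    # Three training phases against uniform play, 100 iterations each, with
    # progressively smaller step parameters.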
    val.compute([us, us], 100, 0.1, 0.1)
    val.compute([us, us], 100, 0.01, 0.01)
    val.compute([us, us], 100, 0.001, 0.001)
Example #3
def test_approx_best_response_rps():
    bart_simpson_strategy = ConstStrategy((1, 0, 0))
    game = RockPaperScissors()

    for p in [0, 1]:
        s = ApproxBestResponse(game,
                               p, [bart_simpson_strategy] * 2,
                               iterations=200,
                               seed=23)
        assert s.strategy((), 3) == pytest.approx((0.0, 1.0, 0.0))
        assert s.sample_value(50) == pytest.approx(1.0)
Example #4
def test_best_response_rps():
    # A fixed strategy over the labelled actions "R", "P", "S" that always plays "R".
    bart_simpson_strategy = FixedStrategy(Explicit([1, 0, 0], values=["R", "P", "S"]))
    game = RockPaperScissors()
    strategy = BestResponse(game, 0, {1: bart_simpson_strategy})
    assert list(strategy.best_responses.values())[0].probability("R") == 0.0
    assert list(strategy.best_responses.values())[0].probability("P") == 1.0
    assert list(strategy.best_responses.values())[0].probability("S") == 0.0
    assert strategy.value == pytest.approx(1.0)

    strategy = BestResponse(game, 1, {0: bart_simpson_strategy})
    assert list(strategy.best_responses.values())[0].probability("R") == 0.0
    assert list(strategy.best_responses.values())[0].probability("P") == 1.0
    assert list(strategy.best_responses.values())[0].probability("S") == 0.0
    assert strategy.value == pytest.approx(1.0)
Example #5
def main():
    print("#### Rock-paper-scissors value estimation")
    g = RockPaperScissors()
    us = UniformStrategy()
    infosampler = InformationSetSampler(g, us)
    val = LPZeroSumValueLearning(g, infosampler, matrix_zerosum_features, us)
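    # Estimate the RPS payoff-matrix entries; the conditions added below pin down
    # the scale, the diagonal, and the antisymmetry of the learned values.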

    # Regularize: set one payoff to 1.0
    val.add_condition({(0, 1): 1.0}, 1.0)
    print("# With only non-triviality (one payoff set to 1.0)")
    print(val.compute())
    print("Flex value sum", val.flex_sum)
    # Zero diagonal
    for i in range(3):
        val.add_condition({(i, i): 1.0}, 0.0)
    print("# With zero diagonal")
    print(val.compute())
    print("Flex value sum", val.flex_sum)

    # Symmetrical payoffs
    for i in range(3):
        for j in range(i):
            val.add_condition({(i, j): -1.0, (j, i): -1.0}, 0.0)
    print("# Adding val(i,j) = -val(j,i)")
    print(val.compute())
    print("Flex value sum", val.flex_sum)

    # return  # Goofspiel(3) is boring, Goofspiel(4) hits OOM
    print("#### Goofspiel(4) card value estimation")
    g = Goofspiel(4)
    mc = OutcomeMCCFR(g, seed=42)
    mc.compute(2000)
    ef = InfoSetExpectedFeatures(g, goofspiel_feaures_cards, mc)
    for i, f in ef.info_features.items():
        print("INFOSET {}:\n{}".format(i, f))
        print(ef.info_next[i])

    return
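    # NOTE: the LP-based card value estimation below is unreachable because of the
    # early return above; it is kept for reference only.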
    val = LPZeroSumValueLearning(g, infosampler, goofspiel_feaures_cards, mc)

    # Regularize: set one payoff to 1.0
    val.add_condition({(0, ): 1.0, (1, ): 1.0, (2, ): 1.0, (3, ): 1.0}, 10.0)
    print("# Regularizing card values mean to 2.5 (mean of 1..4)")
    print(len(val.conds_eq), len(val.conds_le), len(val.flex_variables))
    print(
        val.compute(
            options=dict(tol=1e-6, disp=True, sparse=True, lstsq=True)))
    print("Flex value sum", val.flex_sum)
Example #6
def main():
    # Produce MCCFR trace plots for games of increasing size, from Matching
    # Pennies and Rock-Paper-Scissors up to Goofspiel(5) and DicePoker(6).
    g = MatchingPennies()
    base = np.array([[1.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]])
    plot_to_files(g, "plot_mccfr_trace_pennies", 3, 1500, 150, base=base, exploit_every=1)

    g = RockPaperScissors()
    plot_to_files(g, "plot_mccfr_trace_rps", 3, 1500, 150, burn=0.3, exploit_every=1)

    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    plot_to_files(
        g,
        "plot_mccfr_trace_goof4",
        6,
        1000000,
        1000,
        depth=6,
        burn=0.3,
        burn_from=3,
        exploit_every=1)

    g = Goofspiel(5, scoring=Goofspiel.Scoring.ZEROSUM)
    plot_to_files(
        g,
        "plot_mccfr_trace_goof5",
        6,
        1000000,
        1000,
        depth=6,
        burn=0.3,
        burn_from=3,
        exploit_every=10)

    g = DicePoker(6)
    plot_to_files(
        g,
        "plot_mccfr_trace_dicepoker",
        6,
        500000,
        500,
        depth=6,
        burn=0.3,
        burn_from=3,
        exploit_every=1)
Example #7
def test_infoset():
    g = RockPaperScissors()
    us = UniformStrategy()
    iss = InformationSetSampler(g, [us, us])
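    # RPS has a single information set per player; player 1's infoset aggregates
    # the three histories following player 0's (hidden) move.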
    assert iss._player_dist.probs == pytest.approx(np.array([0.5, 0.5]))
    assert iss._infoset_dist[0].probs == pytest.approx(np.array([1.0]))
    assert iss._infoset_dist[1].probs == pytest.approx(np.array([1.0]))
    assert iss._infoset_history_dist[0][()].probs == pytest.approx(
        np.array([1.0]))
    assert iss._infoset_history_dist[1][()].probs == pytest.approx(
        np.array([1.0, 1.0, 1.0]) / 3)
    iss.sample_player()
    iss.sample_info()
    assert iss.sample_info(0)[1] == ()
    assert iss.sample_info(1)[1] == ()
    assert isinstance(iss.sample_state()[2], Situation)
    assert isinstance(iss.player_distribution(), Distribution)
    assert isinstance(iss.info_distribution(0), Distribution)
    assert isinstance(iss.state_distribution(0, ()), Distribution)