Example #1
# numpy is needed for the mean below; the gamegym classes used here
# (Goofspiel, OutcomeMCCFR, BestResponse, GoofSpielCardsValueStore,
# SparseStochasticValueLearning) are assumed to be imported from the gamegym library.
import numpy as np

def non_test_goofspiel():
    # Not collected by pytest (the name does not start with "test_"); this is a
    # long-running example rather than an assertion-based test.
    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=42)
    for s in [10, 100, 1000]:
        # Run more outcome-sampling MCCFR (parameter s), then estimate how
        # exploitable the current strategy is for player 0 by averaging the
        # final value over 1000 seeded playouts against a best response.
        mc.compute(s)
        br = BestResponse(g, 0, [None, mc])
        print(
            "Exploit after", s,
            np.mean([
                g.play_strategies([br, mc], seed=i)[-1].values()[0]
                for i in range(1000)
            ]))

    # Learn per-card values from MCCFR self-play, sweeping a decreasing
    # learning-rate schedule.
    vs = GoofSpielCardsValueStore(g)
    val = SparseStochasticValueLearning(g, vs, seed=43)
    for alpha in [0.1, 0.01, 0.01, 0.001, 0.0001]:
        print(alpha)
        val.compute([mc, mc], 200, alpha)
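
For context, OutcomeMCCFR builds its strategy at each information set from accumulated counterfactual regrets via regret matching. The snippet below is a minimal, standalone illustration of that update rule only; the function regret_matching and the plain NumPy regret array are illustrative and not part of gamegym's API.

import numpy as np

def regret_matching(regrets):
    """Turn cumulative regrets per action into a strategy (illustrative sketch)."""
    positive = np.maximum(regrets, 0.0)
    total = positive.sum()
    if total > 0.0:
        return positive / total          # play proportionally to positive regret
    return np.full(len(regrets), 1.0 / len(regrets))  # otherwise play uniformly

# Example: cumulative regrets [2.0, -1.0, 1.0] -> probabilities [2/3, 0, 1/3]
print(regret_matching(np.array([2.0, -1.0, 1.0])))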
Example #2
def test_goofspiel_rewards():
    # Two-card Goofspiel with prize values 100 and 11, scored zero-sum.
    g = Goofspiel(2, Goofspiel.Scoring.ZEROSUM, rewards=[100, 11])
    for _ in range(10):
        history = g.play_strategies([UniformStrategy(), UniformStrategy()])
        t = history[-1]
        # Either both rounds tie ([0, 0]) or one player takes the 100 prize and
        # the other the 11 prize, a zero-sum difference of +/-89.
        assert t.values() in ([0, 0], [-89, 89], [89, -89])
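
The asserted payoff vectors follow from the arithmetic of the two-card game: if the first bids tie, the second bids must tie as well (each player holds the same remaining card), giving [0, 0]; otherwise one player takes the 100 prize and the other the 11 prize, for a zero-sum difference of 100 - 11 = 89. A small standalone check of this reasoning (the helper goofspiel2_payoffs is hypothetical and does not use gamegym):

from itertools import product

def goofspiel2_payoffs(prizes=(100, 11)):
    """Enumerate zero-sum payoffs of 2-card Goofspiel over all pure bid orders."""
    outcomes = set()
    for a, b in product([(1, 2), (2, 1)], repeat=2):  # each player's bid order
        score = [0, 0]
        for prize, x, y in zip(prizes, a, b):
            if x > y:
                score[0] += prize
            elif y > x:
                score[1] += prize
            # tied bids: the prize is won by neither player
        diff = score[0] - score[1]
        outcomes.add((diff, -diff))  # zero-sum scoring: difference of totals
    return outcomes

print(goofspiel2_payoffs())  # {(0, 0), (89, -89), (-89, 89)}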