def non_test_goofspiel():
    """Manual experiment: train MCCFR on Goofspiel, then run value learning.

    First, an ``OutcomeMCCFR`` strategy is trained in three stages (10, 100
    and 1000 passed to ``compute``).  After each stage a ``BestResponse``
    for player 0 is built against it and the mean of player 0's final value
    over 1000 seeded playouts is printed.

    Second, ``SparseStochasticValueLearning`` is run against the trained
    strategy for a sequence of ``alpha`` values, printing each one first.

    NOTE(review): the ``non_test_`` prefix presumably keeps this out of the
    default test collection because of its running time -- confirm.
    """
    game = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mccfr = OutcomeMCCFR(game, seed=42)

    for stage in (10, 100, 1000):
        mccfr.compute(stage)
        # The best response must be rebuilt after every stage so that it is
        # computed against the current state of ``mccfr``.
        responder = BestResponse(game, 0, [None, mccfr])

        payoffs = []
        for playout_seed in range(1000):
            final_state = game.play_strategies(
                [responder, mccfr], seed=playout_seed)[-1]
            payoffs.append(final_state.values()[0])

        print("Exploit after", stage, np.mean(payoffs))

    store = GoofSpielCardsValueStore(game)
    learner = SparseStochasticValueLearning(game, store, seed=43)

    # The schedule intentionally lists 0.01 twice, exactly as originally
    # written.
    alpha_schedule = (0.1, 0.01, 0.01, 0.001, 0.0001)
    for alpha in alpha_schedule:
        print(alpha)
        learner.compute([mccfr, mccfr], 200, alpha)
def test_goofspeil_rewards():
    """Check final values of zero-sum Goofspiel with custom rewards.

    Builds ``Goofspiel(2, ZEROSUM, rewards=[100, 11])`` and plays ten games
    between two ``UniformStrategy`` players.  The last state of every game
    must report values equal to one of ``[0, 0]``, ``[-89, 89]`` or
    ``[89, -89]`` (89 being 100 - 11, the difference of the two rewards).
    """
    game = Goofspiel(2, Goofspiel.Scoring.ZEROSUM, rewards=[100, 11])
    allowed_values = ([0, 0], [-89, 89], [89, -89])

    for _ in range(10):
        players = [UniformStrategy(), UniformStrategy()]
        terminal = game.play_strategies(players)[-1]
        assert terminal.values() in allowed_values