def test_best_response_rps():
    # Opponent that always plays rock; the unique best response is paper.
    bart_simpson_strategy = ConstStrategy((1, 0, 0))
    game = RockPaperScissors()
    for p in [0, 1]:
        strategy = BestResponse(game, p, [bart_simpson_strategy] * 2)
        # RPS has a single infoset per player; its best response is pure paper.
        assert tuple(strategy.best_responses.values())[0] == pytest.approx(
            (0.0, 1.0, 0.0))
        # Paper wins every round, so the best-response value is 1.
        assert strategy.value == pytest.approx(1.0)
def test_rps():
    g = RockPaperScissors()
    us = UniformStrategy()
    rng = get_rng(seed=3)
    # Random initial value matrix, centered around zero.
    params = rng.rand(3, 3) - 0.5
    vs = LinearValueStore(params, fix_mean=0.0, regularize_l1=6.0)
    infosampler = InformationSetSampler(g, us)
    val = SparseSGDLinearValueLearning(
        g, matrix_zerosum_features, vs, infosampler, seed=44)
    # Smoke test: run SGD with step sizes annealed over three rounds.
    val.compute([us, us], 100, 0.1, 0.1)
    val.compute([us, us], 100, 0.01, 0.01)
    val.compute([us, us], 100, 0.001, 0.001)
def test_approx_best_response_rps():
    bart_simpson_strategy = ConstStrategy((1, 0, 0))  # always plays rock
    game = RockPaperScissors()
    for p in [0, 1]:
        # Note: compute the response for each player p (the loop variable was
        # previously unused, with player 0 hardcoded).
        s = ApproxBestResponse(
            game, p, [bart_simpson_strategy] * 2, iterations=200, seed=23)
        # The approximate best response should converge to pure paper.
        assert s.strategy((), 3) == pytest.approx((0.0, 1.0, 0.0))
        assert s.sample_value(50) == pytest.approx(1.0)
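# Hedged consistency sketch (a hypothetical test, not from the original suite,
# reusing only APIs exercised above): the sampled value of the approximate best
# response should agree with the exact best-response value against the same
# fixed opponent.
def test_approx_matches_exact_best_response():
    opponent = ConstStrategy((1, 0, 0))
    game = RockPaperScissors()
    exact = BestResponse(game, 0, [opponent] * 2)
    approx = ApproxBestResponse(game, 0, [opponent] * 2, iterations=200, seed=23)
    # Both should report value 1.0 here (paper beats rock every round).
    assert approx.sample_value(50) == pytest.approx(exact.value)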
def test_best_response_rps_explicit():
    # Same scenario as above, via the FixedStrategy/Explicit API and with the
    # opponent passed as a dict keyed by player. (Renamed to avoid shadowing
    # the ConstStrategy-based test of the same name.)
    bart_simpson_strategy = FixedStrategy(
        Explicit([1, 0, 0], values=["R", "P", "S"]))
    game = RockPaperScissors()
    for p in [0, 1]:
        strategy = BestResponse(game, p, {1 - p: bart_simpson_strategy})
        br = list(strategy.best_responses.values())[0]
        assert br.probability("R") == 0.0
        assert br.probability("P") == 1.0
        assert br.probability("S") == 0.0
        assert strategy.value == pytest.approx(1.0)
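# Contrasting sketch (a hypothetical test, assuming the list-of-opponents
# BestResponse API from the first test above): against a uniform opponent every
# zero-sum RPS strategy breaks even, so the best-response value must be 0.
def test_best_response_rps_uniform():
    game = RockPaperScissors()
    for p in [0, 1]:
        strategy = BestResponse(game, p, [UniformStrategy()] * 2)
        assert strategy.value == pytest.approx(0.0)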
def main(): print("#### Rock-paper-scissors value estimation") g = RockPaperScissors() us = UniformStrategy() infosampler = InformationSetSampler(g, us) val = LPZeroSumValueLearning(g, infosampler, matrix_zerosum_features, us) # Regularize: set one payoff to 1.0 val.add_condition({(0, 1): 1.0}, 1.0) print("# With only non-triviality (one payoff set to 1.0)") print(val.compute()) print("Flex value sum", val.flex_sum) # Zero diagonal for i in range(3): val.add_condition({(i, i): 1.0}, 0.0) print("# With zero diagonal") print(val.compute()) print("Flex value sum", val.flex_sum) # Symmetrical payoffs for i in range(3): for j in range(i): val.add_condition({(i, j): -1.0, (j, i): -1.0}, 0.0) print("# Adding val(i,j) = -val(j,i)") print(val.compute()) print("Flex value sum", val.flex_sum) #return ### Goofspiel(3) is boring, Goofspiel(4) hits OOM print("#### Goofspiel(4) card value estimation") g = Goofspiel(4) mc = OutcomeMCCFR(g, seed=42) mc.compute(2000) ef = InfoSetExpectedFeatures(g, goofspiel_feaures_cards, mc) for i, f in ef.info_features.items(): print("INFOSET {}:\n{}".format(i, f)) print(ef.info_next[i]) return val = LPZeroSumValueLearning(g, infosampler, goofspiel_feaures_cards, mc) # Regularize: set one payoff to 1.0 val.add_condition({(0, ): 1.0, (1, ): 1.0, (2, ): 1.0, (3, ): 1.0}, 10.0) print("# Regularizing card values mean to 2.5 (mean of 1..4)") print(len(val.conds_eq), len(val.conds_le), len(val.flex_variables)) print( val.compute( options=dict(tol=1e-6, disp=True, sparse=True, lstsq=True))) print("Flex value sum", val.flex_sum)
def main():
    g = MatchingPennies()
    base = np.array([[1.0, 0.0, 0.0, 0.0],
                     [0.0, 0.0, 1.0, 0.0]])
    plot_to_files(g, "plot_mccfr_trace_pennies", 3, 1500, 150,
                  base=base, exploit_every=1)

    g = RockPaperScissors()
    plot_to_files(g, "plot_mccfr_trace_rps", 3, 1500, 150,
                  burn=0.3, exploit_every=1)

    g = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    plot_to_files(
        g, "plot_mccfr_trace_goof4", 6, 1000000, 1000,
        depth=6, burn=0.3, burn_from=3, exploit_every=1)

    g = Goofspiel(5, scoring=Goofspiel.Scoring.ZEROSUM)
    plot_to_files(
        g, "plot_mccfr_trace_goof5", 6, 1000000, 1000,
        depth=6, burn=0.3, burn_from=3, exploit_every=10)

    g = DicePoker(6)
    plot_to_files(
        g, "plot_mccfr_trace_dicepoker", 6, 500000, 500,
        depth=6, burn=0.3, burn_from=3, exploit_every=1)
def test_infoset():
    g = RockPaperScissors()
    us = UniformStrategy()
    iss = InformationSetSampler(g, [us, us])
    # Each player is sampled with probability 1/2 and has a single infoset.
    assert iss._player_dist.probs == pytest.approx(np.array([0.5, 0.5]))
    assert iss._infoset_dist[0].probs == pytest.approx(np.array([1.0]))
    assert iss._infoset_dist[1].probs == pytest.approx(np.array([1.0]))
    assert iss._infoset_history_dist[0][()].probs == pytest.approx(
        np.array([1.0]))
    # Player 1's infoset contains the three unobserved moves of player 0,
    # each equally likely under uniform play.
    assert iss._infoset_history_dist[1][()].probs == pytest.approx(
        np.array([1.0, 1.0, 1.0]) / 3)
    iss.sample_player()
    iss.sample_info()
    assert iss.sample_info(0)[1] == ()
    assert iss.sample_info(1)[1] == ()
    assert isinstance(iss.sample_state()[2], Situation)
    assert isinstance(iss.player_distribution(), Distribution)
    assert isinstance(iss.info_distribution(0), Distribution)
    assert isinstance(iss.state_distribution(0, ()), Distribution)
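# Usage sketch (a hypothetical helper, not from the original file): drawing a
# sampled observation sequence and a concrete Situation from the sampler, using
# only call shapes verified by the test above.
def sample_infoset_example():
    g = RockPaperScissors()
    iss = InformationSetSampler(g, [UniformStrategy()] * 2)
    observation = iss.sample_info(0)[1]  # observation sequence; () at the root
    situation = iss.sample_state()[2]    # index 2 holds a Situation, per the test
    return observation, situation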