Example #1
def test_strategies():

    g = RockPaperScissors()
    rng = get_rng(seed=41)
    s1 = [UniformStrategy(), UniformStrategy()]
    v1 = np.mean(
        [g.play_strategies(s1, rng=rng)[-1].values() for i in range(300)], 0)
    assert sum(v1) == pytest.approx(0.0)
    assert v1[0] == pytest.approx(0.0, abs=0.1)
    s2 = [
        FixedStrategy(Explicit({
            "R": 1.0,
            "P": 0.0,
            "S": 0.0
        })),
        FixedStrategy(Explicit({
            "R": 0.5,
            "P": 0.5,
            "S": 0.0
        }))
    ]
    v2 = np.mean(
        [g.play_strategies(s2, rng=rng)[-1].values() for i in range(300)], 0)
    assert sum(v2) == pytest.approx(0.0)
    assert v2[0] == pytest.approx(-0.5, abs=0.1)
Example #2
def test_goofspiel_rewards():
    us = [UniformStrategy(), UniformStrategy()]
    g = Goofspiel(2, Goofspiel.Scoring.ZEROSUM, rewards=[100, 11])
    for i in range(50):
        s = play_strategies(g, us, seed=i)
        assert tuple(s.payoff) in ((0.0, 0.0), (-89.0, 89.0), (89.0, -89.0))

    g = Goofspiel(2, Goofspiel.Scoring.ABSOLUTE, rewards=[100, 11])
    for i in range(50):
        s = play_strategies(g, us, seed=i)
        assert tuple(s.payoff) in ((0.0, 0.0), (100.0, 11.0), (11.0, 100.0))
Example #3
def test_strategies():
    g = RockPaperScissors()
    rng = get_rng(seed=41)

    s1 = [UniformStrategy(), UniformStrategy()]
    v1 = sample_payoff(g, s1, 300, rng=rng)
    assert sum(v1[0]) == pytest.approx(0.0)
    assert v1[0] == pytest.approx([0.0, 0.0], abs=0.1)

    s2 = [
        ConstStrategy((1.0, 0.0, 0.0)),
        ConstStrategy((0.5, 0.5, 0.0)),
    ]
    v2 = sample_payoff(g, s2, 300, rng=rng)
    assert sum(v2[0]) == pytest.approx(0.0)
    assert v2[0] == pytest.approx([-0.5, 0.5], abs=0.1)
Example #4
def xtest_server():

    from gamegym.games import Goofspiel
    from gamegym.strategy import UniformStrategy

    g = Goofspiel(5)

    s = Server()
    s.play_game(g, [None, UniformStrategy()])
Example #5
def test_best_response_goofspiel():

    for n_cards, br_value in [(3, pytest.approx(4/3)), (4, pytest.approx(2.5))]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = BestResponse(game, 0, {1: UniformStrategy()})
        for k, v in strategy.best_responses.items():
            reward = k[1][-1]
            assert reward not in v.values() or v.probability(reward) == 1.0
        assert strategy.value == br_value
Example #6
def test_approx_best_response_goofspiel():
    for n_cards, its, br_value in [(3, 1000, 1.333), (4, 20000, 2.5)]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = ApproxBestResponse(game,
                                      0, [UniformStrategy()] * 2,
                                      iterations=its,
                                      seed=35)
        assert strategy.sample_value(its // 2) == pytest.approx(br_value,
                                                                rel=0.2)
Example #7
def test_rps():
    g = RockPaperScissors()
    us = UniformStrategy()
    rng = get_rng(seed=3)
    params = rng.rand(3, 3) - 0.5
    vs = LinearValueStore(params, fix_mean=0.0, regularize_l1=6.0)
    infosampler = InformationSetSampler(g, us)
    val = SparseSGDLinearValueLearning(g, matrix_zerosum_features, vs, infosampler, seed=44)
    val.compute([us, us], 100, 0.1, 0.1)
    val.compute([us, us], 100, 0.01, 0.01)
    val.compute([us, us], 100, 0.001, 0.001)
Example #8
def test_mccfr_goofspiel3():
    g = Goofspiel(3, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=51)
    mc.compute(600, burn=0.5)
    mcs = mc.strategies
    us = UniformStrategy()
    s1 = g.play_sequence([2])
    assert mcs[0].strategy(s1) == pytest.approx([0., 0.9, 0.], abs=0.1)
    assert sample_payoff(g, mcs, 300, seed=12)[0] == pytest.approx([0.0, 0.0],
                                                                   abs=0.1)
    assert sample_payoff(g, (mcs[0], us), 300,
                         seed=13)[0] == pytest.approx([1.2, -1.2], abs=0.2)
    assert exploitability(g, 0, mcs[0]) < 0.1
    assert exploitability(g, 1, mcs[1]) < 0.1
Example #9
def test_best_response_goofspiel():
    for n_cards, br_value in [(3, pytest.approx(4 / 3)),
                              (4, pytest.approx(2.5))]:
        game = Goofspiel(n_cards, Goofspiel.Scoring.ZEROSUM)
        strategy = BestResponse(game, 0, [UniformStrategy()] * 2)
        for k, v in strategy.best_responses.items():
            reward = k[-1]
            played_cards = k[0::3]
            idx = len([
                i for i in range(n_cards)
                if i < reward and i not in played_cards
            ])
            assert reward in played_cards or v[idx] == 1.0
        assert strategy.value == br_value
Example #10
def compute_mccfr_traces(g,
                         prefix,
                         n_traces,
                         iters,
                         steps,
                         depth=6,
                         burn=None,
                         burn_from=0,
                         add_uniform=True,
                         exploit_every=None,
                         exploit_max_nodes=1e6):
    """
    Computes independent strategy traces of MCCFR in game `g`.
    """
    traces = []
    for ti in tqdm.trange(n_traces, desc=prefix):
        name = "MCCFR run #{}".format(ti)
        if burn and ti >= burn_from:
            name += " (burn-in)"
        mc = OutcomeMCCFR(g, seed=hash(str(g)) % 2**30 + ti)
        ps = StrategyTrace(g, depth=depth, name=name)
        for i in tqdm.trange(steps, desc="MCCFR steps"):
            w = 1.0
            if burn and ti >= burn_from and i < steps * burn:
                w = 0.03**(1.0 - float(i) / steps / burn)
            mc.compute(int(iters * (i + 1) / steps) - mc.iterations, progress=False, weight=w)
            exps = None
            if exploit_every is not None and (steps - i - 1) % exploit_every == 0:
                exps = [
                    exploitability(g, p, mc.strategies[p], max_nodes=exploit_max_nodes)
                    for p in range(g.players)
                ]
            ps.append(mc.iterations, mc.strategies, exps)
        traces.append(ps)

    if add_uniform:
        rps = StrategyTrace(g, depth=depth, name="Uniform")
        rstrat = [UniformStrategy()] * g.players
        rexps = None
        if exploit_every is not None:
            rexps = [
                exploitability(g, p, rstrat[p], max_nodes=exploit_max_nodes)
                for p in range(g.players)
            ]
        for t in traces[0].d_t:
            rps.append(t, rstrat, rexps)
        traces.append(rps)

    return traces
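
A minimal usage sketch for the helper above; the game, prefix, and parameter values here are illustrative assumptions, not taken from the original listing:

# Illustrative call only: three independent MCCFR traces on Goofspiel(4),
# 10000 total iterations split over 20 checkpoints, with exploitability
# evaluated at every 5th checkpoint.
example_traces = compute_mccfr_traces(
    Goofspiel(4), "goof4-demo", n_traces=3, iters=10000, steps=20,
    exploit_every=5)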
Example #11
def main():
    print("#### Rock-paper-scissors value estimation")
    g = RockPaperScissors()
    us = UniformStrategy()
    infosampler = InformationSetSampler(g, us)
    val = LPZeroSumValueLearning(g, infosampler, matrix_zerosum_features, us)

    # Regularize: set one payoff to 1.0
    val.add_condition({(0, 1): 1.0}, 1.0)
    print("# With only non-triviality (one payoff set to 1.0)")
    print(val.compute())
    print("Flex value sum", val.flex_sum)
    # Zero diagonal
    for i in range(3):
        val.add_condition({(i, i): 1.0}, 0.0)
    print("# With zero diagonal")
    print(val.compute())
    print("Flex value sum", val.flex_sum)

    # Symmetrical payoffs
    for i in range(3):
        for j in range(i):
            val.add_condition({(i, j): -1.0, (j, i): -1.0}, 0.0)
    print("# Adding val(i,j) = -val(j,i)")
    print(val.compute())
    print("Flex value sum", val.flex_sum)

    #return ### Goofspiel(3) is boring, Goofspiel(4) hits OOM
    print("#### Goofspiel(4) card value estimation")
    g = Goofspiel(4)
    mc = OutcomeMCCFR(g, seed=42)
    mc.compute(2000)
    ef = InfoSetExpectedFeatures(g, goofspiel_feaures_cards, mc)
    for i, f in ef.info_features.items():
        print("INFOSET {}:\n{}".format(i, f))
        print(ef.info_next[i])

    return  # NOTE: the LP value-learning experiment below is intentionally left unreached
    val = LPZeroSumValueLearning(g, infosampler, goofspiel_feaures_cards, mc)

    # Regularize: set one payoff to 1.0
    val.add_condition({(0, ): 1.0, (1, ): 1.0, (2, ): 1.0, (3, ): 1.0}, 10.0)
    print("# Regularizing card values mean to 2.5 (mean of 1..4)")
    print(len(val.conds_eq), len(val.conds_le), len(val.flex_variables))
    print(
        val.compute(
            options=dict(tol=1e-6, disp=True, sparse=True, lstsq=True)))
    print("Flex value sum", val.flex_sum)
Example #12
def test_infoset():
    g = RockPaperScissors()
    us = UniformStrategy()
    iss = InformationSetSampler(g, [us, us])
    assert iss._player_dist.probs == pytest.approx(np.array([0.5, 0.5]))
    assert iss._infoset_dist[0].probs == pytest.approx(np.array([1.0]))
    assert iss._infoset_dist[1].probs == pytest.approx(np.array([1.0]))
    assert iss._infoset_history_dist[0][()].probs == pytest.approx(
        np.array([1.0]))
    assert iss._infoset_history_dist[1][()].probs == pytest.approx(
        np.array([1.0, 1.0, 1.0]) / 3)
    iss.sample_player()
    iss.sample_info()
    assert iss.sample_info(0)[1] == ()
    assert iss.sample_info(1)[1] == ()
    assert isinstance(iss.sample_state()[2], Situation)
    assert isinstance(iss.player_distribution(), Distribution)
    assert isinstance(iss.info_distribution(0), Distribution)
    assert isinstance(iss.state_distribution(0, ()), Distribution)
Example #13
def test_goofspiel_rewards():
    g = Goofspiel(2, Goofspiel.Scoring.ZEROSUM, rewards=[100, 11])
    for _ in range(10):
        history = g.play_strategies([UniformStrategy(), UniformStrategy()])
        t = history[-1]
        assert t.values() in ([0, 0], [-89, 89], [89, -89])
Example #14
def test_best_response_limit():
    game = Goofspiel(3)
    BestResponse(game, 0, [UniformStrategy()] * 2)
    with pytest.raises(LimitExceeded, match="traversed more than"):
        BestResponse(game, 0, [UniformStrategy()] * 2, max_nodes=1024)