def test_mccfr_goofspiel3():
    """MCCFR on 3-card zero-sum Goofspiel converges close to equilibrium."""
    game = Goofspiel(3, scoring=Goofspiel.Scoring.ZEROSUM)
    mccfr = OutcomeMCCFR(game, seed=51)
    mccfr.compute(600, burn=0.5)
    strategies = mccfr.strategies
    uniform = UniformStrategy()
    # After the first revealed card is 2, player 0 should play the middle card.
    state = game.play_sequence([2])
    assert strategies[0].strategy(state) == pytest.approx([0., 0.9, 0.], abs=0.1)
    # Self-play payoff is near zero (zero-sum near-equilibrium) ...
    self_play = sample_payoff(game, strategies, 300, seed=12)[0]
    assert self_play == pytest.approx([0.0, 0.0], abs=0.1)
    # ... while the trained strategy beats a uniform opponent.
    vs_uniform = sample_payoff(game, (strategies[0], uniform), 300, seed=13)[0]
    assert vs_uniform == pytest.approx([1.2, -1.2], abs=0.2)
    # Both players' strategies should be hard to exploit.
    assert exploitability(game, 0, strategies[0]) < 0.1
    assert exploitability(game, 1, strategies[1]) < 0.1
def compute_mccfr_traces(g, prefix, n_traces, iters, steps, depth=6, burn=None,
                         burn_from=0, add_uniform=True, exploit_every=None,
                         eploit_max_nodes=1e6):
    """
    Compute independent strategy traces of MCCFR in game `g`.

    Parameters
    ----------
    g : game instance
    prefix : str
        Label for the outer progress bar.
    n_traces : int
        Number of independent MCCFR runs.
    iters : int
        Total MCCFR iterations per run.
    steps : int
        Number of trace snapshots recorded per run.
    depth : int
        Trace depth passed to `StrategyTrace`.
    burn : float or None
        If set, fraction of `steps` over which updates are down-weighted
        ("burn-in") for runs with index >= `burn_from`.
    burn_from : int
        First run index that uses burn-in.
    add_uniform : bool
        Append a uniform-strategy reference trace (sharing the time points
        of the first MCCFR trace).
    exploit_every : int or None
        If set, compute exploitability every `exploit_every` snapshots.
    eploit_max_nodes : number
        Node limit passed to `exploitability`. NOTE: the parameter name keeps
        its historical misspelling for backward compatibility with callers.

    Returns
    -------
    list of StrategyTrace
    """
    traces = []
    for ti in tqdm.trange(n_traces, desc=prefix):
        name = "MCCFR run #{}".format(ti)
        if burn and ti >= burn_from:
            name += " (burn-in)"
        # Seed is derived from the game's string form so runs differ per `ti`
        # but are tied to `g`. NOTE(review): str hashing depends on
        # PYTHONHASHSEED, so traces are not reproducible across interpreter
        # invocations — confirm this is intended.
        mc = OutcomeMCCFR(g, seed=hash(str(g)) % 2**30 + ti)
        ps = StrategyTrace(g, depth=depth, name=name)
        for i in tqdm.trange(steps, desc="MCCFR steps"):
            w = 1.0
            if burn and ti >= burn_from and i < steps * burn:
                # Ramp the update weight exponentially from 0.03 up to 1.0
                # over the burn-in portion of the run.
                w = 0.03**(1.0 - float(i) / steps / burn)
            # Advance to the i-th checkpoint's total iteration count.
            mc.compute(int(iters * (i + 1) / steps) - mc.iterations,
                       progress=False, weight=w)
            exps = None
            if exploit_every is not None and (steps - i - 1) % exploit_every == 0:
                exps = [
                    exploitability(g, p, mc.strategies[p],
                                   max_nodes=eploit_max_nodes)
                    for p in range(g.players)
                ]
            ps.append(mc.iterations, mc.strategies, exps)
        traces.append(ps)
    # Fix: guard against n_traces == 0 — the uniform reference trace copies
    # its time points from traces[0], which would raise IndexError on an
    # empty list in the original code.
    if add_uniform and traces:
        rps = StrategyTrace(g, depth=depth, name="Uniform")
        rstrat = [UniformStrategy()] * g.players
        rexps = None
        if exploit_every is not None:
            rexps = [
                exploitability(g, p, rstrat[p], max_nodes=eploit_max_nodes)
                for p in range(g.players)
            ]
        for t in traces[0].d_t:
            rps.append(t, rstrat, rexps)
        traces.append(rps)
    return traces
def test_mccfr_goofspiel4():
    """Exact and sampled exploitability agree for MCCFR on 4-card Goofspiel."""
    game = Goofspiel(4, scoring=Goofspiel.Scoring.ZEROSUM)
    mccfr = OutcomeMCCFR(game, seed=49)
    mccfr.compute(10000, burn=0.5)
    strategies = mccfr.strategies
    for player in (0, 1):
        exact = exploitability(game, player, strategies[player])
        sampled = approx_exploitability(game, player, strategies[player],
                                        10000, seed=31 + player)
        print(player, exact, sampled)
        # Both estimates should land near the known value for this setup.
        assert exact == pytest.approx(0.7, abs=0.2)
        assert sampled == pytest.approx(0.7, abs=0.2)
def main():
    """Train MCCFR on 4-card zero-sum Goofspiel with periodic on-disk
    checkpoints, reporting exploitability at sqrt(2)-spaced iteration counts.

    The value-learning experiment below `assert 0` is currently unreachable.
    """
    N = 4
    g = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=56)
    its = 100.0
    while its < 1000000:
        fname = "goof-{}-{}.strat".format(N, its)
        # Fix: the original read `iterations=iterations=int(its) - ...`,
        # which is a syntax error.
        mc.persist(fname, iterations=int(its) - mc.iterations)
        # Fix: report exploitability for the checkpoint just computed —
        # the original advanced `its` first and printed the *next* count.
        print("Exploitability after {:7d} turns (mc, g): {}, {}".format(
            int(its), exploitability(g, 0, mc), exploitability(g, 1, mc)))
        its *= 2 ** 0.5
    # NOTE(review): deliberate stop? Everything below is dead code —
    # confirm whether it should be removed or re-enabled.
    assert 0
    vs = GoofSpielCardsValueStore(g)
    vl = SparseStochasticValueLearning(g, vs, seed=41)
    # Three stages with decreasing learning rate, concatenated into one curve.
    vals = np.concatenate([
        vl.compute([mc, mc], 1000, alpha=0.01, store_step=1),
        vl.compute([mc, mc], 1000, alpha=0.001, store_step=1),
        vl.compute([mc, mc], 1000, alpha=0.0001, store_step=1),
    ], axis=0)
    plt.plot(vals)
    plt.show()
    print("Values:", vs.values)
    # Re-solve a Goofspiel variant rewarded by the learned card values.
    g2 = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM, rewards=vs.values)
    mc2 = OutcomeMCCFR(g2, seed=57)
    # NOTE(review): ITERS is not defined anywhere in this function — this
    # would raise NameError if the code were ever reached. TODO confirm.
    mc2.compute(iterations=ITERS)
    print("Exp(mc2, g2)", exploitability(g2, 0, mc2), exploitability(g2, 1, mc2))
    print("Exp(mc2, g)", exploitability(g, 0, mc2), exploitability(g, 1, mc2))
def main():
    """Train MCCFR on 4-card zero-sum Goofspiel (checkpointed on disk), then
    fit linear card-value stores with SGD under a grid of sampling settings
    and plot the learning curves for each configuration."""
    N = 4
    ITERS = 2000000
    g = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM)
    mc = OutcomeMCCFR(g, seed=56)
    fname = "goof-{}".format(N)
    its = 1024
    # Double the iteration budget each round; `persist` presumably returns
    # truthy when the checkpoint was loaded from cache — TODO confirm.
    while its < ITERS:
        cached = mc.persist(fname, iterations=its)
        if not cached:
            # Only report exploitability for freshly computed checkpoints.
            print("Exploitability after {:7d} turns (mc, g): {}, {}".format(
                its, exploitability(g, 0, mc), exploitability(g, 1, mc)))
        its *= 2
    infosampler = InformationSetSampler(g, mc)
    # Grid over (value-sample count, gradient-sample count) for the learner.
    vsts = (1, 3)
    gsts = (1, 3)
    # First subplot created up front so the others can share its axes.
    ax0 = plt.subplot(len(vsts), len(gsts), 1)
    for i, (vst, gst) in enumerate(itertools.product(vsts, gsts)):
        vs = LinearValueStore(goofspiel_feaures_cards(g.initial_state()),
                              fix_mean=(N + 1) / 2.0)
        vl = SparseSGDLinearValueLearning(g, goofspiel_feaures_cards, vs,
                                          infosampler, seed=44)
        # Anneal the SGD step size over four stages; concatenate the
        # recorded value trajectories into one curve.
        vals = np.concatenate([
            vl.compute([mc, mc], 1000, step=s, record_every=1,
                       val_samples=vst, grad_samples=gst)
            for s in [2**-8, 2**-9, 2**-10, 2**-11]
        ], axis=0)
        #c = ['red', 'green', 'blue', 'black'][i]
        ax = plt.subplot(len(vsts), len(gsts), i + 1, sharex=ax0, sharey=ax0)
        ax.plot(vals)
        ax.legend(list(range(1, N + 1)))
        ax.set_title("valseps={} gradsteps={}".format(vst, gst))
        print("Done sampling valseps={} gradsteps={}".format(vst, gst))
    # NOTE(review): prints only the last configuration's value store —
    # confirm whether this was meant to run inside the loop.
    print("Values:", vs.values)
    plt.show()
    return
    # NOTE(review): unreachable code below the `return`, kept as in the
    # original; it re-solves the game with the learned reward values.
    g2 = Goofspiel(N, scoring=Goofspiel.Scoring.ZEROSUM, rewards=vs.values)
    mc2 = OutcomeMCCFR(g2, seed=57)
    mc2.compute(iterations=ITERS)
    print("Exp(mc2, g2)", exploitability(g2, 0, mc2), exploitability(g2, 1, mc2))
    print("Exp(mc2, g)", exploitability(g, 0, mc2), exploitability(g, 1, mc2))
def test_parse_gambit_strategy_g3():
    """A Gambit-exported Nash equilibrium for 3-card zero-sum Goofspiel
    parses into strategies with (numerically) zero exploitability."""
    g = Goofspiel(3, scoring=Goofspiel.Scoring.ZEROSUM)
    txt = "NE,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0,0,1,1,1,0,1,1,1,1,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1"
    strats = parse_strategy(g, txt)
    assert exploitability(g, 0, strats[0]) < 1e-6
    # Fix: the original checked player 0's exploitability against strats[1];
    # the second strategy belongs to player 1 (cf. the other tests here).
    assert exploitability(g, 1, strats[1]) < 1e-6