# Assumed imports; hist_agent, agent, plot_qs, C, D, and the payoff
# constants are defined elsewhere in this file.
from math import log

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm


def plot_qs_pcs_M(game, num_steps=10000, **kwargs):
    """Plot qs, pcs, and M together."""
    game = game.copy_with_agents(hist_agent("qs_hist", "qs"),
                                 hist_agent("pcs_hist", "pcs"),
                                 hist_agent("M_hist", "M"))
    game.run(num_steps)

    fig, axs = plt.subplots(3, 2)

    # num_steps=None tells each helper to reuse the histories recorded above
    # instead of re-running the game; each helper fills one row of two axes
    # (linear t and log t).
    plot_qs(game, num_steps=None, axs=axs[0], **kwargs)
    plot_pcs(game, num_steps=None, axs=axs[1], **kwargs)
    plot_M(game, num_steps=None, axs=axs[2], **kwargs)

    plt.show()
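

# Example usage (a sketch; `my_game` stands in for any configured game object
# built with this framework):
#
#   plot_qs_pcs_M(my_game, num_steps=20000)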


def plot_M(game, num_steps=10000, axs=None, **kwargs):
    """Plot M over time in the given game."""
    if num_steps is not None:
        game = game.copy_with_agents(hist_agent("M_hist", "M"))
        game.run(num_steps)

    show = axs is None
    if axs is None:
        fig, axs = plt.subplots(1, 2)

    # Normalized M values: entry i holds M[C] and M[D] after step i,
    # each divided by i + 2 (matching the original normalization).
    MCs = [M[C] / (i + 2) for i, M in enumerate(game.env["M_hist"])]
    MDs = [M[D] / (i + 2) for i, M in enumerate(game.env["M_hist"])]

    xs = range(1, len(game.env["M_hist"]) + 1)
    game.plot(axs[0], xs, MCs, label="M(C)/M", **kwargs)
    game.plot(axs[0], xs, MDs, label="M(D)/M", **kwargs)
    axs[0].set(xlabel="t")
    axs[0].legend()

    log_xs = [log(x) for x in xs]
    game.plot(axs[1], log_xs, MCs, label="M(C)/M", **kwargs)
    game.plot(axs[1], log_xs, MDs, label="M(D)/M", **kwargs)
    axs[1].set(xlabel="log(t)")
    axs[1].legend()

    if show:
        plt.show()


def plot_pcs(game, num_steps=10000, axs=None, **kwargs):
    """Plot pcs over time in the given game."""
    if num_steps is not None:
        game = game.copy_with_agents(hist_agent("pcs_hist", "pcs"))
        game.run(num_steps)

    show = axs is None
    if axs is None:
        fig, axs = plt.subplots(1, 2)

    xs = range(1, len(game.env["pcs_hist"]) + 1)
    # Each entry of pcs_hist is indexed by the conditioning action:
    # entry [C] is P(C|C) and entry [D] is P(C|D).
    game.plot(axs[0], xs,
              lambda env: [pc[C] for pc in env["pcs_hist"]],
              label="P(C|C)", **kwargs)
    game.plot(axs[0], xs,
              lambda env: [pc[D] for pc in env["pcs_hist"]],
              label="P(C|D)", **kwargs)
    axs[0].set(xlabel="t")
    axs[0].legend()

    log_xs = [log(x) for x in xs]
    game.plot(axs[1], log_xs,
              lambda env: [pc[C] for pc in env["pcs_hist"]],
              label="P(C|C)", **kwargs)
    game.plot(axs[1], log_xs,
              lambda env: [pc[D] for pc in env["pcs_hist"]],
              label="P(C|D)", **kwargs)
    axs[1].set(xlabel="log(t)")
    axs[1].legend()

    if show:
        plt.show()


def run_experiment(game,
                   num_iters=500,
                   num_steps=5000,
                   bucket_size=0.01,
                   pc_calc_steps=500):
    """Measure limiting behavior for the given game."""
    # Record only the last pc_calc_steps actions; the cooperation proportion
    # below is computed over that window.
    game = game.copy_with_agents(
        hist_agent("a_hist", "a", maxhist=pc_calc_steps))
    buckets = [0] * int(1 / bucket_size)
    coop_props = []
    print(f"Running experiment for {game.name}...")
    for _ in tqdm(range(num_iters)):
        game.run(num_steps, use_tqdm=False)
        prop_coop = sum(a == C for a in game.env["a_hist"]) / pc_calc_steps
        coop_props.append(prop_coop)
        bucket = int(prop_coop // bucket_size)
        # prop_coop == 1.0 would index one past the end; clamp it into the
        # top bucket.
        if bucket == len(buckets):
            bucket -= 1
        buckets[bucket] += 1
        game.reset()
    # Convert bucket counts into frequencies.
    buckets = [b / num_iters for b in buckets]
    return buckets, sum(coop_props) / len(coop_props)
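

# A minimal sketch of consuming run_experiment's output (assumes a configured
# game object `my_game`; the bar width must match bucket_size):
#
#   buckets, mean_coop = run_experiment(my_game, bucket_size=0.01)
#   plt.bar([i * 0.01 for i in range(len(buckets))], buckets,
#           width=0.01, align="edge")
#   plt.xlabel("proportion of cooperation")
#   plt.ylabel("frequency")
#   plt.show()
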
PREV_DEFECT_PENALTY = PREV_COOP_REWARD - 2

# Each of the following is a (DEL, C) pair; get_corrupted_feedback below
# unpacks it as DEL, C = env["DEL_C"].
SELF_PD_DEL_C = COOP_PENALTY, 3 * PREV_COOP_REWARD
PAY_FORWARD_DEL_C = COOP_PENALTY, 2 * PREV_COOP_REWARD
COOKIE_DEL_C = DEFECT_REWARD, 2 * PREV_DEFECT_PENALTY
BUTTON_DEL_C = NO_REWARD, PREV_COOP_REWARD


def coop_with_prob(p):
    """Return C with probability p (assuming the convention C == 0, D == 1)."""
    # np.random.binomial(1, 1 - p) draws 1 (defect) with probability 1 - p,
    # and hence 0 (cooperate) with probability p.
    return np.random.binomial(1, 1 - p)
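
# Quick sanity check (a sketch, assuming C == 0 as above):
#
#   samples = [coop_with_prob(0.7) for _ in range(10000)]
#   print(sum(s == 0 for s in samples) / 10000)  # should be close to 0.7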


# Shared experiment parameters: INIT_C_PROB seeds the first action (see
# get_prev_a) and DEL_C selects the (DEL, C) pair above; USE_STATE is
# consumed outside this excerpt.
common_params = dict(INIT_C_PROB=0.5, DEL_C=SELF_PD_DEL_C, USE_STATE=False)

# Tracks only the most recent action (maxhist=1) so the previous action can
# be recovered cheaply.
a_hist_1step = hist_agent("a_hist_1step", "a", maxhist=1)


def get_prev_a(env):
    """Return the previous action, seeding a random one on the first step."""
    if not env["a_hist_1step"]:
        env["a_hist_1step"].append(coop_with_prob(env["INIT_C_PROB"]))
    return env["a_hist_1step"][-1]


@agent(name="r")
def get_corrupted_feedback(env, s=None, a=None, k=None):
    s = get_prev_a(env) if s is None else s
    a = env["a"] if a is None else a
    k = a if k is None else k
    DEL, C = env["DEL_C"]
    feedback = DEL[s][k]