Example #1
import numpy as np
import nashpy as nash


def random_nash(sigma_x=1, n=10):
    """Estimate how often two players, each observing a noisy version of the
    same chicken-style game, independently commit to the 'crash' action."""
    u1_mean = np.array([[-10, 0], [-3, -1]])
    u2_mean = np.array([[-10, -3], [0, -1]])
    # Only the first row of each payoff matrix is observed with noise.
    sigma_x_mat = np.array([[sigma_x, sigma_x], [0., 0.]])
    # u{player}_{matrix}: each player's noisy draws of both payoff matrices.
    u1_1 = np.random.normal(loc=u1_mean, scale=sigma_x_mat, size=(n, 2, 2))
    u1_2 = np.random.normal(loc=u2_mean, scale=sigma_x_mat, size=(n, 2, 2))
    u2_1 = np.random.normal(loc=u1_mean, scale=sigma_x_mat, size=(n, 2, 2))
    u2_2 = np.random.normal(loc=u2_mean, scale=sigma_x_mat, size=(n, 2, 2))
    crash_lst = []
    for i in range(n):
        u1_1_i, u1_2_i, u2_1_i, u2_2_i = u1_1[i], u1_2[i], u2_1[i], u2_2[i]
        # Each player computes an equilibrium of the game as they see it.
        a1_1, _, _ = get_welfare_optimal_eq(nash.Game(u1_1_i, u1_2_i))
        _, a2_2, _ = get_welfare_optimal_eq(nash.Game(u2_1_i, u2_2_i))
        # Both put probability 1 on their first action: a crash.
        crash = (a1_1[0] == a2_2[0] == 1)
        crash_lst.append(crash)
    print(np.mean(crash_lst))
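
All of these examples call a get_welfare_optimal_eq helper that is not shown in the snippets. A minimal sketch of one way it could work, assuming it enumerates equilibria with nashpy's support_enumeration and returns the welfare-maximizing one as (row strategy, column strategy, row player's expected payoff):

import numpy as np
import nashpy as nash


def get_welfare_optimal_eq(game):
    # Hypothetical sketch: enumerate equilibria and keep the one with the
    # highest total welfare; return (row strategy, column strategy, row payoff).
    best = None
    best_welfare = -np.inf
    for sigma_1, sigma_2 in game.support_enumeration():
        v1, v2 = game[sigma_1, sigma_2]
        if v1 + v2 > best_welfare:
            best_welfare = v1 + v2
            best = (sigma_1, sigma_2, v1)
    return best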
Example #2
import copy


def evaluate_reporting_policy_profile(distort_1, distort_2, sd=0.5, nrep=100):
    """Monte Carlo evaluation of a pair of report distortions in a noisy chicken
    game: returns mean payoffs with and without report pooling, how often the
    reports were pooled, and standard errors of the pooled-payoff estimates."""
    v1_mean = 0.
    v2_mean = 0.
    v1_default_mean = 0.
    v2_default_mean = 0.
    true_game = nash.Game(G_1, G_2)
    prop_agree = 0.
    v1_list = []
    v2_list = []
    for rep in range(nrep):
        G_1_private, G_2_private = draw_chicken_game(sd=sd)
        G_1_rep = copy.copy(G_1_private)
        G_2_rep = copy.copy(G_2_private)
        G_1_rep[1, 0] += distort_1
        G_2_rep[1, 0] += distort_2
        combine, combined_game = combine_reports(G_1_rep, G_2_rep, sd=sd)
        prop_agree += combine / nrep

        # Default (no pooling): each player solves the game formed by their own
        # report of the row payoffs and the true column payoffs G_2.
        a1_default, _, _ = get_welfare_optimal_eq(nash.Game(G_1_rep, G_2))
        _, a2_default, _ = get_welfare_optimal_eq(nash.Game(G_2_rep, G_2))
        v1_default, v2_default = true_game[(a1_default, a2_default)]
        v1_default_mean += v1_default / nrep
        v2_default_mean += v2_default / nrep

        if combine:
            a1, a2, _ = get_welfare_optimal_eq(nash.Game(combined_game, G_2))
        else:
            a1, a2 = a1_default, a2_default
        v1, v2 = true_game[(a1, a2)]
        v1_mean += v1 / nrep
        v2_mean += v2 / nrep
        v1_list.append(v1)
        v2_list.append(v2)
    # Standard errors of the mean payoffs under pooling.
    v1_se = np.std(v1_list) / np.sqrt(nrep)
    v2_se = np.std(v2_list) / np.sqrt(nrep)
    return v1_mean, v2_mean, v1_default_mean, v2_default_mean, prop_agree, v1_se, v2_se
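
Example #2 also depends on module-level objects G_1 and G_2 and on helpers draw_chicken_game and combine_reports that are not shown. Under one possible reading of how they are used, in which both players privately observe the row player's payoff matrix and pool their reports only when the reports roughly agree, they might look like the following hypothetical sketch (the tolerance rule and the tol parameter are assumptions):

import numpy as np

# Assumed true chicken-game payoffs (same values as in Example #1).
G_1 = np.array([[-10., 0.], [-3., -1.]])
G_2 = np.array([[-10., -3.], [0., -1.]])


def draw_chicken_game(sd=0.5):
    # Hypothetical: each player gets an independent noisy copy of G_1.
    return np.random.normal(loc=G_1, scale=sd), np.random.normal(loc=G_1, scale=sd)


def combine_reports(G_1_rep, G_2_rep, sd=0.5, tol=2.):
    # Hypothetical rule: pool the reports (element-wise mean) only when every
    # entry of the two reports is within tol noise standard deviations.
    agree = bool(np.all(np.abs(G_1_rep - G_2_rep) < tol * sd))
    return agree, (G_1_rep + G_2_rep) / 2.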
Example #3
def monotone(u1=None, u2=None):
    """Plot the row player's welfare-optimal equilibrium payoff as the payoff
    matrix u1 is perturbed along random directions of increasing magnitude."""
    if u1 is None:
        u1 = np.array([[-10, 0.5], [-3, -1]])
    if u2 is None:
        u2 = np.array([[-10, -3], [0, -1]])

    all_payoffs = []
    for rep in range(100):
        direction = np.random.uniform(low=-1, high=1, size=(2, 2))
        payoffs_1 = []
        magnitudes = np.linspace(0, 10, 100)
        for magnitude in magnitudes:
            u1_perturbed = u1 + direction * magnitude
            _, _, v1 = get_welfare_optimal_eq(nash.Game(u1_perturbed, u2))
            payoffs_1.append(v1)
        all_payoffs.append(payoffs_1)

    # Average payoff curve over the random perturbation directions.
    plt.plot(magnitudes, np.mean(all_payoffs, axis=0))
    plt.show()
    return
Example #4


if __name__ == "__main__":
    sd = 0.5
    payoffs_1 = np.zeros((4, 4))
    payoffs_2 = np.zeros((4, 4))
    se_1 = np.zeros((4, 4))
    se_2 = np.zeros((4, 4))
    # Grid over candidate report distortions; note that player 2's distortion
    # is passed with a negative sign in the call below.
    for i, distort_1 in enumerate(np.linspace(0, sd / 2, 4)):
        for j, distort_2 in enumerate(np.linspace(0, sd / 4, 4)):
            v1, v2, v1_default, v2_default, prop_agree, v1_se, v2_se = \
                evaluate_reporting_policy_profile(distort_1, -distort_2, nrep=1000)
            payoffs_1[i, j] = v1
            payoffs_2[i, j] = v2
            se_1[i, j] = v1_se
            se_2[i, j] = v2_se
            if i == 0 and j == 0:
                print(f'default: {v1_default}, {v2_default}')
    g = nash.Game(payoffs_1, payoffs_2)
    a1, a2, _ = get_welfare_optimal_eq(g)
    print(payoffs_1)
    print(payoffs_2)
    print(a1, a2)
    print(g[(a1, a2)])
    # Standard error of each player's value under the mixed profile (a1, a2),
    # following the same convention as Example #5.
    final_se_1 = np.sqrt(np.dot(a1, np.dot(se_1**2, a2)))
    final_se_2 = np.sqrt(np.dot(a1, np.dot(se_2**2, a2)))
    print(final_se_1)
    print(final_se_2)
Example #5
def nash_reporting_policy(env='coop',
                          time_horizon=100,
                          n=5,
                          mc_rep=100,
                          nA=10,
                          tau=1.,
                          eps_upper=1.,
                          policy='coop',
                          bandit_kwargs=None):
    """Estimate a meta-game over candidate (epsilon_1, epsilon_2) reporting
    parameters via Monte Carlo bandit runs, append an 'independent' baseline
    action, and return a welfare-optimal equilibrium of that meta-game."""
    if bandit_kwargs is None:
        bandit_kwargs = {}

    # Compute payoff matrix
    if env == 'coop':
        epsilon_1_space = np.linspace(0.1, eps_upper, nA)
        epsilon_2_space = np.linspace(0.1, eps_upper, nA)
        sigma_upper = 2.
    elif env == 'ug':
        epsilon_1_space = np.linspace(0, eps_upper, nA)
        epsilon_2_space = np.linspace(0, eps_upper, nA)
        sigma_upper = 1.
    else:
        raise ValueError("env must be 'coop' or 'ug'")

    if policy == 'cb':
        nA = nA**2
        epsilon_1_prod = [(eps1, eps2) for eps1 in epsilon_1_space
                          for eps2 in epsilon_2_space]
        epsilon_2_prod = [(eps2, eps1) for eps2 in epsilon_2_space
                          for eps1 in epsilon_1_space]
    else:
        epsilon_1_prod = [(eps1, eps1) for eps1 in epsilon_1_space]
        epsilon_2_prod = [(eps2, eps2) for eps2 in epsilon_2_space]

    payoffs_1 = np.zeros((nA + 1, nA + 1))
    payoffs_2 = np.zeros((nA + 1, nA + 1))
    standard_errors_1 = np.zeros((nA + 1, nA + 1))
    standard_errors_2 = np.zeros((nA + 1, nA + 1))

    for i, (epsilon_1, epsilon_21) in enumerate(epsilon_1_prod):
        for j, (epsilon_2, epsilon_12) in enumerate(epsilon_2_prod):
            se_ij_1 = []
            se_ij_2 = []
            print(i, j)
            for rep in range(mc_rep):
                rewards_rep_1, rewards_rep_2, _, _ = \
                  bandit(policy=policy, time_horizon=time_horizon, n=n, sigma_tol=tau, sigma_upper=sigma_upper,
                        env=env, epsilon_1=epsilon_1, epsilon_2=epsilon_2, epsilon_12=epsilon_12, epsilon_21=epsilon_21,
                         **bandit_kwargs)
                payoffs_1[i, j] += np.mean(rewards_rep_1) / mc_rep
                payoffs_2[i, j] += np.mean(rewards_rep_2) / mc_rep
                se_ij_1 += list(rewards_rep_1)
                se_ij_2 += list(rewards_rep_2)
            standard_errors_1[i, j] = np.std(se_ij_1) / np.sqrt(len(se_ij_1))
            standard_errors_2[i, j] = np.std(se_ij_2) / np.sqrt(len(se_ij_2))

    # Get independent payoffs
    se_1 = []
    se_2 = []
    for rep in range(mc_rep):
        rewards_rep_1, rewards_rep_2, _, _ = \
          bandit(policy='ind', time_horizon=time_horizon, n=n, sigma_tol=tau, sigma_upper=sigma_upper,
                 env=env, epsilon_1=epsilon_1, epsilon_2=epsilon_2, **bandit_kwargs)
        # Fill the 'independent' row and column (the corner [nA, nA] is
        # covered by the row update only).
        payoffs_1[nA, :] += np.mean(rewards_rep_1) / mc_rep
        payoffs_1[:-1, nA] += np.mean(rewards_rep_1) / mc_rep
        payoffs_2[nA, :] += np.mean(rewards_rep_2) / mc_rep
        payoffs_2[:-1, nA] += np.mean(rewards_rep_2) / mc_rep
        se_1 += list(rewards_rep_1)
        se_2 += list(rewards_rep_2)

    standard_errors_1[nA, :] = np.std(se_1) / np.sqrt(len(se_1))
    standard_errors_1[:-1, nA] = np.std(se_1) / np.sqrt(len(se_1))
    standard_errors_2[nA, :] = np.std(se_2) / np.sqrt(len(se_2))
    standard_errors_2[:-1, nA] = np.std(se_2) / np.sqrt(len(se_2))

    # print(standard_errors_1)
    # print(standard_errors_2)

    # Compute nash
    payoffs_1 = payoffs_1.round(2)
    payoffs_2 = payoffs_2.round(2)
    d1, d2 = payoffs_1[nA, nA], payoffs_2[nA, nA]
    # e1, e2, _ = get_nash_welfare_optimal_eq(nash.Game(payoffs_1, payoffs_2), d1, d2)
    e1, e2, _ = get_welfare_optimal_eq(nash.Game(payoffs_1, payoffs_2))
    # ToDo: check se calculation
    se_1 = np.sqrt(np.dot(e1, np.dot(standard_errors_1**2, e2)))
    se_2 = np.sqrt(np.dot(e1, np.dot(standard_errors_2**2, e2)))
    v1, v2 = expected_payoffs(payoffs_1, payoffs_2, e1, e2)
    return {
        'epsilon_1_space': epsilon_1_space,
        'epsilon_2_space': epsilon_2_space,
        'e1': e1,
        'e2': e2,
        'v1': v1,
        'v2': v2,
        'payoffs_1': payoffs_1,
        'payoffs_2': payoffs_2,
        'se_1': se_1,
        'se_2': se_2
    }
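
Examples #5 and #6 also call an expected_payoffs helper that is not shown. Assuming the usual bilinear convention (row strategy e1, column strategy e2), a minimal sketch:

import numpy as np


def expected_payoffs(payoffs_1, payoffs_2, e1, e2):
    # Expected payoff to each player under the mixed profile (e1, e2).
    v1 = np.dot(e1, np.dot(payoffs_1, e2))
    v2 = np.dot(e1, np.dot(payoffs_2, e2))
    return v1, v2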
Example #6
def alternating(a1,
                a2,
                u1_mean=None,
                u2_mean=None,
                bias_2_1=np.zeros((2, 2)),
                bias_2_2=np.zeros((2, 2)),
                n=2,
                sigma_u=1,
                sigma_x=20,
                sigma_tol=0.1):
    """
  ai: int in {0, 1}, 0=collab and 1=independent.
  """
    # Parameters
    if u1_mean is None:
        u1_mean = np.array([[-10, 0], [-3, -1]])
    if u2_mean is None:
        u2_mean = np.array([[-10, -3], [0, -1]])
    sigma_x_mat = np.array([[sigma_x, sigma_x], [0., 0.]])

    # Generate true 2x2 payoffs
    u1 = np.random.normal(loc=u1_mean, scale=sigma_u, size=(2, 2))
    u2 = np.random.normal(loc=u2_mean, scale=sigma_u, size=(2, 2))

    # Generate obs
    x1_1 = np.random.normal(loc=u1, scale=sigma_x_mat, size=(n, 2, 2))
    x1_2 = np.random.normal(loc=u2, scale=sigma_x_mat, size=(n, 2, 2))
    x2_1 = np.random.normal(loc=u1 + bias_2_1,
                            scale=sigma_x_mat,
                            size=(n, 2, 2))
    x2_2 = np.random.normal(loc=u2 + bias_2_2,
                            scale=sigma_x_mat,
                            size=(n, 2, 2))
    x1_1_mean = x1_1.mean(axis=0)
    x1_2_mean = x1_2.mean(axis=0)
    x2_1_mean = x2_1.mean(axis=0)
    x2_2_mean = x2_2.mean(axis=0)
    # x1_1_std = x1_1.std(axis=0)
    # x1_2_std = x1_2.std(axis=0)
    # x2_1_std = x2_1.std(axis=0)
    # x2_2_std = x2_2.std(axis=0)

    # Greedy alternating offers
    public_mean_1 = np.zeros((2, 2))
    public_mean_2 = np.zeros((2, 2))
    best_payoff_1 = -np.inf
    best_payoff_2 = -np.inf
    ixs_1 = []
    ixs_2 = []
    i = 0
    # ToDo: check indices for nashpy
    # ToDo: rule for rejecting
    done = False
    t = 0
    if a1 == 1 and a2 == 1:
        while t < 1 and not done:
            # ToDo: passing known sigma_x to check_matrix_distances in get_welfare_optimal_observation, whereas in practice
            # ToDo: this is unknown
            best_payoff_1_t, best_obs_11, best_obs_12, best_ix_1, best_a1, best_a2 = \
              get_welfare_optimal_observation(x1_1, x1_2, public_mean_1, public_mean_2, len(ixs_1) + len(ixs_2), x1_1_mean,
                                              x1_2_mean, ixs_1, sigma_x / np.sqrt(n), sigma_tol=sigma_tol)
            ixs_1.append(best_ix_1)
            public_mean_1 += (best_obs_11 - public_mean_1) / (len(ixs_1) +
                                                              len(ixs_2))
            public_mean_2 += (best_obs_12 - public_mean_2) / (len(ixs_1) +
                                                              len(ixs_2))
            # # Other player naively incorporates new info
            # x2_1_mean += (best_obs_1 - x2_1_mean) / (t + 1 + n)
            # x2_2_mean += (best_obs_2 - x2_2_mean) / (t + 1 + n)

            i += 1
            # Player 2's candidate observations are hard-coded here: one fixed
            # matrix per payoff plus the running public mean.
            x2_2 = [np.array([[-10, -3], [1., -1]])]
            x2_2.append(public_mean_2)
            # x2_1 = [x2_1_mean + np.random.uniform(low=-10, high=10, size=((2, 2))) for _ in range(n)]
            x2_1 = [np.array([[-10, -0.5], [-3, -1]])]
            x2_1.append(public_mean_1)
            best_payoff_2_t, best_obs_22, best_obs_21, best_ix_2, best_a2, best_a1 = \
              get_welfare_optimal_observation(x2_2, x2_1, public_mean_2, public_mean_1, len(ixs_1) + len(ixs_2), x2_2_mean,
                                              x2_1_mean, ixs_2, sigma_x / np.sqrt(n), sigma_tol=sigma_tol)
            ixs_2.append(best_ix_2)
            public_mean_1 += (best_obs_21 - public_mean_1) / (len(ixs_1) +
                                                              len(ixs_2))
            public_mean_2 += (best_obs_22 - public_mean_2) / (len(ixs_1) +
                                                              len(ixs_2))
            # # Other player naively incorporates new info
            # x1_1_mean += (best_obs_1 - x1_1_mean) / (t + 1 + n)
            # x1_2_mean += (best_obs_2 - x1_2_mean) / (t + 1 + n)
            t += 1
            i += 1

        close_enough = check_matrix_distances(best_obs_11,
                                              best_obs_12,
                                              best_obs_21,
                                              best_obs_22,
                                              sigma_x / np.sqrt(n),
                                              sigma_tol=sigma_tol)
        if close_enough:
            d1, d2, _ = get_welfare_optimal_eq(
                nash.Game(public_mean_1, public_mean_2))
        else:
            d1, _, _ = get_welfare_optimal_eq(nash.Game(x1_1_mean, x1_2_mean))
            _, d2, _ = get_welfare_optimal_eq(nash.Game(x2_1_mean, x2_2_mean))
    else:
        d1, _, _ = get_welfare_optimal_eq(nash.Game(x1_1_mean, x1_2_mean))
        _, d2, _ = get_welfare_optimal_eq(nash.Game(x2_1_mean, x2_2_mean))
        close_enough = False

    r1, r2 = expected_payoffs(u1, u2, d1, d2)
    return r1, r2, close_enough
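
Example #6 additionally relies on check_matrix_distances (and get_welfare_optimal_observation), which are not shown. A hypothetical sketch of the distance check, assuming it asks whether the two players' estimates of both payoff matrices agree to within sigma_tol standard errors:

import numpy as np


def check_matrix_distances(obs_11, obs_12, obs_21, obs_22, sigma, sigma_tol=0.1):
    # Hypothetical: the players are 'close enough' to pool information when
    # their estimates of both payoff matrices agree element-wise within
    # sigma_tol * sigma.
    close_1 = np.all(np.abs(obs_11 - obs_21) < sigma_tol * sigma)
    close_2 = np.all(np.abs(obs_12 - obs_22) < sigma_tol * sigma)
    return bool(close_1 and close_2)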