import copy

import matplotlib.pyplot as plt
import nashpy as nash
import numpy as np

# Repo-local helpers (get_welfare_optimal_eq, expected_payoffs, bandit,
# draw_chicken_game, combine_reports, get_welfare_optimal_observation,
# check_matrix_distances) are assumed to be imported from elsewhere in this
# project.


def random_nash(sigma_x=1, n=10):
    u1_mean = np.array([[-10, 0], [-3, -1]])
    u2_mean = np.array([[-10, -3], [0, -1]])
    # Noise on the first row only; the second row is observed exactly.
    sigma_x_mat = np.array([[sigma_x, sigma_x], [0., 0.]])

    # Each player draws n noisy versions of both payoff matrices.
    u1_1 = np.random.normal(loc=u1_mean, scale=sigma_x_mat, size=(n, 2, 2))
    u1_2 = np.random.normal(loc=u2_mean, scale=sigma_x_mat, size=(n, 2, 2))
    u2_1 = np.random.normal(loc=u1_mean, scale=sigma_x_mat, size=(n, 2, 2))
    u2_2 = np.random.normal(loc=u2_mean, scale=sigma_x_mat, size=(n, 2, 2))

    crash_lst = []
    for i in range(n):
        u1_1_i, u1_2_i, u2_1_i, u2_2_i = u1_1[i], u1_2[i], u2_1[i], u2_2[i]
        # Each player solves the game implied by their own observations.
        a1_1, _, _ = get_welfare_optimal_eq(nash.Game(u1_1_i, u1_2_i))
        _, a2_2, _ = get_welfare_optimal_eq(nash.Game(u2_1_i, u2_2_i))
        # Crash: both players put probability 1 on their first action.
        crash = (a1_1[0] == a2_2[0] == 1)
        crash_lst.append(crash)
    print(np.mean(crash_lst))
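
# `get_welfare_optimal_eq` is provided elsewhere in this repo. The calls above
# rely on it returning (player 1 strategy, player 2 strategy, player 1 value).
# A minimal sketch of that assumed behavior, using nashpy's support
# enumeration and keeping the equilibrium with the highest total welfare (the
# name `_get_welfare_optimal_eq_sketch` is ours, not the repo's):
def _get_welfare_optimal_eq_sketch(game):
    best, best_welfare = None, -np.inf
    for sigma_1, sigma_2 in game.support_enumeration():
        v1, v2 = game[sigma_1, sigma_2]
        if v1 + v2 > best_welfare:
            best_welfare = v1 + v2
            best = (sigma_1, sigma_2, v1)
    return best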
def evaluate_reporting_policy_profile(distort_1, distort_2, sd=0.5, nrep=100):
    v1_mean = 0.
    v2_mean = 0.
    v1_default_mean = 0.
    v2_default_mean = 0.
    # G_1, G_2 are the true payoff matrices, assumed defined at module level.
    true_game = nash.Game(G_1, G_2)
    prop_agree = 0.
    v1_list = []
    v2_list = []
    for rep in range(nrep):
        # Each agent privately observes a noisy draw of the game and reports
        # it with a distortion to the (1, 0) entry.
        G_1_private, G_2_private = draw_chicken_game(sd=sd)
        G_1_rep = copy.copy(G_1_private)
        G_2_rep = copy.copy(G_2_private)
        G_1_rep[1, 0] += distort_1
        G_2_rep[1, 0] += distort_2
        combine, combined_game = combine_reports(G_1_rep, G_2_rep, sd=sd)
        prop_agree += combine / nrep

        # Default play: each agent solves the game implied by its own report.
        a1_default, _, _ = get_welfare_optimal_eq(nash.Game(G_1_rep, G_2))
        _, a2_default, _ = get_welfare_optimal_eq(nash.Game(G_2_rep, G_2))
        v1_default, v2_default = true_game[(a1_default, a2_default)]
        v1_default_mean += v1_default / nrep
        v2_default_mean += v2_default / nrep

        # If the reports are consistent enough to combine, both agents play
        # the equilibrium of the pooled game; otherwise they fall back to
        # their defaults.
        if combine:
            a1, a2, _ = get_welfare_optimal_eq(nash.Game(combined_game, G_2))
        else:
            a1, a2 = a1_default, a2_default
        v1, v2 = true_game[(a1, a2)]
        v1_mean += v1 / nrep
        v2_mean += v2 / nrep
        v1_list.append(v1)
        v2_list.append(v2)

    # Standard error of the Monte Carlo mean (std / sqrt(nrep), not std / nrep).
    v1_se = np.std(v1_list) / np.sqrt(nrep)
    v2_se = np.std(v2_list) / np.sqrt(nrep)
    return v1_mean, v2_mean, v1_default_mean, v2_default_mean, prop_agree, v1_se, v2_se
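
# `draw_chicken_game` and `combine_reports` are defined elsewhere in the repo.
# Plausible sketches consistent with how they are called above; the noise
# model, agreement rule, and pooling rule here are all assumptions:
def _draw_chicken_game_sketch(sd=0.5):
    # Each agent privately observes the row player's payoff matrix G_1 with
    # i.i.d. Gaussian noise (matching how the two return values are used
    # above; whether the repo's version also perturbs G_2 is unknown).
    return (np.random.normal(loc=G_1, scale=sd),
            np.random.normal(loc=G_1, scale=sd))


def _combine_reports_sketch(rep_1, rep_2, sd=0.5):
    # Pool the reports only when every entry agrees to within two noise
    # standard deviations.
    agree = bool(np.all(np.abs(rep_1 - rep_2) < 2. * sd))
    combined = (rep_1 + rep_2) / 2. if agree else None
    return agree, combined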
def monotone(u1=None, u2=None):
    if u1 is None:
        u1 = np.array([[-10, 0.5], [-3, -1]])
    if u2 is None:
        u2 = np.array([[-10, -3], [0, -1]])
    all_payoffs = []
    magnitudes = np.linspace(0, 10, 100)
    for rep in range(100):
        direction = np.random.uniform(low=-1, high=1, size=(2, 2))
        payoffs_1 = []
        for magnitude in magnitudes:
            u1_perturbed = u1 + direction * magnitude
            _, _, v1 = get_welfare_optimal_eq(nash.Game(u1_perturbed, u2))
            payoffs_1.append(v1)
        all_payoffs.append(payoffs_1)
    plt.plot(magnitudes, np.mean(all_payoffs, axis=0))
    plt.show()
    return
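
# Example usage of the perturbation experiment above: plot player 1's average
# welfare-optimal equilibrium payoff as their payoff matrix is pushed along
# random directions with growing magnitude.
#
#   monotone()  # defaults to the Chicken-style payoffs above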
if __name__ == "__main__":
    sd = 0.5
    payoffs_1 = np.zeros((4, 4))
    payoffs_2 = np.zeros((4, 4))
    se_1 = np.zeros((4, 4))
    se_2 = np.zeros((4, 4))
    # Grid over report-distortion levels for each agent.
    for i, distort_1 in enumerate(np.linspace(0, sd / 2, 4)):
        for j, distort_2 in enumerate(np.linspace(0, sd / 4, 4)):
            v1, v2, v1_default, v2_default, prop_agree, v1_se, v2_se = \
                evaluate_reporting_policy_profile(distort_1, -distort_2, nrep=1000)
            payoffs_1[i, j] = v1
            payoffs_2[i, j] = v2
            se_1[i, j] = v1_se
            se_2[i, j] = v2_se
            if i == 0 and j == 0:
                print(f'default: {v1_default}, {v2_default}')

    # Treat the distortion levels as actions in a meta-game and solve it.
    g = nash.Game(payoffs_1, payoffs_2)
    a1, a2, _ = get_welfare_optimal_eq(g)
    print(payoffs_1)
    print(payoffs_2)
    print(a1, a2)
    print(g[(a1, a2)])
    # Propagate per-cell standard errors through the equilibrium strategies
    # (a1 weights rows and a2 weights columns for both players' matrices).
    final_se_1 = np.sqrt(np.dot(a1, np.dot(se_1**2, a2)))
    final_se_2 = np.sqrt(np.dot(a1, np.dot(se_2**2, a2)))
    print(final_se_1)
    print(final_se_2)
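
# The bilinear forms for `final_se_1` / `final_se_2` above (also flagged with
# a "ToDo: check se calculation" in `nash_reporting_policy`) weight the cell
# variances linearly in the strategies. If the cell estimates are independent,
# the variance of the mixed-strategy payoff sum_ij a1_i a2_j X_ij is
# sum_ij a1_i**2 a2_j**2 se_ij**2. A hypothetical helper for that alternative:
def mixed_payoff_se(se_matrix, a1, a2):
    a1, a2 = np.asarray(a1), np.asarray(a2)
    return np.sqrt(np.dot(a1**2, np.dot(se_matrix**2, a2**2)))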
def nash_reporting_policy(env='coop', time_horizon=100, n=5, mc_rep=100, nA=10,
                          tau=1., eps_upper=1., policy='coop', bandit_kwargs=None):
    if bandit_kwargs is None:
        bandit_kwargs = {}

    # Compute payoff matrix over the reporting-policy action spaces.
    if env == 'coop':
        epsilon_1_space = np.linspace(0.1, eps_upper, nA)
        epsilon_2_space = np.linspace(0.1, eps_upper, nA)
        sigma_upper = 2.
    elif env == 'ug':
        epsilon_1_space = np.linspace(0, eps_upper, nA)
        epsilon_2_space = np.linspace(0, eps_upper, nA)
        sigma_upper = 1.
    else:
        raise ValueError("env must be 'coop' or 'ug'")

    if policy == 'cb':
        nA = nA**2
        epsilon_1_prod = [(eps1, eps2) for eps1 in epsilon_1_space for eps2 in epsilon_2_space]
        epsilon_2_prod = [(eps2, eps1) for eps2 in epsilon_2_space for eps1 in epsilon_1_space]
    else:
        epsilon_1_prod = [(eps1, eps1) for eps1 in epsilon_1_space]
        epsilon_2_prod = [(eps2, eps2) for eps2 in epsilon_2_space]

    # Index nA (the last row/column) holds the payoffs of playing independently.
    payoffs_1 = np.zeros((nA + 1, nA + 1))
    payoffs_2 = np.zeros((nA + 1, nA + 1))
    standard_errors_1 = np.zeros((nA + 1, nA + 1))
    standard_errors_2 = np.zeros((nA + 1, nA + 1))

    for i, (epsilon_1, epsilon_21) in enumerate(epsilon_1_prod):
        for j, (epsilon_2, epsilon_12) in enumerate(epsilon_2_prod):
            se_ij_1 = []
            se_ij_2 = []
            print(i, j)
            for rep in range(mc_rep):
                rewards_rep_1, rewards_rep_2, _, _ = \
                    bandit(policy=policy, time_horizon=time_horizon, n=n, sigma_tol=tau,
                           sigma_upper=sigma_upper, env=env, epsilon_1=epsilon_1,
                           epsilon_2=epsilon_2, epsilon_12=epsilon_12,
                           epsilon_21=epsilon_21, **bandit_kwargs)
                payoffs_1[i, j] += np.mean(rewards_rep_1) / mc_rep
                payoffs_2[i, j] += np.mean(rewards_rep_2) / mc_rep
                se_ij_1 += list(rewards_rep_1)
                se_ij_2 += list(rewards_rep_2)
            standard_errors_1[i, j] = np.std(se_ij_1) / np.sqrt(len(se_ij_1))
            standard_errors_2[i, j] = np.std(se_ij_2) / np.sqrt(len(se_ij_2))

    # Get independent payoffs
    se_1 = []
    se_2 = []
    for rep in range(mc_rep):
        rewards_rep_1, rewards_rep_2, _, _ = \
            bandit(policy='ind', time_horizon=time_horizon, n=n, sigma_tol=tau,
                   sigma_upper=sigma_upper, env=env, epsilon_1=epsilon_1,
                   epsilon_2=epsilon_2, **bandit_kwargs)
        payoffs_1[nA, :] += np.mean(rewards_rep_1) / mc_rep
        payoffs_1[:-1, nA] += np.mean(rewards_rep_1) / mc_rep
        payoffs_2[nA, :] += np.mean(rewards_rep_2) / mc_rep
        payoffs_2[:-1, nA] += np.mean(rewards_rep_2) / mc_rep
        se_1 += list(rewards_rep_1)
        se_2 += list(rewards_rep_2)
    # Standard error over all pooled rewards, not just the final rep.
    standard_errors_1[nA, :] = np.std(se_1) / np.sqrt(len(se_1))
    standard_errors_1[:-1, nA] = np.std(se_1) / np.sqrt(len(se_1))
    standard_errors_2[nA, :] = np.std(se_2) / np.sqrt(len(se_2))
    standard_errors_2[:-1, nA] = np.std(se_2) / np.sqrt(len(se_2))

    # Compute nash
    payoffs_1 = payoffs_1.round(2)
    payoffs_2 = payoffs_2.round(2)
    d1, d2 = payoffs_1[nA, nA], payoffs_2[nA, nA]  # disagreement (independent) payoffs
    # e1, e2, _ = get_nash_welfare_optimal_eq(nash.Game(payoffs_1, payoffs_2), d1, d2)
    e1, e2, _ = get_welfare_optimal_eq(nash.Game(payoffs_1, payoffs_2))
    se_1 = np.sqrt(np.dot(e1, np.dot(standard_errors_1**2, e2)))  # ToDo: check se calculation
    se_2 = np.sqrt(np.dot(e1, np.dot(standard_errors_2**2, e2)))
    v1, v2 = expected_payoffs(payoffs_1, payoffs_2, e1, e2)
    return {'epsilon_1_space': epsilon_1_space, 'epsilon_2_space': epsilon_2_space,
            'e1': e1, 'e2': e2, 'v1': v1, 'v2': v2,
            'payoffs_1': payoffs_1, 'payoffs_2': payoffs_2,
            'se_1': se_1, 'se_2': se_2}
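
# `expected_payoffs` is defined elsewhere in the repo. Under the standard
# bimatrix convention it is assumed to compute v1 = e1' U1 e2 and
# v2 = e1' U2 e2 for mixed strategies e1, e2; a minimal sketch:
def _expected_payoffs_sketch(u1, u2, e1, e2):
    v1 = np.dot(e1, np.dot(u1, e2))
    v2 = np.dot(e1, np.dot(u2, e2))
    return v1, v2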
def alternating(a1, a2, u1_mean=None, u2_mean=None, bias_2_1=np.zeros((2, 2)),
                bias_2_2=np.zeros((2, 2)), n=2, sigma_u=1, sigma_x=20, sigma_tol=0.1):
    """
    ai: int in {0, 1}; 1 = collaborate (alternate sharing observations),
    0 = play independently. The collaborative branch below runs only when
    both players choose 1.
    """
    # Parameters
    if u1_mean is None:
        u1_mean = np.array([[-10, 0], [-3, -1]])
    if u2_mean is None:
        u2_mean = np.array([[-10, -3], [0, -1]])
    sigma_x_mat = np.array([[sigma_x, sigma_x], [0., 0.]])

    # Generate true 2x2 payoffs
    u1 = np.random.normal(loc=u1_mean, scale=sigma_u, size=(2, 2))
    u2 = np.random.normal(loc=u2_mean, scale=sigma_u, size=(2, 2))

    # Generate obs (player 2's observations are systematically biased)
    x1_1 = np.random.normal(loc=u1, scale=sigma_x_mat, size=(n, 2, 2))
    x1_2 = np.random.normal(loc=u2, scale=sigma_x_mat, size=(n, 2, 2))
    x2_1 = np.random.normal(loc=u1 + bias_2_1, scale=sigma_x_mat, size=(n, 2, 2))
    x2_2 = np.random.normal(loc=u2 + bias_2_2, scale=sigma_x_mat, size=(n, 2, 2))
    x1_1_mean = x1_1.mean(axis=0)
    x1_2_mean = x1_2.mean(axis=0)
    x2_1_mean = x2_1.mean(axis=0)
    x2_2_mean = x2_2.mean(axis=0)

    # Greedy alternating offers
    public_mean_1 = np.zeros((2, 2))
    public_mean_2 = np.zeros((2, 2))
    ixs_1 = []
    ixs_2 = []
    # ToDo: check indices for nashpy
    # ToDo: rule for rejecting
    done = False
    t = 0
    if a1 == 1 and a2 == 1:
        while t < 1 and not done:
            # ToDo: passing known sigma_x to check_matrix_distances in
            # get_welfare_optimal_observation, whereas in practice this is unknown
            best_payoff_1_t, best_obs_11, best_obs_12, best_ix_1, best_a1, best_a2 = \
                get_welfare_optimal_observation(x1_1, x1_2, public_mean_1, public_mean_2,
                                                len(ixs_1) + len(ixs_2), x1_1_mean,
                                                x1_2_mean, ixs_1, sigma_x / np.sqrt(n),
                                                sigma_tol=sigma_tol)
            ixs_1.append(best_ix_1)
            public_mean_1 += (best_obs_11 - public_mean_1) / (len(ixs_1) + len(ixs_2))
            public_mean_2 += (best_obs_12 - public_mean_2) / (len(ixs_1) + len(ixs_2))
            # # Other player naively incorporates new info
            # x2_1_mean += (best_obs_1 - x2_1_mean) / (t + 1 + n)
            # x2_2_mean += (best_obs_2 - x2_2_mean) / (t + 1 + n)

            # Player 2's candidate observations are hard-coded here (debugging
            # values) alongside the current public means.
            x2_2 = [np.array([[-10, -3], [1., -1]])]
            x2_2.append(public_mean_2)
            # x2_1 = [x2_1_mean + np.random.uniform(low=-10, high=10, size=(2, 2)) for _ in range(n)]
            x2_1 = [np.array([[-10, -0.5], [-3, -1]])]
            x2_1.append(public_mean_1)
            best_payoff_2_t, best_obs_22, best_obs_21, best_ix_2, best_a2, best_a1 = \
                get_welfare_optimal_observation(x2_2, x2_1, public_mean_2, public_mean_1,
                                                len(ixs_1) + len(ixs_2), x2_2_mean,
                                                x2_1_mean, ixs_2, sigma_x / np.sqrt(n),
                                                sigma_tol=sigma_tol)
            ixs_2.append(best_ix_2)
            public_mean_1 += (best_obs_21 - public_mean_1) / (len(ixs_1) + len(ixs_2))
            public_mean_2 += (best_obs_22 - public_mean_2) / (len(ixs_1) + len(ixs_2))
            # # Other player naively incorporates new info
            # x1_1_mean += (best_obs_1 - x1_1_mean) / (t + 1 + n)
            # x1_2_mean += (best_obs_2 - x1_2_mean) / (t + 1 + n)
            t += 1

        close_enough = check_matrix_distances(best_obs_11, best_obs_12, best_obs_21,
                                              best_obs_22, sigma_x / np.sqrt(n),
                                              sigma_tol=sigma_tol)
        if close_enough:
            d1, d2, _ = get_welfare_optimal_eq(nash.Game(public_mean_1, public_mean_2))
        else:
            d1, _, _ = get_welfare_optimal_eq(nash.Game(x1_1_mean, x1_2_mean))
            _, d2, _ = get_welfare_optimal_eq(nash.Game(x2_1_mean, x2_2_mean))
    else:
        d1, _, _ = get_welfare_optimal_eq(nash.Game(x1_1_mean, x1_2_mean))
        _, d2, _ = get_welfare_optimal_eq(nash.Game(x2_1_mean, x2_2_mean))
        close_enough = False

    r1, r2 = expected_payoffs(u1, u2, d1, d2)
    return r1, r2, close_enough
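
# `check_matrix_distances` is imported from elsewhere in the repo. Judging by
# the call sites above (the two players' observations of each payoff matrix,
# a standard error, and a tolerance), a plausible sketch is an elementwise
# agreement test; the exact rule is an assumption:
def _check_matrix_distances_sketch(obs_11, obs_12, obs_21, obs_22, se, sigma_tol=0.1):
    # The players' observations of each payoff matrix must agree to within
    # sigma_tol standard errors in every cell.
    close_1 = np.all(np.abs(obs_11 - obs_21) <= sigma_tol * se)
    close_2 = np.all(np.abs(obs_12 - obs_22) <= sigma_tol * se)
    return close_1 and close_2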