# NOTE(review): whitespace-collapsed fragment; the `if`/`with` that owns the
# leading `pickle.load(f)` and the `else:` begins outside this view.
# Visible flow: either load `results` with pickle.load(f), or compute them by
# calling `onerun(r)` for every r in xrange(independent_runs) through
# Parallel(n_jobs=4) and dump the list to "eu_prediction_sweep.pickle".
# results[r][0] / results[r][1] are then copied into row r of avg_payouts /
# learned_actions, both arrays are averaged over axis 0, and each average is
# plotted against np.linspace(linspace_from, linspace_to, linspace_steps) into
# 'eu_agent_payout.pdf' and 'eu_agent_learned_action.pdf'.
# The trailing `#` text is an earlier matplotlib version of the payout plot
# that was left commented out.
results = pickle.load(f) else: results = Parallel(n_jobs=4)(delayed(onerun)(r) for r in xrange(independent_runs)) with open("eu_prediction_sweep.pickle", 'wb') as f: pickle.dump(results, f) for r in xrange(len(results)): avg_payouts[r, :] = results[r][0] learned_actions[r, :] = results[r][1] avg_payouts = avg_payouts.mean(axis=0) learned_actions = learned_actions.mean(axis=0) plot_that_pretty_rldm15( [np.linspace(linspace_from, linspace_to, linspace_steps)], [avg_payouts], ["EU"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout", (0, 1001000, 100000), 'eu_agent_payout.pdf') plot_that_pretty_rldm15( [np.linspace(linspace_from, linspace_to, linspace_steps)], [learned_actions], ["EU"], "Prediction Accuracy", (0, 1.1, 0.2), "Learned Action", (0, 1.1, 0.2), 'eu_agent_learned_action.pdf') # fig = plt.figure() # plt.xlabel('prediction accuracy') # plt.ylabel('payout') # plt.plot(np.linspace(linspace_from, linspace_to, # linspace_steps), avg_payouts, label='EUAgent') # plt.legend(loc='upper center') # plt.savefig("eu_agent_payout.pdf") # fig = plt.figure()
# NOTE(review): whitespace-collapsed fragment; the code that computes
# avg_payout1/2, std_payout1/2 and avg_totall1/2 (and any enclosing loop) is
# outside this view.
# Visible flow: append the per-setting averages, standard deviations and totals
# to their accumulator lists and log the averages and totals. The six lists are
# then converted to numpy arrays. Two figures are drawn against
# np.linspace(linspace_from, linspace_to, linspace_steps) with the x label
# "Cooperation Probability": the average payouts go to
# 'defect_vs_cooperate.pdf' and the totals to
# 'defect_vs_cooperate_total_payout.pdf'. In both, series 1 is listed first and
# the label list is ["Defect", "Cooperate"].
# std_payouts1 / std_payouts2 are built but not passed to either visible plot call.
avg_payouts1.append(avg_payout1) std_payouts1.append(std_payout1) avg_payouts2.append(avg_payout2) std_payouts2.append(std_payout2) avg_total1.append(avg_totall1) avg_total2.append(avg_totall2) log.info('Average Payout: %.3f vs. %.3f (total: %.3f vs. %.3f)' % (avg_payout1, avg_payout2, avg_totall1, avg_totall2)) avg_payouts1 = np.array(avg_payouts1) std_payouts1 = np.array(std_payouts1) avg_payouts2 = np.array(avg_payouts2) std_payouts2 = np.array(std_payouts2) avg_total1 = np.array(avg_total1) avg_total2 = np.array(avg_total2) plot_that_pretty_rldm15([ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payouts1, avg_payouts2], ["Defect", "Cooperate"], "Cooperation Probability", (0, 1.1, 0.2), "Payout", (0, 6, 1), 'defect_vs_cooperate.pdf') plot_that_pretty_rldm15([ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_total1, avg_total2], ["Defect", "Cooperate"], "Cooperation Probability", (0, 1.1, 0.2), "Payout", (0, 7, 1), 'defect_vs_cooperate_total_payout.pdf')
# NOTE(review): whitespace-collapsed fragment; the arrays averaged here are
# filled outside this view, and the second plot call is cut off at the end of
# the line.
# Visible flow: average the SARSA / EU / AVGQ payout and learned-action arrays
# over axis 0, then plot the three payout curves (order: SARSA, AVGQ, EU)
# against np.linspace(linspace_from, linspace_to, linspace_steps) into
# 'figure_1_c_combined_newcomb_sarsa_avg_eu_payout.pdf'. The custom y tick
# labels are built from np.arange(*y_range) as "<int(value / 1000)>K".
# y_range is then rebound to (0, 1.1, 0.2) for the learned-action plot.
avg_payout_sarsa = avg_payout_sarsa.mean(axis=0) learned_actions_sarsa = learned_actions_sarsa.mean(axis=0) avg_payout_eu = avg_payout_eu.mean(axis=0) learned_actions_eu = learned_actions_eu.mean(axis=0) avg_payout_avgq = avg_payout_avgq.mean(axis=0) learned_actions_avgq = learned_actions_avgq.mean(axis=0) y_range = (300000, 1001000, 100000) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payout_sarsa, avg_payout_avgq, avg_payout_eu], ["SARSA", "AVGQ", "EU"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout", y_range, 'figure_1_c_combined_newcomb_sarsa_avg_eu_payout.pdf', custom_yticks=["%iK" % (int(x / 1000.0)) for x in np.arange(*y_range)], fontsize=25, label_fontsize=25, label_offsets=[-30000, 0.0, 0]) y_range = (0, 1.1, 0.2) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [learned_actions_sarsa, learned_actions_avgq, learned_actions_eu],
# NOTE(review): whitespace-collapsed fragment; it opens in the middle of an
# expression, and whatever defines problem1 / problem2, `interactions` and the
# accumulator lists is outside this view.
# Visible flow: build a OneBoxNewcombAgent on problem1 and a
# TwoBoxNewcombAgent on problem2, log both agent/problem pairs, run
# interact_multiple for each pair, append the mean payout (axis 0) of each to
# avg_payouts1 / avg_payouts2 and log them. The lists are then converted to
# numpy arrays and plotted against
# np.linspace(linspace_from, linspace_to, linspace_steps) into
# 'one_vs_two_box.pdf'.
# NOTE(review): agent1 is the OneBoxNewcombAgent, yet its series avg_payouts1
# is listed first while the label list starts with "TwoBoxer" -- confirm how
# plot_that_pretty_rldm15 pairs labels with series before trusting the legend.
[1001000, 1000]])) agent1 = OneBoxNewcombAgent(problem1) agent2 = TwoBoxNewcombAgent(problem2) log.info('Playing ...') log.info('%s' % (str(agent1))) log.info('%s' % (str(problem1))) log.info(' VERSUS') log.info('%s' % (str(agent2))) log.info('%s' % (str(problem2))) _, payouts1 = interact_multiple(agent1, problem1, interactions) _, payouts2 = interact_multiple(agent2, problem2, interactions) avg_payout1 = payouts1.mean(axis=0) avg_payout2 = payouts2.mean(axis=0) avg_payouts1.append(avg_payout1) avg_payouts2.append(avg_payout2) log.info('Average Payout: %.3f vs. %.3f' % (avg_payout1, avg_payout2)) avg_payouts1 = np.array(avg_payouts1) avg_payouts2 = np.array(avg_payouts2) plot_that_pretty_rldm15([ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payouts1, avg_payouts2], ["TwoBoxer", "OneBoxer"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout", (0, 1001001, 100000), 'one_vs_two_box.pdf')
# NOTE(review): whitespace-collapsed fragment; the `for` header that owns the
# first two assignments, and the code filling the SARSA arrays, are outside
# this view.
# Visible flow: copy element [0] / [1] of each euresults and avgqresults entry
# into row r of the matching payout / learned-action array, then average all
# six arrays over axis 0. Two figures are drawn against
# np.linspace(linspace_from, linspace_to, linspace_steps), each with the series
# in the order SARSA, AVGQ, EU: payouts go to
# 'combined_newcomb_sarsa_avgq_eu_payout.pdf' and learned actions to
# 'combined_newcomb_sarsa_avgq_eu_learned_action.pdf'.
avg_payout_eu[r, :] = euresults[r][0] learned_actions_eu[r, :] = euresults[r][1] for r in xrange(len(avgqresults)): avg_payout_avgq[r, :] = avgqresults[r][0] learned_actions_avgq[r, :] = avgqresults[r][1] avg_payout_sarsa = avg_payout_sarsa.mean(axis=0) learned_actions_sarsa = learned_actions_sarsa.mean(axis=0) avg_payout_eu = avg_payout_eu.mean(axis=0) learned_actions_eu = learned_actions_eu.mean(axis=0) avg_payout_avgq = avg_payout_avgq.mean(axis=0) learned_actions_avgq = learned_actions_avgq.mean(axis=0) plot_that_pretty_rldm15([ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payout_sarsa, avg_payout_avgq, avg_payout_eu], ["SARSA", "AVGQ", "EU"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout", (0, 1001000, 100000), 'combined_newcomb_sarsa_avgq_eu_payout.pdf') plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [learned_actions_sarsa, learned_actions_avgq, learned_actions_eu], ["SARSA", "AVGQ", "EU"], "Prediction Accuracy", (0, 1.1, 0.2), "Learned Action", (0, 1.1, 0.2), 'combined_newcomb_sarsa_avgq_eu_learned_action.pdf')
from plotting_stuff import plot_that_pretty_rldm15

if __name__ == '__main__':
    # Re-draw every figure described in experiment_definitions.experiments
    # from the result pickles written by pd_two_player_generic.py.
    from experiment_definitions import experiments

    inputs = [e['picklefile'] for e in experiments]
    # any() short-circuits and also copes with an empty experiment list, where
    # reduce() without an initial value raises TypeError.
    if any(not os.path.isfile(n) for n in inputs):
        # Adjacent literals instead of a backslash continuation inside the
        # quotes, which leaked stray characters into the message.
        sys.stderr.write("run pd_two_player_generic.py first to "
                         "create the .pickle files\n")
        sys.exit(1)

    for e in experiments:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # 'picklefile' at files produced by your own runs.
        # Pickle data is binary, so open the file in 'rb' mode.
        with open(e['picklefile'], 'rb') as f:
            resultstruct = pickle.load(f)
        results = resultstruct['results']
        plotparams = resultstruct['plotparams']

        for p in plotparams:
            # 'xdata' / 'ydata' are stored as callables; resolve them to the
            # actual series for this experiment before plotting.
            p['xdata'] = p['xdata'](results, e)
            p['ydata'] = p['ydata'](results, e)
            plot_that_pretty_rldm15(**p)
# NOTE(review): whitespace-collapsed fragment; the code computing avg_payout2
# and learned_action1/2 (and any enclosing loop) is outside this view, and the
# second plot call is cut off at the end of the line. Because the line breaks
# are gone, everything after the first `#` on the line below is a comment as
# written.
# Visible flow: append the latest payout / learned-action values to their
# lists, then plot avg_payouts1[0] and avg_payouts2[0] (labels "SARSA 1",
# "SARSA 2", x label epsilon) into 'pd_sarsa_avg_payout_normal.pdf'. The
# y tick labels are an empty string followed by the integers from
# np.arange(*y_range_print). A second plot of avg_payouts1[1] / avg_payouts2[1]
# with y_range (0, 1100000, 100000) follows.
# NOTE(review): label_offsets has four entries but only two series are passed
# here -- confirm whether the extra entries are ignored by
# plot_that_pretty_rldm15.
avg_payouts2.append(avg_payout2) learned_actions1.append(learned_action1) learned_actions2.append(learned_action2) # TODO: this indexing is awkward, man why do you introduce 'metadata' # above if you don't use it. y_range = (0, 7, 1) y_range_print = (1, 7, 1) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payouts1[0], avg_payouts2[0]], ["SARSA 1", "SARSA 2"], r"$\epsilon$", (0, 1.1, 0.2), "Payout", y_range, 'pd_sarsa_avg_payout_normal.pdf', custom_yticks=[""] + ["%i" % (int(x)) for x in np.arange(*y_range_print)], fontsize=25, label_fontsize=25, y_lim=(0, 6), label_offsets=[-0.1, -0.4, 0.0, 0.0]) y_range = (0, 1100000, 100000) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payouts1[1], avg_payouts2[1]],
# NOTE(review): whitespace-collapsed fragment; the code computing avg_payout /
# learned_action (and any enclosing loop) is outside this view, and the second
# plot call is cut off at the end of the line. Because the line breaks are
# gone, everything after the first `#` on the line below is a comment as
# written.
# Visible flow: append the latest payout / learned-action values, then plot
# avg_payouts[0], [1], [4] and [5] with the labels "SARSA (I)", "SARSA (T)",
# "AVGQ (I)", "AVGQ (T)" against
# np.linspace(linspace_from, linspace_to, linspace_steps) into
# 'figure_2_c_combined_pd_sarsa_avg_payout_normal.pdf'. The y tick labels are
# an empty string followed by the integers from np.arange(*y_range_print).
# y_range is then rebound to (0, 1100000, 100000) for the next plot.
avg_payouts.append(avg_payout) learned_actions.append(learned_action) # TODO: this indexing is awkward, man why do you introduce 'metadata' # above if you don't use it. y_range = (0, 7, 1) y_range_print = (1, 7, 1) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payouts[0], avg_payouts[1], avg_payouts[4], avg_payouts[5]], ["SARSA (I)", "SARSA (T)", "AVGQ (I)", "AVGQ (T)"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout", y_range, 'figure_2_c_combined_pd_sarsa_avg_payout_normal.pdf', custom_yticks=[""] + ["%i" % (int(x)) for x in np.arange(*y_range_print)], fontsize=25, label_fontsize=25, y_lim=(0, 6), label_offsets=[-0.1, -0.4, 0.0, 0.0]) y_range = (0, 1100000, 100000) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps),
# NOTE(review): whitespace-collapsed fragment; the construction of agent1/2 and
# problem1/2, `interactions`, and any enclosing loop are outside this view.
# Visible flow: log the second agent/problem pair, run interact_multiple for
# both pairs, append the mean payout (axis 0) of each to avg_payouts1 /
# avg_payouts2 and log them. The lists are then converted to numpy arrays and
# plotted against np.linspace(linspace_from, linspace_to, linspace_steps) into
# 'figure_1_a_one_vs_twoboxing.pdf'. The custom y tick labels are built from
# np.arange(*y_range) as "<int(value / 1000)>K".
# NOTE(review): series 1 is listed first while the label list starts with
# "TwoBoxer" -- confirm against the agent construction (not visible here) and
# against how plot_that_pretty_rldm15 pairs labels with series.
log.info(' VERSUS') log.info('%s' % (str(agent2))) log.info('%s' % (str(problem2))) _, payouts1 = interact_multiple(agent1, problem1, interactions) _, payouts2 = interact_multiple(agent2, problem2, interactions) avg_payout1 = payouts1.mean(axis=0) avg_payout2 = payouts2.mean(axis=0) avg_payouts1.append(avg_payout1) avg_payouts2.append(avg_payout2) log.info('Average Payout: %.3f vs. %.3f' % (avg_payout1, avg_payout2)) avg_payouts1 = np.array(avg_payouts1) avg_payouts2 = np.array(avg_payouts2) y_range = (0, 1001000, 100000) plot_that_pretty_rldm15( [ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps) ], [avg_payouts1, avg_payouts2], ["TwoBoxer", "OneBoxer"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout", y_range, 'figure_1_a_one_vs_twoboxing.pdf', custom_yticks=["%iK" % (int(x / 1000.0)) for x in np.arange(*y_range)], fontsize=25, label_fontsize=25)