Пример #1
0
            results = pickle.load(f)
    else:
        results = Parallel(n_jobs=4)(delayed(onerun)(r)
                                     for r in xrange(independent_runs))
        with open("eu_prediction_sweep.pickle", 'wb') as f:
            pickle.dump(results, f)

    for r in xrange(len(results)):
        avg_payouts[r, :] = results[r][0]
        learned_actions[r, :] = results[r][1]

    avg_payouts = avg_payouts.mean(axis=0)
    learned_actions = learned_actions.mean(axis=0)

    plot_that_pretty_rldm15(
        [np.linspace(linspace_from, linspace_to, linspace_steps)],
        [avg_payouts], ["EU"], "Prediction Accuracy", (0, 1.1, 0.2), "Payout",
        (0, 1001000, 100000), 'eu_agent_payout.pdf')

    plot_that_pretty_rldm15(
        [np.linspace(linspace_from, linspace_to, linspace_steps)],
        [learned_actions], ["EU"], "Prediction Accuracy", (0, 1.1, 0.2),
        "Learned Action", (0, 1.1, 0.2), 'eu_agent_learned_action.pdf')

#     fig = plt.figure()
#     plt.xlabel('prediction accuracy')
#     plt.ylabel('payout')
#     plt.plot(np.linspace(linspace_from, linspace_to,
#                          linspace_steps), avg_payouts, label='EUAgent')
#     plt.legend(loc='upper center')
#     plt.savefig("eu_agent_payout.pdf")
#     fig = plt.figure()
        avg_payouts1.append(avg_payout1)
        std_payouts1.append(std_payout1)
        avg_payouts2.append(avg_payout2)
        std_payouts2.append(std_payout2)
        avg_total1.append(avg_totall1)
        avg_total2.append(avg_totall2)

        log.info('Average Payout: %.3f vs. %.3f (total: %.3f vs. %.3f)' %
                 (avg_payout1, avg_payout2, avg_totall1, avg_totall2))

    avg_payouts1 = np.array(avg_payouts1)
    std_payouts1 = np.array(std_payouts1)
    avg_payouts2 = np.array(avg_payouts2)
    std_payouts2 = np.array(std_payouts2)
    avg_total1 = np.array(avg_total1)
    avg_total2 = np.array(avg_total2)

    plot_that_pretty_rldm15([
        np.linspace(linspace_from, linspace_to, linspace_steps),
        np.linspace(linspace_from, linspace_to, linspace_steps)
    ], [avg_payouts1, avg_payouts2], ["Defect", "Cooperate"],
                            "Cooperation Probability", (0, 1.1, 0.2), "Payout",
                            (0, 6, 1), 'defect_vs_cooperate.pdf')

    plot_that_pretty_rldm15([
        np.linspace(linspace_from, linspace_to, linspace_steps),
        np.linspace(linspace_from, linspace_to, linspace_steps)
    ], [avg_total1, avg_total2], ["Defect", "Cooperate"],
                            "Cooperation Probability", (0, 1.1, 0.2), "Payout",
                            (0, 7, 1), 'defect_vs_cooperate_total_payout.pdf')
    avg_payout_sarsa = avg_payout_sarsa.mean(axis=0)
    learned_actions_sarsa = learned_actions_sarsa.mean(axis=0)
    avg_payout_eu = avg_payout_eu.mean(axis=0)
    learned_actions_eu = learned_actions_eu.mean(axis=0)
    avg_payout_avgq = avg_payout_avgq.mean(axis=0)
    learned_actions_avgq = learned_actions_avgq.mean(axis=0)

    y_range = (300000, 1001000, 100000)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ], [avg_payout_sarsa, avg_payout_avgq, avg_payout_eu],
        ["SARSA", "AVGQ", "EU"],
        "Prediction Accuracy", (0, 1.1, 0.2),
        "Payout",
        y_range,
        'figure_1_c_combined_newcomb_sarsa_avg_eu_payout.pdf',
        custom_yticks=["%iK" % (int(x / 1000.0)) for x in np.arange(*y_range)],
        fontsize=25,
        label_fontsize=25,
        label_offsets=[-30000, 0.0, 0])

    y_range = (0, 1.1, 0.2)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ], [learned_actions_sarsa, learned_actions_avgq, learned_actions_eu],
Пример #4
0
                                                   [1001000, 1000]]))
        agent1 = OneBoxNewcombAgent(problem1)
        agent2 = TwoBoxNewcombAgent(problem2)

        log.info('Playing ...')
        log.info('%s' % (str(agent1)))
        log.info('%s' % (str(problem1)))
        log.info(' VERSUS')
        log.info('%s' % (str(agent2)))
        log.info('%s' % (str(problem2)))

        _, payouts1 = interact_multiple(agent1, problem1, interactions)
        _, payouts2 = interact_multiple(agent2, problem2, interactions)
        avg_payout1 = payouts1.mean(axis=0)
        avg_payout2 = payouts2.mean(axis=0)

        avg_payouts1.append(avg_payout1)
        avg_payouts2.append(avg_payout2)

        log.info('Average Payout: %.3f vs. %.3f' % (avg_payout1, avg_payout2))

    avg_payouts1 = np.array(avg_payouts1)
    avg_payouts2 = np.array(avg_payouts2)

    plot_that_pretty_rldm15([
        np.linspace(linspace_from, linspace_to, linspace_steps),
        np.linspace(linspace_from, linspace_to, linspace_steps)
    ], [avg_payouts1, avg_payouts2], ["TwoBoxer", "OneBoxer"],
                            "Prediction Accuracy", (0, 1.1, 0.2), "Payout",
                            (0, 1001001, 100000), 'one_vs_two_box.pdf')
        avg_payout_eu[r, :] = euresults[r][0]
        learned_actions_eu[r, :] = euresults[r][1]
    for r in xrange(len(avgqresults)):
        avg_payout_avgq[r, :] = avgqresults[r][0]
        learned_actions_avgq[r, :] = avgqresults[r][1]

    avg_payout_sarsa = avg_payout_sarsa.mean(axis=0)
    learned_actions_sarsa = learned_actions_sarsa.mean(axis=0)
    avg_payout_eu = avg_payout_eu.mean(axis=0)
    learned_actions_eu = learned_actions_eu.mean(axis=0)
    avg_payout_avgq = avg_payout_avgq.mean(axis=0)
    learned_actions_avgq = learned_actions_avgq.mean(axis=0)

    plot_that_pretty_rldm15([
        np.linspace(linspace_from, linspace_to, linspace_steps),
        np.linspace(linspace_from, linspace_to, linspace_steps),
        np.linspace(linspace_from, linspace_to, linspace_steps)
    ], [avg_payout_sarsa, avg_payout_avgq, avg_payout_eu],
                            ["SARSA", "AVGQ", "EU"], "Prediction Accuracy",
                            (0, 1.1, 0.2), "Payout", (0, 1001000, 100000),
                            'combined_newcomb_sarsa_avgq_eu_payout.pdf')

    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ], [learned_actions_sarsa, learned_actions_avgq, learned_actions_eu],
        ["SARSA", "AVGQ", "EU"], "Prediction Accuracy", (0, 1.1, 0.2),
        "Learned Action", (0, 1.1, 0.2),
        'combined_newcomb_sarsa_avgq_eu_learned_action.pdf')
from plotting_stuff import plot_that_pretty_rldm15

if __name__ == '__main__':
    # Script entry point: load each experiment's pickled result structure
    # and render every plot listed under its 'plotparams' key.
    from experiment_definitions import experiments

    inputs = [e['picklefile'] for e in experiments]

    # Abort if any expected result file is missing. any() also handles an
    # empty experiment list gracefully.
    if any(not os.path.isfile(n) for n in inputs):
        # Adjacent string literals keep the message free of stray
        # indentation whitespace.
        sys.stderr.write("run pd_two_player_generic.py first to "
                         "create the .pickle files\n")
        sys.exit(1)

    for e in experiments:
        # Pickle data is binary, so the file is read in 'rb' mode.
        # NOTE(review): pickle.load can execute arbitrary code; only feed
        # it result files that were produced locally.
        with open(e['picklefile'], 'rb') as f:
            resultstruct = pickle.load(f)

        results = resultstruct['results']

        for p in resultstruct['plotparams']:
            # 'xdata' and 'ydata' are stored as callables taking
            # (results, e); replace them with the computed values so the
            # dict can be passed straight through as keyword arguments.
            p['xdata'] = p['xdata'](results, e)
            p['ydata'] = p['ydata'](results, e)
            plot_that_pretty_rldm15(**p)
        avg_payouts2.append(avg_payout2)
        learned_actions1.append(learned_action1)
        learned_actions2.append(learned_action2)

    # TODO: this positional indexing is awkward; 'metadata' is introduced
    #       above but never used.
    y_range = (0, 7, 1)
    y_range_print = (1, 7, 1)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ], [avg_payouts1[0], avg_payouts2[0]], ["SARSA 1", "SARSA 2"],
        r"$\epsilon$", (0, 1.1, 0.2),
        "Payout",
        y_range,
        'pd_sarsa_avg_payout_normal.pdf',
        custom_yticks=[""] +
        ["%i" % (int(x)) for x in np.arange(*y_range_print)],
        fontsize=25,
        label_fontsize=25,
        y_lim=(0, 6),
        label_offsets=[-0.1, -0.4, 0.0, 0.0])

    y_range = (0, 1100000, 100000)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ],
        [avg_payouts1[1], avg_payouts2[1]],
        avg_payouts.append(avg_payout)
        learned_actions.append(learned_action)

    # TODO: this positional indexing is awkward; 'metadata' is introduced
    #       above but never used.
    y_range = (0, 7, 1)
    y_range_print = (1, 7, 1)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ], [avg_payouts[0], avg_payouts[1], avg_payouts[4], avg_payouts[5]],
        ["SARSA (I)", "SARSA (T)", "AVGQ (I)", "AVGQ (T)"],
        "Prediction Accuracy", (0, 1.1, 0.2),
        "Payout",
        y_range,
        'figure_2_c_combined_pd_sarsa_avg_payout_normal.pdf',
        custom_yticks=[""] +
        ["%i" % (int(x)) for x in np.arange(*y_range_print)],
        fontsize=25,
        label_fontsize=25,
        y_lim=(0, 6),
        label_offsets=[-0.1, -0.4, 0.0, 0.0])

    y_range = (0, 1100000, 100000)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps),
        log.info(' VERSUS')
        log.info('%s' % (str(agent2)))
        log.info('%s' % (str(problem2)))

        _, payouts1 = interact_multiple(agent1, problem1, interactions)
        _, payouts2 = interact_multiple(agent2, problem2, interactions)
        avg_payout1 = payouts1.mean(axis=0)
        avg_payout2 = payouts2.mean(axis=0)

        avg_payouts1.append(avg_payout1)
        avg_payouts2.append(avg_payout2)

        log.info('Average Payout: %.3f vs. %.3f' % (avg_payout1, avg_payout2))

    avg_payouts1 = np.array(avg_payouts1)
    avg_payouts2 = np.array(avg_payouts2)

    y_range = (0, 1001000, 100000)
    plot_that_pretty_rldm15(
        [
            np.linspace(linspace_from, linspace_to, linspace_steps),
            np.linspace(linspace_from, linspace_to, linspace_steps)
        ], [avg_payouts1, avg_payouts2], ["TwoBoxer", "OneBoxer"],
        "Prediction Accuracy", (0, 1.1, 0.2),
        "Payout",
        y_range,
        'figure_1_a_one_vs_twoboxing.pdf',
        custom_yticks=["%iK" % (int(x / 1000.0)) for x in np.arange(*y_range)],
        fontsize=25,
        label_fontsize=25)