Example #1
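# NOTE: these snippets are fragments; they assume the framework classes used below
# (MORLGridworld, MORLHVBAgent, MORLScalarizingAgent, morl_interact_multiple_episodic,
# plot_hypervolume, ...) plus `random`, `numpy as np`, matplotlib and a configured
# `log`ger are already imported in the surrounding module.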
if __name__ == '__main__':
    # create Problem
    problem = MORLGridworld()
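    # fix both RNG seeds so the experiment is reproducible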
    random.seed(2)
    np.random.seed(2)
    # learning rate
    alfacheb = 0.11
    eps = 0.9
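    # three hypervolume reference points; each sets a very low bound (-1000.0) on a different objective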
    ref_points = [[10.0, -1000.0, 10.0], [-1000.0, 10.0, 10.0],
                  [10.0, 10.0, -1000.0]]
    agents = []
    scalarization_weights = [0.0, 0.0]
    interactions = 1000
    log.info('Started reference point experiment')
    payoutslist = []
    for ref_p in xrange(len(ref_points)):
        agents.append(
            MORLHVBAgent(problem, alfacheb, eps, ref_points[ref_p],
                         scalarization_weights))

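        # train one agent per reference point and record its per-episode payout vectors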
        payouts, moves, states = morl_interact_multiple_episodic(
            agents[ref_p], problem, interactions, max_episode_length=300)
        payoutslist.append(payouts)
        policy = PolicyFromAgent(problem, agents[ref_p], mode='greedy')
        # policy_heat_plot(problem, policy, states)

    plot_hypervolume(agents, problem, name='reference point')
    print 'final average reward: ' + str(np.mean(payoutslist[0], axis=0))
    print 'final average reward: ' + str(np.mean(payoutslist[1], axis=0))
    print 'final average reward: ' + str(np.mean(payoutslist[2], axis=0))
Example #2
        # make the interactions
        log.info('Playing %i interactions on chebyagent' % interactions)
        payouts, moves, states = morl_interact_multiple_episodic(
            chebyagent, problem, interactions, max_episode_length=300)
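        # payouts holds one reward vector per completed episode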
        # print("TEST(cheby): interactions made: \nP: "+str(payouts[:])+",\n M: " + str(moves[:]) + ",\n S: " +
        #     str(states[:]) + '\n')
        log.info('Playing %i interactions on hvb agent' % interactions)
        payouts2, moves2, states2 = morl_interact_multiple_episodic(
            hvbagent, problem, interactions, max_episode_length=300)
        # print("TEST(HVB): interactions made: \nP: "+str(payouts2[:])+",\n M: " + str(moves2[:]) + ",\n S: " +
        #      str(states2[:]) + '\n')

        # extract all volumes of each agent
        # plot the evolution of both agents' hypervolume metrics
        agents = [hvbagent, chebyagent]
        plot_hypervolume(agents, problem, name='agent')
        plt.figure()
        # trim both payout series to a common length before plotting
        length = min(len(payouts), len(payouts2))
        x = np.arange(length)
        payouts = payouts[:length]
        payouts2 = payouts2[:length]
        plt.plot(x, payouts, 'r', label='cheb')
        plt.plot(x, payouts2, 'b', label='hvb')
        plt.show()

    if experiment_2:
        # list of agents with different weights
        agent_group = []
        # list of volumes
Example #3
    # reconstructed opening of the truncated call (the unpacked names follow their later use)
    payouts, moves, states = morl_interact_multiple_episodic(
        chebyagent,
        problem,
        interactions,
        max_episode_length=300,
        discounted_eps=False)
    # print("TEST(cheby): interactions made: \nP: "+str(payouts[:])+",\n M: " + str(moves[:]) + ",\n S: " +
    #       str(states[:]) + '\n')
    # payouts, moves, states = morl_interact_multiple_average_episodic(chebyagent, problem, 10, 500)

    # time = problem.time_token
    chebyagent._epsilon = 0.9
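    # run a single extra episode to record one trajectory for the velocity/state plots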
    payouts, moves2, states = morl_interact_multiple_episodic(
        chebyagent, problem, 1, 300)
    velocity = problem.get_velocities(states)
    states = problem.create_plottable_states(states)
    plot_hypervolume([chebyagent], problem)
    forward_acc = []
    backward_acc = []
    nothin = []
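    # count, per episode, how often each action was taken
    # (0: no acceleration, 1: forward, 2: backward, per the plot labels below)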
    for i in xrange(len(moves)):
        counter = list(moves[i])
        nothin.append(counter.count(0))
        forward_acc.append(counter.count(1))
        backward_acc.append(counter.count(2))
    x = np.arange(len(nothin))
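    # optionally smooth the raw counts; mean_continued presumably computes a running mean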
    if show_trend:
        nothin = mean_continued(nothin)
        backward_acc = mean_continued(backward_acc)
        forward_acc = mean_continued(forward_acc)
    plt.plot(x, nothin, 'y', label='no_acceleration')
    plt.plot(x, forward_acc, 'g', label='forward acceleration')
Example #4
    # agent = FixedPolicyAgent(problem, exp_policy)
    # agent = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
    agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps,
                                 4.0, [-1.0, -1.0, -1.0])
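    # the two positional arguments after `eps` appear to be tau (4.0) and the
    # reference point ([-1.0, -1.0, -1.0]), matching the keyword usage in the other examples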
    # payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)
    payouts, moves, states = morl_interact_multiple_episodic(
        agent, problem, interactions=interactions, max_episode_length=150)

    learned_policy = PolicyFromAgent(problem, agent, mode='gibbs')
    # learned_policy = PolicyFromAgent(problem, agent, mode='greedy')

    # filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")

    ## Plotting ##

    # plt.ion()

    # figure_file_name = 'fig_runs-' + str(interactions) + "-" + agent.name() + ".png"
    titlestring = agent.name()
    # policy_plot2(problem, learned_policy, title=None, filename=titlestring)
    # policy_heat_plot(problem, learned_policy, states)
    # pickle_file_name = titlestring + '_' + time.strftime("%H%M%S") + '.p'
    # pickle.dump((payouts, moves, states, problem, agent), open(pickle_file_name, "wb"))

    # plt.ioff()
    plot_hypervolume([agent], problem)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))
Example #5
    chebyagent2 = MORLScalarizingAgent(problem2, [1.0, 0.0, 0.0], alpha=alfacheb,
                                       epsilon=eps, tau=tau, ref_point=ref)
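    # note: chebyagent2 puts all scalarization weight on the first objective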
    # both agents run the same number of interactions:
    interactions = 1000
    # make the interactions
    log.info('Playing %i interactions on chebyagent' % interactions)
    payouts, moves, states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                            max_episode_length=300)
    # print("TEST(cheby): interactions made: \nP: "+str(payouts[:])+",\n M: " + str(moves[:]) + ",\n S: " +
    #     str(states[:]) + '\n')
    log.info('Playing %i interactions on second chebyagent' % interactions)
    payouts2, moves2, states2 = morl_interact_multiple_episodic(chebyagent2, problem, interactions,
                                                                max_episode_length=299)
    # print("TEST(HVB): interactions made: \nP: "+str(payouts2[:])+",\n M: " + str(moves2[:]) + ",\n S: " +
    #      str(states2[:]) + '\n')

    # extract all volumes of each agent
    # plot the evolution of both agents' hypervolume metrics
    agents = [chebyagent2, chebyagent]
    plot_hypervolume(agents, problem, name='agent')
    plt.figure()
    # trim both payout series to a common length before plotting
    length = min(len(payouts), len(payouts2))
    x = np.arange(length)
    payouts = payouts[:length]
    payouts2 = payouts2[:length]
    plt.plot(x, payouts, 'r', label='chebyagent')
    plt.plot(x, payouts2, 'b', label='chebyagent2')
    plt.show()

Example #6
    interactions = 600
    if epsilon_experiment:
        log.info('Started epsilon experiment')
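        # train one agent per epsilon value, then compare their hypervolume curves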
        for eps in xrange(len(epsilons)):
            agents.append(
                MORLScalarizingAgent(
                    problem,
                    epsilon=epsilons[eps],
                    alpha=alfacheb,
                    scalarization_weights=scalarization_weights,
                    ref_point=ref,
                    tau=tau,
                    function='chebishev'))
            morl_interact_multiple_episodic(agents[eps], problem, interactions)

        plot_hypervolume(agents, problem, name='epsilon')

    if gamma_experiment:
        log.info('Started gamma experiment')
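        # train one agent per discount factor gamma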
        for gam in xrange(len(gammas)):
            agents.append(
                MORLScalarizingAgent(
                    problem,
                    epsilon=0.1,
                    alpha=alfacheb,
                    scalarization_weights=scalarization_weights,
                    ref_point=ref,
                    tau=tau,
                    function='chebishev',
                    gamma=gammas[gam]))
            morl_interact_multiple_episodic(agents[gam], problem, interactions)