Example #1
    def testWeightVariation(self):
        """
        this test creates 6 different chebyshev agents whose weights are each different. in the end it compares hvs
        :return:
        """

        # list of agents
        self.agents = []
        # list of volumes
        self.vollist = []
        # 6 agents, each with a different weight vector
        weight_vectors = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.5, 0.5, 0.0],
                          [0.0, 0.5, 0.5], [0.5, 0.0, 0.5], [0.33, 0.33, 0.33]]
        for weights in weight_vectors:
            self.agents.append(
                MORLScalarizingAgent(self.gridworldproblem, weights,
                                     alpha=self.alf,
                                     epsilon=self.eps,
                                     tau=self.tau,
                                     ref_point=self.ref))
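
The example stops before the comparison itself. A minimal sketch of how it might continue, assuming the `morl_interact_multiple_episodic`, `max_volumes` and `plot_hypervolume` names that appear in the other examples of this listing (the interaction counts are illustrative only):

        # sketch (assumption): train each agent and record the best hypervolume it reached
        for agent in self.agents:
            morl_interact_multiple_episodic(agent, self.gridworldproblem,
                                            interactions=200,
                                            max_episode_length=150)
            self.vollist.append(max(agent.max_volumes))
        # plot the hypervolume development of all six agents for comparison
        plot_hypervolume(self.agents, self.gridworldproblem, name='weights')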
Example #2
    def setUp(self):
        # create Problem
        self.gridworldproblem = MORLBuridansAssProblem()
        self.problem = Deepsea()
        # create and randomly initialize a weight vector
        self.scalarization_weights = np.zeros(self.problem.reward_dimension)
        self.scalarization_weights = random.sample(
            [i for i in np.linspace(0, 5, 5000)],
            len(self.scalarization_weights))
        # tau is for the chebyshev agent
        self.tau = 4.0
        # the ref point is used for hypervolume calculation
        self.ref = [-1.0, -1.0, -1.0]
        # learning rate
        self.alf = 0.1
        self.alfacheb = 0.1
        self.alfahvb = 0.1
        # probability of epsilon-greedy selection
        self.eps = 0.1
        # create one agent using the chebyshev scalarization method
        self.chebyagent = MORLScalarizingAgent(self.gridworldproblem,
                                               [1.0, 0.0, 0.0],
                                               alpha=self.alfacheb,
                                               epsilon=self.eps,
                                               tau=self.tau,
                                               ref_point=self.ref)
        # create one agent using the hypervolume-based algorithm
        self.hvbagent = MORLHVBAgent(self.gridworldproblem,
                                     alpha=self.alfahvb,
                                     epsilon=self.eps,
                                     ref=self.ref,
                                     scal_weights=[1.0, 10.0])
        self.hagent = MORLHLearningAgent(self.problem, self.eps, self.alf,
                                         self.scalarization_weights)
        # number of interactions each agent performs:
        self.interactions = 200
        self.convHullAgent = MORLConvexHullValueIteration(self.problem)
        self.data = [[4, 2, 0], [1, 1, 1], [1, 1, 0], [1, 0, 1]]
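
A minimal sketch of a test method that could follow this setUp(), assuming the episodic interaction helper used elsewhere in this listing and that `payouts` is an (episodes x objectives) array, as the `payouts.mean(axis=0)` calls in the other examples suggest; the method name and assertion are illustrative only.

    def testChebyInteraction(self):
        # sketch (assumption): let the chebyshev agent interact with the problem
        payouts, moves, states = morl_interact_multiple_episodic(
            self.chebyagent, self.gridworldproblem,
            interactions=self.interactions, max_episode_length=150)
        # expect one averaged reward component per objective
        self.assertEqual(len(payouts.mean(axis=0)),
                         self.gridworldproblem.reward_dimension)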
Example #3
    saved_weights = []
    plt.ion()
    problem = MORLBuridansAss1DProblem()
    scalarization_weights = np.array([1.0, 0.0, 0.0])
    eps = 0.9
    alfa = 0.08
    tau = 2.0
    ref_point = [-1.0, ] * problem.reward_dimension
    interactions = 1500
    chebyagent = MORLScalarizingAgent(
        problem,
        epsilon=eps,
        alpha=alfa,
        scalarization_weights=scalarization_weights,
        ref_point=ref_point,
        tau=tau,
        gamma=0.9,
        function='linear')

    payouts, moves, states = morl_interact_multiple_episodic(
        chebyagent, problem, interactions=interactions, max_episode_length=300)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

    volumes = [0]
    volumes.extend(chebyagent.max_volumes)
    x = np.arange(len(volumes))

    ##################################
    #               PLOT             #
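
The plotting block is cut off after the banner. A small sketch of how the collected hypervolume trace could be drawn, assuming the `plt` alias above is the usual matplotlib.pyplot import:

    # sketch (assumption): plot the hypervolume reached after each episode
    plt.figure()
    plt.plot(x, volumes)
    plt.xlabel('interaction')
    plt.ylabel('hypervolume')
    plt.show()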
Example #4
    # tau is for chebyshev agent
    tau = 4.0
    # ref point is used for Hypervolume calculation
    ref = [-1.0, -1.0, -1.0]
    # learning rate
    alf = 0.2
    alfacheb = 0.2
    alfahvb = 0.1
    n_vectors = 5

    # Probability of epsilon-greedy selection
    eps = 0.1
    # create one agent using scalarization method
    chebyagent = MORLScalarizingAgent(problem, [1.0, 0.0, 0.0],
                                      alpha=alfacheb,
                                      epsilon=eps,
                                      tau=tau,
                                      ref_point=ref)
    # create one agent using Hypervolume based Algorithm
    hvbagent = MORLHVBAgent(problem,
                            alpha=alfahvb,
                            epsilon=0.1,
                            ref=ref,
                            scal_weights=[1.0, 10.0])
    # number of interactions each agent performs:
    interactions = 1000
    if experiment_1:
        # make the interactions
        log.info('Playing %i interactions on chebyagent' % interactions)
        payouts, moves, states = morl_interact_multiple_episodic(
            chebyagent, problem, interactions, max_episode_length=300)
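
The `experiment_1` branch is truncated here. A sketch of a plausible continuation, assuming the HVB agent is trained the same way and the `plot_hypervolume` helper from the other examples is used for the comparison:

        # sketch (assumption): train the hypervolume-based agent as well
        log.info('Playing %i interactions on hvbagent' % interactions)
        morl_interact_multiple_episodic(hvbagent, problem, interactions,
                                        max_episode_length=300)
        # compare the hypervolume development of both agents
        plot_hypervolume([chebyagent, hvbagent], problem, name='agents')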
Example #5
    # ref point is used for Hypervolume calculation
    ref = [-10.0, ] * problem.reward_dimension
    # learning rate
    alfacheb = 0.4
    # Probability of epsilon-greedy selection
    eps = 0.9
    # should we show total acceleration count or just trend:
    show_trend = True

    # create one agent using chebyshev scalarization method
    chebyagent = MORLScalarizingAgent(
        problem,
        epsilon=eps,
        alpha=alfacheb,
        scalarization_weights=scalarization_weights,
        ref_point=ref,
        tau=tau,
        gamma=0.9)
    # hvbagent = MORLHVBAgent(problem, alfacheb, eps, ref, [0.0, 0.0])

    # number of interactions:
    interactions = 300
    #
    payouts, moves, states = morl_interact_multiple_episodic(
        chebyagent,
        problem,
        interactions,
        max_episode_length=300,
        discounted_eps=False)
    # print("TEST(cheby): interactions made: \nP: "+str(payouts[:])+",\n M: " + str(moves[:]) + ",\n S: " +
Example #6
    # scalarization_weights = np.array([0.0, 1.0])
    # scalarization_weights = np.array([0.9, 0.1])

    eps = 0.1
    alfa = 0.4
    runs = 2
    interactions = 1000

    # exp_policy = PolicyDeepseaExpert(problem, task='T2')
    # det_policy = PolicyDeepseaDeterministic(problem, policy='P1')
    # agent = FixedPolicyAgent(problem, exp_policy)
    # agent = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
    agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps,
                                 4.0, [-1.0, -1.0, -1.0])
    # payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)
    payouts, moves, states = morl_interact_multiple_episodic(
        agent, problem, interactions=interactions, max_episode_length=150)

    learned_policy = PolicyFromAgent(problem, agent, mode='gibbs')
    # learned_policy = PolicyFromAgent(problem, agent, mode='greedy')

    # filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")

    ## Plotting ##

    # plt.ion()

    # figure_file_name = 'fig_runs-' + str(interactions) + "-" + agent.name() + ".png"
    titlestring = agent.name()
    scalarization_weights = [1.0, 0.0, 0.0]
    # tau is for chebyshev agent
    tau = 0.1
    # ref point is used for Hypervolume calculation
    ref = [-0.1, ] * problem.reward_dimension
    # learning rate
    alfacheb = 0.01
    # Probability of epsilon-greedy selection
    eps = 0.7
    # create one agent using chebyshev scalarization method
    chebyagent = MORLScalarizingAgent(
        problem,
        epsilon=eps,
        alpha=alfacheb,
        scalarization_weights=scalarization_weights,
        ref_point=ref,
        tau=tau)
    # number of interactions:
    interactions = 2000
    n_vectors = 2

    if hypervolume_experiment:
        # make the interactions
        payouts, moves, states = morl_interact_multiple_episodic(
            chebyagent, problem, interactions, max_episode_length=150)
        print("TEST(cheby): interactions made: \nP: " + str(payouts[:]) +
              ",\n M: " + str(moves[:]) + ",\n S: " + str(states[:]) + '\n')

        plot_hypervolume([chebyagent], problem)
    def eps():
        for i in xrange(interactions / 2):
            yield 0.1
        yield 0.8

    alfa = 0.1
    runs = 1
    interactions = 10000
    ref_point = [-1.0, ] * problem.reward_dimension
    # agent = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
    agent = MORLScalarizingAgent(problem, scalarization_weights, alfa, eps(),
                                 4.0, ref_point)
    # payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)

    payouts, moves, states = morl_interact_multiple_episodic(
        agent, problem, interactions=interactions, max_episode_length=150)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

    # learned_policy = PolicyFromAgent(problem, agent, mode='gibbs')
    learned_policy = PolicyFromAgent(problem, agent, mode=None)
    # learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
    # learned_policy = PolicyGridworld(problem, policy='DIAGONAL')
    # learned_policy = PolicyGridworld(problem, policy='RIGHT')
    # learned_policy = PolicyGridworld(problem, policy='DOWN')

    # filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")
Example #9
    problem = MORLResourceGatheringProblem()

    # scalarization_weights = np.array([0.153, 0.847])
    # scalarization_weights = np.array([0.5, 0.5])
    scalarization_weights = np.array([0.5, 0.5, 0.0])
    # scalarization_weights = np.array([0.0, 1.0])
    # scalarization_weights = np.array([0.9, 0.1])

    eps = 0.4
    alfa = 0.4
    runs = 1
    interactions = 100
    max_steps = 100
    tau = 1.0

    agent = MORLScalarizingAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, tau=tau, gamma=1.0,
                                 ref_point=[-1.0, -1.0, -1.0])


    # payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions, max_episode_length=150)
    payouts, moves, states = morl_interact_multiple_episodic(agent, problem, interactions=interactions)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

    learned_policy = PolicyFromAgent(problem, agent, mode='greedy')
    # learned_policy = PolicyFromAgent(problem, agent, mode='greedy')

    # filename = 'figure_' + time.strftime("%Y%m%d-%H%M%S")
    states = problem.create_plottable_states(states)

    ## Plotting ##

    # plt.ion()
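
The plotting section is only a placeholder here. A sketch of what it might contain, reusing the `policy_plot2`, `policy_heat_plot` and `plot_hypervolume` calls that a later example in this listing applies to its learned policy:

    # sketch (assumption): visualise the learned greedy policy and the hypervolume development
    policy_plot2(problem, learned_policy, title=None, filename=None)
    policy_heat_plot(problem, learned_policy, states)
    plot_hypervolume([agent], problem)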
Example #10
if __name__ == '__main__':
    # create Problem
    problem = MORLGridworld()
    # create and initialize a weight vector
    scalarization_weights = [1.0, 0.0, 0.0]
    # tau is for chebyshev agent
    tau = 4.0
    # ref point is used for Hypervolume calculation
    ref = [-1.0, ]*problem.reward_dimension
    # learning rate
    alfacheb = 0.11
    # Probability of epsilon-greedy selection
    eps = 0.1
    hv_calc = HyperVolumeCalculator(ref)
    # create one agent using chebyshev scalarization method
    chebyagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb, scalarization_weights=scalarization_weights,
                                      ref_point=ref, tau=tau)

    linearagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb, scalarization_weights=scalarization_weights,
                                       ref_point=ref, tau=tau, function='linear')
    # number of interactions for each agent:
    interactions = 1000

    c_payouts, c_moves, c_states = morl_interact_multiple_episodic(
        chebyagent, problem, interactions, max_episode_length=150)

    l_payouts, l_moves, l_states = morl_interact_multiple_episodic(
        linearagent, problem, interactions, max_episode_length=150)

    c_rewards = []
    for i in xrange(len(c_payouts)):
        cummulated = np.zeros(problem.reward_dimension)
Example #11
    gammas = np.arange(0, 1, 0.1)
    alphas = np.arange(0, 1, 0.1)
    taus = np.arange(0.0, 10.0, 1.0)
    ref_points = [[-1.0, -1.0, -25.0], [-1.0, -25.0, -1.0],
                  [-25.0, -1.0, -1.0]]
    # agents:
    agents = []
    interactions = 600
    if epsilon_experiment:
        log.info('Started epsilon experiment')
        for eps in xrange(len(epsilons)):
            agents.append(
                MORLScalarizingAgent(
                    problem,
                    epsilon=epsilons[eps],
                    alpha=alfacheb,
                    scalarization_weights=scalarization_weights,
                    ref_point=ref,
                    tau=tau,
                    function='chebishev'))
            morl_interact_multiple_episodic(agents[eps], problem, interactions)

        plot_hypervolume(agents, problem, name='epsilon')

    if gamma_experiment:
        log.info('Started gamma experiment')
        for gam in xrange(len(gammas)):
            agents.append(
                MORLScalarizingAgent(
                    problem,
                    epsilon=0.1,
                    alpha=alfacheb,
Example #12
    eps = 0.9
    alfa = 0.3
    runs = 1
    interactions = 500
    max_steps = 150
    tau = 1.0
    ref_point = [-3.0, ] * problem.reward_dimension
    # hvbagent = MORLHVBAgent(problem, alpha=alfa, epsilon=0.9, ref=ref_point, scal_weights=[1.0, 10.0])

    agent = MORLScalarizingAgent(problem,
                                 scalarization_weights,
                                 alpha=alfa,
                                 epsilon=eps,
                                 tau=tau,
                                 lmbda=0.95,
                                 ref_point=ref_point)
    payouts, moves, states = morl_interact_multiple_episodic(
        agent, problem, interactions=interactions)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))

    learned_policy = PolicyFromAgent(problem, agent, mode='greedy')

    states = problem.create_plottable_states(states)

    policy_plot2(problem, learned_policy, title=None, filename=None)
    policy_heat_plot(problem, learned_policy, states)
    plot_hypervolume([agent], problem)
    log.info('Average Payout: %s' % (str(payouts.mean(axis=0))))
Example #13
        problem = MOPuddleworldProblem(size=20)
        scalarization_weights = np.array([1.0, 0.0])
        max_episode_l = 200

        alfa = 0.1
        tau = 1.0

        interactions = 50

        eps = 0.9

        agent = MORLScalarizingAgent(problem,
                                     scalarization_weights,
                                     alpha=alfa,
                                     epsilon=eps,
                                     tau=tau,
                                     lmbda=1.0,
                                     ref_point=[-1.0, -1.0])

        payouts, moves, states = morl_interact_multiple_episodic(
            agent,
            problem,
            interactions=interactions,
            max_episode_length=max_episode_l)
        agent.create_scalar_Q_table()
        x = [w for w in xrange(problem._size)]
        y = [d for d in xrange(problem._size)]
        x, y = np.meshgrid(x, y)
        z = np.array([
            max([agent.Qs[s, a] for a in xrange(problem.n_actions)])
Example #14
    alfa = 0.1
    runs = 3
    interactions = 500
    episode_length = 150
    tau = 4.0  # only for Chebyshev and deepsea
    gamma = 0.9
    ref = [-1.0, -1.0, -1.0]  # reference point for hypervolume calculation
    # Select a learning agent:
    # agent1 = QMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = PreScalarizedQMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSAMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps)
    # agent = SARSALambdaMorlAgent(problem, scalarization_weights, alpha=alfa, epsilon=eps, lmbda=0.9)
    agent = MORLScalarizingAgent(problem,
                                 scalarization_weights,
                                 alfa,
                                 eps,
                                 tau,
                                 ref_point=ref,
                                 gamma=gamma,
                                 function='chebishev')
    # agent = MORLHVBAgent(problem, alfa, eps, ref, scalarization_weights)
    # Run the experiment one time for the given number of interactions
    payouts, moves, states = morl_interact_multiple_episodic(
        agent,
        problem,
        interactions=interactions,
        max_episode_length=episode_length)

    # only for multidimensional state problems (resource gathering, buridans ass)
    states = problem.create_plottable_states(states)
    # Repeat experiment for "runs" times and average the results
    # payouts, moves, states = morl_interact_multiple_average_episodic(agent, problem, runs=runs, interactions=interactions,