# Example 1
    def __init__(self, morl_problem, gamma=0.9):
        """
        Constructor
        :param morl_problem: problem we train on; must expose n_states,
            n_actions and reward_dimension
        :param gamma: discount factor
        """
        self._problem = morl_problem
        self._gamma = gamma
        # shape of the full multi-objective Q table:
        # (states, actions, reward components)
        self._q_shape = (morl_problem.n_states, morl_problem.n_actions,
                         morl_problem.reward_dimension)
        # flat index for every (state, action) pair, assigned in
        # row-major enumeration order
        self.s_a_mapping = dict()
        # range instead of Py2-only xrange: identical iteration, Py3-safe
        for s in range(self._problem.n_states):
            for a in range(self._problem.n_actions):
                self.s_a_mapping[s, a] = len(self.s_a_mapping)
        # one Q-set per (state, action) pair, each starting with a single
        # all-zero reward vector
        self._Q_sets = [[[0] * self._problem.reward_dimension]
                        for _ in range(len(self.s_a_mapping))]
        # one vector set per state, same all-zero initialisation
        self._V = [[[0] * self._problem.reward_dimension]
                   for _ in range(self._problem.n_states)]
        # reference point for the hypervolume metric (dominated by any
        # achievable reward vector)
        ref = [-1.0] * self._problem.reward_dimension
        self.hv_calculator = HyperVolumeCalculator(ref)
        # scalar Q table used for action selection
        self._Q = np.zeros((morl_problem.n_states, morl_problem.n_actions))
# Example 2
 def setUp(self):
     """Build reference points, random 2D/3D sample sets and the two
     hypervolume calculators exercised by the tests."""
     # reference points for the 2D and 3D hypervolume computations
     self.ref_point2d = [0.1, 0.1]
     self.ref_point3d = [0.1, 0.1, 0.1]
     # random sample points, each coordinate drawn uniformly from [0, 1);
     # rows are filled left-to-right so the random stream is consumed in
     # the same order as an explicit nested loop would
     self.set2d = np.array([[random.random() for _ in range(2)]
                            for _ in range(70)])
     self.set3d = np.array([[random.random() for _ in range(3)]
                            for _ in range(100)])
     # calculators under test
     self.hv_2d_calc = HyperVolumeCalculator(self.ref_point2d)
     self.hv_3d_calc = HyperVolumeCalculator(self.ref_point3d)
from inspyred.ec.analysis import hypervolume
import matplotlib as mpl
if __name__ == '__main__':
    # LaTeX text rendering with STIX fonts for publication-quality labels
    mpl.rc('text', usetex=True)
    mpl.rcParams['mathtext.fontset'] = 'stix'
    mpl.rcParams['font.family'] = 'STIXGeneral'
    # number of random sample points
    count = 20
    # fixed seed for a reproducible point cloud
    random.seed(18.9654)
    # reference point for the 2D hypervolume computation
    ref_point2d = [0.0, 0.3]
    set2d = np.zeros((count, 2))
    for i in range(count):
        for u in range(2):
            rand = random.random()
            # NOTE(review): 'rand > ref_point2d' compares a float with a
            # list; under Python 2 that comparison is always False, so only
            # 'rand > 0.3' has any effect. Probably ref_point2d[u] was
            # intended -- confirm before changing.
            set2d[i, u] = rand if (rand > ref_point2d) or (
                rand > 0.3) else random.random()
    hv_2d_calc = HyperVolumeCalculator(ref_point2d)
    # extract the Pareto front of the sample set and compute its hypervolume
    pf = hv_2d_calc.extract_front(set2d)
    hv = hv_2d_calc.compute_hv(pf)
    # figure width chosen to fit a half text-column of a LaTeX document
    size = 0.48 * 5.8091048611149611602
    fig = plt.figure(figsize=[size, 0.75 * size])

    fig.set_size_inches(size, 0.7 * size)

    ax = fig.add_subplot(111)

    # draw grid lines below the plotted data
    ax.set_axisbelow(True)

    ###########################################################################################
    # NOTE(review): this plt.axis(...) call is truncated at the end of this
    # chunk; its remaining arguments continue outside the visible source.
    plt.axis(
        [-0.06,
         max(set2d[:, 0] * 1.06), 0 - 0.18,
# Example 4
    # NOTE(review): this fragment starts mid-function; 'problem',
    # 'n_vectors' and 'deltah' are defined above, outside this chunk.
    # epsilon = probability that the agent chooses a greedy action instead of a random action
    epsilon = 0.9
    # learning rate
    alfah = 0.65
    # how many interactions should the agent train per weight?
    interactions = 1000
    # how many episodes in one interaction should be taken? (if the problem gets in a terminal state, it will be
    # interrupted anyway (episodes = steps in the environment = actions)
    max_per_interaction = 100
    # count of final weighted average rewards that don't differ from the last ones to interrupt and signify convergence
    converging_criterium = 20
    # reference point for the hypervolume indicator, slightly below the origin
    ref = [
        -0.001,
    ] * problem.reward_dimension
    # we want to evaluate both policy sets with the hypervolume indicator
    hv_calculator = HyperVolumeCalculator(ref)
    # create agent
    # agent = MultipleCriteriaH(problem, n_vectors, delta, epsilon, alfa, interactions, max_per_interaction,
    #                          converging_criterium)
    agent = MultipleCriteriaH(problem, n_vectors, deltah, epsilon, alfah,
                              interactions, max_per_interaction,
                              converging_criterium)

    # start the training
    agent.weight_training()

    hvs = []
    # collect every learned policy-value vector (rho) as a Pareto-front candidate
    rho = [i for i in agent.rhos.values()]
    pf = []
    for i in xrange(len(rho)):
        pf.append(rho[i])
# Example 5
from morlbench.helpers import HyperVolumeCalculator
import matplotlib as mpl
if __name__ == '__main__':
    # fixed seed for a reproducible point cloud
    random.seed(3323)
    # LaTeX text rendering with STIX fonts for publication-quality labels
    mpl.rc('text', usetex=True)
    mpl.rcParams['mathtext.fontset'] = 'stix'
    mpl.rcParams['font.family'] = 'STIXGeneral'

    # number of random sample points
    u = 20
    # reference point for the 2D hypervolume computation
    ref_point2d = [0.1, 0.1]
    set2d = np.zeros((u, 2))
    # NOTE(review): the inner loop below rebinds 'u' (the point count) as its
    # loop variable; range(u) is evaluated before the rebinding so the loops
    # run as intended, but the shadowing is fragile -- consider renaming.
    for i in range(u):
        for u in range(2):
            rand = random.random()
            # NOTE(review): 'rand > ref_point2d' compares a float with a
            # list; under Python 2 that is always False, so the else branch
            # (a fresh random draw) is always taken. Probably ref_point2d[u]
            # was intended -- confirm before changing.
            set2d[i, u] = rand if (rand > ref_point2d) else random.random()
    hv_2d_calc = HyperVolumeCalculator(ref_point2d)
    # extract the Pareto front of the sample set
    pf = hv_2d_calc.extract_front(set2d)
    # figure width chosen to fit a half text-column of a LaTeX document
    size = 0.48 * 5.8091048611149611602
    fig = plt.figure(figsize=[size, 0.75 * size])
    fig.set_size_inches(size, 0.7 * size)
    ax = fig.add_subplot(1, 1, 1)
    # axis limits padded slightly beyond the data range
    plt.axis(
        [0 - 0.1,
         max(set2d[:, 0] * 1.21), 0 - 0.1,
         max(set2d[:, 1] * 1.1)])
    plt.setp(ax.get_xticklabels(), fontsize=9)
    plt.setp(ax.get_yticklabels(), fontsize=9)
    # split the Pareto-front points into x and y coordinate lists
    pfx = [pf[i][0] for i in range(len(pf))]
    pfy = [pf[u][1] for u in range(len(pf))]
    # all sample points in red, Pareto-front points in blue on top
    plt.plot(set2d[:, 0], set2d[:, 1], 'ro', markersize=4)
    plt.plot(pfx, pfy, 'bo', markersize=4)
# Example 6
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # create Problem
    problem = MORLGridworld()
    # create and initialize a weight vector (all weight on the first objective)
    scalarization_weights = [1.0, 0.0, 0.0]
    # tau is for chebyshev agent
    tau = 4.0
    # ref point is used for Hypervolume calculation
    ref = [-1.0, ]*problem.reward_dimension
    # learning rate
    alfacheb = 0.11
    # probability of epsilon greedy selection
    eps = 0.1
    hv_calc = HyperVolumeCalculator(ref)
    # create one agent using the chebyshev scalarization method
    chebyagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb, scalarization_weights=scalarization_weights,
                                      ref_point=ref, tau=tau)
    # and one using linear scalarization, otherwise identically configured
    linearagent = MORLScalarizingAgent(problem, epsilon=eps, alpha=alfacheb, scalarization_weights=scalarization_weights,
                                       ref_point=ref, tau=tau, function='linear')
    # both agents interact (times):
    interactions = 1000

    # train both agents episodically and record payouts, moves and states
    c_payouts, c_moves, c_states = morl_interact_multiple_episodic(chebyagent, problem, interactions,
                                                                 max_episode_length=150)

    l_payouts, l_moves, l_states = morl_interact_multiple_episodic(linearagent, problem, interactions,
                                                                 max_episode_length=150)