Example #1
    def test_multiDimNetworkQLearnigAgent(self):
        # pylint: disable=line-too-long
        q_agent = NetworkQLearningAgent(self.multi_dim_ndp,
                                        Ne=5,
                                        Rplus=2,
                                        alpha=lambda n: 60. / (59 + n),
                                        delta=0.5,
                                        max_iterations=100,
                                        calc_status=True)

        for _ in range(101):
            q_agent.reset()
            run_single_trial(q_agent, self.test_multi_dim_mdp,
                             self.sensor_model, self.motor_model)

        for status in ['energy', 'water']:
            l.debug('----- ' + status + '------')
            U, pi = q_agent.Q_to_U_and_pi()[status]

            # also print the utilities and the policy
            U1 = sorted(U.items(), key=lambda x: x[0])
            pi1 = sorted(pi.items(), key=lambda x: x[0])
            print_grid(U1)
            print_grid(pi1)

        save_csv_file('two_dim.csv', [self.multi_dim_ndp.history],
                      self.ndp.history_headers, OUTPUT_DIR)
        l.debug('test_multiDimNetworkQLearnigAgent:',
                self.multi_dim_ndp.statuses)
Example #2
    def test_networkQLearnigAgentPrintDetails(self):
        for i in range(100):
            l.debug('**** TRIAL: ', i, ' ****')
            q_agent = NetworkQLearningAgent(self.ndp,
                                            Ne=5,
                                            Rplus=2,
                                            alpha=lambda n: 60. / (59 + n),
                                            delta=0.5,
                                            max_iterations=100,
                                            calc_status=True)
            q_agent.reset()

            run_single_trial(q_agent, self.test_mdp, self.sensor_model,
                             self.motor_model, DEBUG_MODE)
Example #3
class GridAgent(Agent):
    def __init__(self, objectives, landmarks):
        # pylint: disable=line-too-long, too-many-locals

        super().__init__(None, 'grid_agent')

        N = Network(None, objectives)
        SENSOR = N.add_SENSOR_node
        self.status = N.get_NEEDs()
        self.status_history = {'energy': [], 'water': []}

        # Create sensors
        SENSOR(Water)
        SENSOR(Energy)
        # create one SENSOR for each square
        sensor_dict = {}
        for lm in landmarks:
            sensor_dict[frozenset([SENSOR(Landmark, lm)])] = lm
        network_model = NetworkModel(sensor_dict)

        M = MotorNetwork(motors, motors_to_action)

        # NOTE: init=agent_start_pos is a location here (used only for debugging);
        #       when MDPs are used, init is a state
        self.ndp = NetworkDP(agent_start_pos, self.status, motor_model, .9,
                             network_model)
        self.q_agent = NetworkQLearningAgent(self.ndp,
                                             Ne=0,
                                             Rplus=2,
                                             alpha=lambda n: 60. / (59 + n),
                                             epsilon=0.2,
                                             delta=0.5)

        # compose applies the functions from right to left
        self.program = compose(
            do(partial(l.debug, 'mnetwork.update')), M.update,
            do(partial(l.debug, 'q_agent')), self.q_agent,
            do(partial(l.debug, N)), do(partial(l.debug,
                                                'network.update')), N.update,
            do(partial(l.debug, 'percept')),
            lambda x: do(partial(l.debug, '*** ENERGY FOUND ***'))(x)
            if 'energy' in x[1] and x[1]['energy'] > 0.0 else x,
            lambda x: do(partial(l.debug, '*** WATER FOUND ***'))(x)
            if 'water' in x[1] and x[1]['water'] > 0.0 else x, do(self.printU))

    def __repr__(self):
        return '<{} ({})>'.format(self.__name__, self.__class__.__name__)

    def printU(self, _):
        for status in ['energy', 'water']:
            l.info('----- ' + status + '------')
            U, pi = self.q_agent.Q_to_U_and_pi()[status]

            l.info('Utilities:')
            U = {k: '{0:.3f}'.format(v) for k, v in U.items()}
            print_grid(U)
            l.info('Policy:')
            print_grid(pi)
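A note on the compose/do pipeline built in self.program above: compose applies its arguments right to left, so the percept enters at the rightmost function and flows left through N.update, the Q-learning agent and M.update, while each do(...) wrapper only runs its side effect (logging here) and passes the value through unchanged. The sketch below illustrates that behaviour with stand-in definitions; the real compose and do come from imports not shown in these examples, so treat the helpers as assumptions about their semantics rather than the actual implementations.

from functools import partial, reduce

def compose(*funcs):
    # Right-to-left composition: compose(f, g)(x) == f(g(x)).
    def composed(x):
        return reduce(lambda acc, f: f(acc), reversed(funcs), x)
    return composed

def do(side_effect):
    # Call side_effect(x) for its effect (e.g. logging) and return x unchanged.
    def passthrough(x):
        side_effect(x)
        return x
    return passthrough

# A percept-like value flows right to left, being logged at each stage.
pipeline = compose(
    do(partial(print, 'after doubling:')), lambda x: x * 2,
    do(partial(print, 'input:')))

assert pipeline(3) == 6  # prints "input: 3" then "after doubling: 6"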
Example #4
    def __init__(self, objectives):
        # pylint: disable=line-too-long

        super().__init__(None, 'calf')

        motors = ['eat_and_forward', 'forward', 'dive_and_forward',
                  'up_and_forward']

        eat_and_forward, forward = frozenset([0]), frozenset([1])
        dive_and_forward, up_and_forward = frozenset([2]), frozenset([3])

        motors_to_action = {eat_and_forward: 'eat_and_forward',
                            forward: 'forward',
                            dive_and_forward: 'dive_and_forward',
                            up_and_forward: 'up_and_forward',
                            '*': '-'}

        motor_model = MotorModel(motors_to_action)


        self.network = N = Network(None, objectives)
        self.status = N.get_NEEDs()
        self.status_history = {'energy':[]}
        s1 = N.add_SENSOR_node(Squid)
        s2 = N.add_SENSOR_node(Song)
        self.network_model = NetworkModel({frozenset([]): 'no_sensors',
                                           frozenset([s1]): 'squid',
                                           frozenset([s2]): 'song',
                                           frozenset([s1, s2]): 'squid_and_song'})

        self.motor_network = M = MotorNetwork(motors, motors_to_action)

        # NOTE: init=agent_start_pos is a location here (used only for debugging);
        #       when MDPs are used, init is a state
        self.ndp = NetworkDP(calf_start_pos, self.status, motor_model, gamma=.9,
                             network_model=self.network_model)
        self.q_agent = NetworkQLearningAgent(self.ndp, Ne=0, Rplus=2,
                                             alpha=lambda n: 60./(59+n),
                                             epsilon=0.2,
                                             delta=0.5)

        # compose applies the functions from right to left
        self.program = compose(do(partial(l.debug, 'Calf mnetwork.update'))
                               , do(partial(l.debug, M))
                               , lambda a: do(partial(l.debug, '*** CALF EATING! ***'))(a) if a == 'eat_and_forward' else a
                               , M.update
                               , do(partial(l.debug, 'Calf q_agent'))
                               , self.q_agent
                               , do(partial(l.debug, N))
                               , lambda p: do(partial(l.debug, '*** CALF HEARD SONG! ***'))(p) if s2 in p[0] else p
                               , lambda p: do(partial(l.debug, '*** CALF FOUND SQUID! ***'))(p) if s1 in p[0] else p
                               , do(partial(l.debug, 'Calf network.update'))
                               , N.update
                               , do(partial(l.debug, 'Calf percept'))
                              )
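The motors_to_action dictionaries in these agents map frozensets of active motor indices to action names, with the '*' entry apparently acting as a catch-all for unmapped combinations. The helper below is a hypothetical illustration of that assumed fallback lookup; how MotorModel and MotorNetwork actually resolve an action is not shown in these examples and may differ.

def resolve_action(motors_to_action, active_motors):
    # Hypothetical resolver: exact frozenset match first, '*' wildcard otherwise.
    # MotorModel's real lookup logic may differ.
    return motors_to_action.get(frozenset(active_motors),
                                motors_to_action.get('*'))

motors_to_action = {frozenset([0]): 'eat_and_forward',
                    frozenset([1]): 'forward',
                    frozenset([2]): 'dive_and_forward',
                    frozenset([3]): 'up_and_forward',
                    '*': '-'}

assert resolve_action(motors_to_action, [1]) == 'forward'
assert resolve_action(motors_to_action, [0, 2]) == '-'  # unknown combination falls back to '*'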
Example #5
    def __init__(self, objectives):
        # pylint: disable=line-too-long, too-many-locals

        # program=None
        super().__init__(None, 'mom')

        # Motors and actions
        motors = ['sing_eat_and_forward', 'forward', 'dive_and_forward',
                  'up_and_forward']

        sing_eat_and_forward, forward = frozenset([0]), frozenset([1])
        dive_and_forward, up_and_forward = frozenset([2]), frozenset([3])

        motors_to_action = {sing_eat_and_forward: 'sing_eat_and_forward',
                            forward: 'forward',
                            dive_and_forward: 'dive_and_forward',
                            up_and_forward: 'up_and_forward',
                            '*': '-'}

        motor_model = MotorModel(motors_to_action)


        self.network = N = Network(None, objectives)
        self.status = N.get_NEEDs()
        self.status_history = {'energy':[]}
        s1 = N.add_SENSOR_node(Squid)
        self.network_model = NetworkModel({frozenset(): 'no_sensors',
                                           frozenset([s1]): 'squid'})

        self.motor_network = M = MotorNetwork(motors, motors_to_action)

        # NOTE: init=agent_start_pos is a location here (used only for debugging);
        #       when MDPs are used, init is a state
        self.ndp = NetworkDP(mom_start_pos, self.status, motor_model, gamma=.9,
                             network_model=self.network_model)
        self.q_agent = NetworkQLearningAgent(self.ndp, Ne=0, Rplus=2,
                                             alpha=lambda n: 60./(59+n),
                                             epsilon=0.2,
                                             delta=0.5)

        # compose applies the functions from right to left
        self.program = compose(do(partial(l.debug, 'Mom mnetwork.update'))
                               , do(partial(l.debug, M))
                               , M.update
                               , do(partial(l.debug, 'Mom q_agent'))
                               , self.q_agent
                               , do(partial(l.debug, N))
                               , do(partial(l.debug, 'Mom network.update'))
                               , N.update
                               , do(partial(l.debug, 'Mom percept'))
                              )
Example #6
    def test_networkQLearnigAgent(self):
        # pylint: disable=line-too-long
        self.assertTrue(
            self.sensor_model.model == {
                (True, False, False, False, False, False, False, False, False, False, False):
                'a',
                (False, True, False, False, False, False, False, False, False, False, False):
                'b',
                (False, False, True, False, False, False, False, False, False, False, False):
                'c',
                (False, False, False, True, False, False, False, False, False, False, False):
                'd',
                (False, False, False, False, True, False, False, False, False, False, False):
                'e',
                (False, False, False, False, False, True, False, False, False, False, False):
                'f',
                (False, False, False, False, False, False, True, False, False, False, False):
                'g',
                (False, False, False, False, False, False, False, True, False, False, False):
                'h',
                (False, False, False, False, False, False, False, False, True, False, False):
                'i',
                (False, False, False, False, False, False, False, False, False, True, False):
                'j',
                (False, False, False, False, False, False, False, False, False, False, True):
                'k'
            })

        self.assertTrue(
            self.ndp.actlist == [(False, True), (False, False),
                                 (True, True), (True, False)])

        q_agent = NetworkQLearningAgent(self.ndp,
                                        Ne=5,
                                        Rplus=2,
                                        alpha=lambda n: 60. / (59 + n),
                                        delta=0.5,
                                        max_iterations=100,
                                        calc_status=True)

        for i in range(100):
            l.debug('**** TRIAL: ', i, ' ****')
            q_agent.reset()
            run_single_trial(q_agent, self.test_mdp, self.sensor_model,
                             self.motor_model, DEBUG_MODE)

        U, pi = q_agent.Q_to_U_and_pi()['energy']

        # also print the utilities and the policy
        U1 = sorted(U.items(), key=lambda x: x[0])
        pi1 = sorted(pi.items(), key=lambda x: x[0])
        l.debug('------------------')
        print_grid(U1)
        print_grid(pi1)

        l.debug('AAA', U, U1)
        l.debug('BBB', pi, pi1)
        l.debug('CCC', q_agent)
        save_csv_file('one_dim.csv', [self.ndp.history],
                      self.ndp.history_headers, OUTPUT_DIR)

        # check utilities and policy
        # TODO: Seems difficult to get the same result at each run. Probably due to tests running in parallel.
        #self.assertTrue(U == {'h': 0.5675987591078075, 'i': 0.4286810287409787, 'j': 0.3330852421527908, 'k': -0.04, 'g': 0.48303073762721593, 'f': 0.35799047401701395, 'e': 0.7127484585669493, 'c': 1.302492662358114, 'd': 0.4742671906118568, 'a': 0.8593590286870549, 'b': 1.0802110658809299})
        #self.assertTrue(pi == {'h': '^', 'i': '<', 'j': '<', 'k': None, 'g': '<', 'f': '<', 'e': '^', 'c': '>', 'd': '>', 'a': '>', 'b': '>'})

        l.debug('test_networkQLearnigAgent:', self.ndp.statuses)