Exemplo n.º 1
0
    def observation(self, agent, world):
        def communications_observed(other_agent):
            ''' fill in information communicated between agents
            '''
            comms = delta_pos(other_agent, agent).tolist()
            comms += [other_agent.state.c]
            # will only work with zero-padding formatting
            # TODO: I think non-communication should send None instead of zero, because zero has real meaning
            #   however this causes a problem with action function
            if distance(agent, other_agent) > world.max_communication_distance:
                comms = [0] * len(comms)
            return comms

        landmark_positions = format_observation(
            observe=lambda landmark: delta_pos(landmark, agent).tolist(),
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=world.dim_p)

        communications = format_observation(
            observe=communications_observed,
            objects=[
                a for a in world.agents
                if (a is not agent and not a.terminated)
            ],
            num_observations=self.num_agents,
            observation_size=world.dim_p + 1,
            sort_key=lambda o: distance(agent, o))

        return np.asarray(agent.state.p_pos.tolist() + landmark_positions +
                          communications)
Exemplo n.º 2
0
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''
            ag_obs = delta_pos(other_agent, agent).tolist()

            # check if within observation range
            if distance(agent, other_agent) > world.max_communication_distance:
                ag_obs = [0] * len(ag_obs)

            return ag_obs

        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            ld_obs = delta_pos(landmark, agent).tolist()

            # check if within observation range and is observable
            if (distance(landmark, agent) > world.max_communication_distance
                    or not landmark.is_observable()):
                ld_obs = [0] * len(ld_obs)

            return ld_obs

        landmark_positions = format_observation(observe=observe_landmarks,
                                                objects=world.landmarks,
                                                num_observations=len(
                                                    world.landmarks),
                                                observation_size=world.dim_p)

        communications = format_observation(
            observe=observe_agents,
            objects=[
                a for a in world.agents
                if (a is not agent and not a.terminated)
            ],
            num_observations=self.num_agents,
            observation_size=world.dim_p,
            sort_key=lambda o: distance(agent, o))

        obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions +
                         communications)

        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all
            #   then set to zero. Done this way to ensure consistant array size
            obs = 0.0 * obs

        return obs
Exemplo n.º 3
0
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            # ld_obs = delta_pos(landmark, agent).tolist()
            ld_obs = landmark.state.p_pos.tolist()

            # check if landmark is giving reward or hazard warning
            d = distance(landmark, agent)
            if d < landmark.size:
                if landmark.is_hazard:
                    landmark.hazard_tag = 1.0
                    landmark.color = np.array([1.1, 0, 0])
            ld_obs += [landmark.hazard_tag]

            assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN)
            return ld_obs

        landmark_positions = format_observation(
            observe=observe_landmarks,
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=_LANDMARK_OBSERVATION_LEN)

        # new_obs = np.asarray(agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + landmark_positions + agent_observations)
        new_obs = np.asarray(agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() + landmark_positions)

        return new_obs
Exemplo n.º 4
0
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def communications_observed(other_comm_node):
            ''' Communication between agents is just the conductance
            Notes:
             - inverse of comm resistance (i.e. conductance) used so that we can use
             zero for out of range comms
             - noisy measurement of heading
             - TODO: observation of failures
            '''

            # check if node is terminated
            is_terminated = 0
            if isinstance(other_comm_node, MortalAgent) and other_comm_node.terminated:
                is_terminated = 1

            dx = dy = dvx = dvy = 0.
            if not is_terminated:
                dx, dy = delta_pos(other_comm_node, agent)
                dvx, dvy = delta_vel(other_comm_node, agent)

            comms = [is_terminated, dx, dy, dvx, dvy]
            # dx_noisy = dx + np.random.normal(0.0, 0.01)
            # dy_noisy = dy + np.random.normal(0.0, 0.01)
            # comms = [dx_noisy, dy_noisy]
            # heading = np.arctan2(dy,dx) + np.random.normal(0.0, 0.1)
            # conductance = 1.0/self._calculate_resistance(agent, other_comm_node, world)
            # comms = [heading, conductance]

            # set comms to zero if out for range
            # if distance(agent, other_comm_node) > agent.max_observation_distance:
            #     comms = [0] * len(comms)

            return comms

        # comm_nodes are origin and destination terminals and unterminated agents
        comm_nodes = world.landmarks[0:2]
        comm_nodes.extend([a for a in world.agents if a is not agent])
        communications = format_observation(observe = communications_observed, 
                                            objects = comm_nodes,
                                            num_observations = (self.num_agents-1) + 2, 
                                            observation_size = 2*world.dim_p + 1)

        
        # observe velocity if not terminated
        vel_obs = [0, 0]
        if not agent.terminated:
            vel_obs = agent.state.p_vel.tolist()

        # package observation
        obs = np.asarray(vel_obs + agent.state.p_pos.tolist() + communications)

        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all
            #   then set to zero. Done this way to ensure consistant array size
            obs = 0.0*obs

        return obs
Exemplo n.º 5
0
 def observation(self, agent, world):
     # get positions of all entities in this agent's reference frame
     landmark_positions =  format_observation(observe = lambda landmark: delta_pos(agent, landmark).tolist(),
                                              objects = world.landmarks, 
                                              num_observations = len(world.landmarks), 
                                              observation_size = world.dim_p)
     obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions)
     return obs
Exemplo n.º 6
0
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def communications_observed(other_agent):
            ''' fill in information communicated between agents
            '''
            comms = delta_pos(other_agent, agent).tolist()
            comms += [
                self._calculate_landmark_resistance_gain(
                    agent, other_agent, world)
            ]
            # will only work with zero-padding formatting
            # TODO: I think non-communication should send None instead of zero, because zero has real meaning
            #   however this causes a problem with action function
            if distance(agent, other_agent) > world.max_communication_distance:
                comms = [0] * len(comms)
            return comms

        landmark_positions = format_observation(
            observe=lambda landmark: delta_pos(landmark, agent).tolist(),
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=world.dim_p)

        communications = format_observation(
            observe=communications_observed,
            objects=[
                a for a in world.agents
                if (a is not agent and not a.terminated)
            ],
            num_observations=self.num_agents,
            observation_size=world.dim_p + 1,
            sort_key=lambda o: distance(agent, o))

        obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions +
                         communications)

        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all
            #   then set to zero. Done this way to ensure consistant array size
            obs = 0.0 * obs

        return obs
    def _direct_observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def communications_observed(other_comm_node):
            ''' Communication between agents is just the conductance
            Notes:
             - inverse of comm resistance (i.e. conductance) used so that we can use
             zero for out of range comms
             - noisy measurement of heading
             - TODO: observation of failures
            '''

            # check if node is terminated
            is_terminated = 0
            if isinstance(other_comm_node,
                          MortalAgent) and other_comm_node.terminated:
                is_terminated = 1

            dx = dy = dvx = dvy = 0.
            if not is_terminated:
                dx, dy = delta_pos(other_comm_node, agent)
                dvx, dvy = delta_vel(other_comm_node, agent)

            comms = [is_terminated, dx, dy, dvx, dvy]

            # set comms to zero if out for range
            # if distance(agent, other_comm_node) > agent.max_observation_distance:
            if not check_2way_communicability(agent, other_comm_node):
                comms = [0] * len(comms)

            return comms

        # Observe communication terminals
        terminals = (world.origin_terminal_landmark.state.p_pos.tolist() +
                     world.destination_terminal_landmark.state.p_pos.tolist())

        # comm_nodes are origin and destination terminals and unterminated agents
        comm_nodes = []
        comm_nodes.extend([a for a in world.agents if a is not agent])
        communications = format_observation(
            observe=communications_observed,
            objects=comm_nodes,
            num_observations=self.num_agents - 1,
            observation_size=2 * world.dim_p + 1)

        # package observation
        obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() +
                         agent.state.p_pos.tolist() + terminals +
                         communications)

        return obs
Exemplo n.º 8
0
    def test_format_observation_1(self):
        ''' test observation formatting basic functionality
        '''

        # trial 1
        ag = [0,0]
        a1 = [1,1]
        a2 = [-1.0, 1.0]
        observe = lambda a: [a[0]-ag[0], a[1]-ag[1]]
        objects = [a1, a2]
        num_observations = 2
        observation_size = 2
        observation = ol.format_observation(observe=observe, 
                                            objects=objects,    
                                            num_observations=num_observations, 
                                            observation_size=observation_size)
        self.assertEqual(len(observation), 4)
        self.assertAlmostEqual(observation[0], 1.0)
        self.assertAlmostEqual(observation[1], 1.0)
        self.assertAlmostEqual(observation[2], -1.0)
        self.assertAlmostEqual(observation[3], 1.0)
Exemplo n.º 9
0
    def test_format_observation_2(self):
        ''' test observation formatting for padding
        '''

        # trial 1
        ag = [0,0]
        a1 = [1,1]
        a2 = [-1.0, 1.0]
        observe = lambda a: [a[0]-ag[0], a[1]-ag[1]]
        objects = [a1, a2]
        num_observations = 5
        observation_size = 2
        observation = ol.format_observation(observe=observe, 
                                            objects=objects,    
                                            num_observations=num_observations, 
                                            observation_size=observation_size)
        self.assertEqual(len(observation), 10)
        self.assertAlmostEqual(observation[0], 1.0)
        self.assertAlmostEqual(observation[1], 1.0)
        self.assertAlmostEqual(observation[2], -1.0)
        self.assertAlmostEqual(observation[3], 1.0)
        for i in range(4, len(observation)):
            self.assertAlmostEqual(observation[i], 0.0)
    def _direct_observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''

            # check if node is terminated
            is_terminated = 0
            if isinstance(other_agent, MortalAgent) and other_agent.terminated:
                is_terminated = 1

            # relative speed and position of other agent
            # dx = dy = dvx = dvy = 0.
            # if not is_terminated:
            dx, dy = delta_pos(other_agent, agent)
            dvx, dvy = delta_vel(other_agent, agent)

            # get local reward function at position of other agent
            other_landmark_sensor_reading = 0.0
            for lm in world.landmarks:
                other_landmark_sensor_reading += lm.reward_fn.get_value(
                    *other_agent.state.p_pos)

            ag_obs = [
                is_terminated, dx, dy, dvx, dvy, other_landmark_sensor_reading
            ]
            assert (len(ag_obs) == _AGENT_OBSERVATION_LEN)
            return ag_obs

        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            # ld_obs = delta_pos(landmark, agent).tolist()

            # # check if within observation range and is observable
            # d = distance(landmark, agent)
            # if d > world.max_communication_distance:
            #     ld_obs = [0.0]*len(ld_obs)

            ld_obs = []

            # check reward signal received from landmark
            landmark_sensor_reading = 0.0
            for lm in world.landmarks:
                landmark_sensor_reading += lm.reward_fn.get_value(
                    *agent.state.p_pos)

            ld_obs += [landmark_sensor_reading]

            assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN)
            return ld_obs

        landmark_observations = format_observation(
            observe=observe_landmarks,
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=_LANDMARK_OBSERVATION_LEN)

        agent_observations = format_observation(
            observe=observe_agents,
            objects=[a for a in world.agents if a is not agent],
            num_observations=self.num_agents - 1,
            observation_size=_AGENT_OBSERVATION_LEN)

        new_obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() +
                             landmark_observations + agent_observations)

        # append previous observation for velocity estimation
        if agent.previous_observation is None:
            agent.previous_observation = 0.0 * new_obs
        obs = np.append(new_obs, agent.previous_observation)
        agent.previous_observation = new_obs

        return obs
Exemplo n.º 11
0
    def _direct_observation(self, agent, world):
        ''' observation where each entity's state has it's own component of observe vector '''

        # get positions of all entities in this agent's reference frame
        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''
            # check if node is terminated
            is_terminated = 0
            if isinstance(other_agent, MortalAgent) and other_agent.terminated:
                is_terminated = 1

            dx = dy = dvx = dvy = 0.
            if not is_terminated:
                dx, dy = delta_pos(other_agent, agent)
                dvx, dvy = delta_vel(other_agent, agent)

            ag_obs = [is_terminated, dx, dy, dvx, dvy]
            assert (len(ag_obs) == _AGENT_OBSERVATION_LEN)
            return ag_obs

        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            ld_obs = delta_pos(landmark, agent).tolist()

            # check if within observation range and is observable
            d = distance(landmark, agent)
            if d > world.max_communication_distance:
                ld_obs = [0.0] * len(ld_obs)

            # check if landmark is giving reward or hazard warning
            if d < landmark.size:
                if landmark.is_hazard:
                    landmark.hazard_tag = 1.0
                    landmark.color = np.array([1.1, 0, 0])
            ld_obs += [landmark.hazard_tag]

            assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN)
            return ld_obs

        landmark_positions = format_observation(
            observe=observe_landmarks,
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=_LANDMARK_OBSERVATION_LEN)

        agent_observations = format_observation(
            observe=observe_agents,
            objects=[a for a in world.agents if (a is not agent)],
            num_observations=self.num_agents - 1,
            observation_size=_AGENT_OBSERVATION_LEN)

        new_obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() + landmark_positions +
                             agent_observations)

        # append previous observation for velocity estimation
        # if agent.previous_observation is None:
        #     agent.previous_observation = 0.0*new_obs
        # obs = np.append(new_obs, agent.previous_observation)
        # agent.previous_observation = new_obs

        return new_obs
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''
            # check if node is terminated
            is_terminated = 0
            if isinstance(other_agent, MortalAgent) and other_agent.terminated:
                is_terminated = 1

            # relative speed and position of other agent
            # dx = dy = dvx = dvy = 0.
            # if not is_terminated:
            dx, dy = delta_pos(other_agent, agent)
            dvx, dvy = delta_vel(other_agent, agent)

            # get local reward function at position of other agent
            other_sensed_reward = 0.0
            for lm in world.landmarks:
                other_sensed_reward += lm.reward_fn.get_value(
                    *other_agent.state.p_pos)

            ag_obs = [is_terminated, dx, dy, dvx, dvy, other_sensed_reward]
            assert (len(ag_obs) == _AGENT_OBSERVATION_LEN)
            return ag_obs

        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            # ld_obs = delta_pos(landmark, agent).tolist()

            # # check if within observation range and is observable
            # d = distance(landmark, agent)
            # if d > world.max_communication_distance:
            #     ld_obs = [0.0]*len(ld_obs)

            ld_obs = []

            # check reward signal received from landmark
            sensed_reward = 0.0
            for lm in world.landmarks:
                sensed_reward += lm.reward_fn.get_value(*agent.state.p_pos)

            ld_obs += [sensed_reward]

            assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN)
            return ld_obs

        landmark_observations = format_observation(
            observe=observe_landmarks,
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=_LANDMARK_OBSERVATION_LEN)

        agent_observations = format_observation(
            observe=observe_agents,
            objects=[
                a for a in world.agents
                if (a is not agent and not a.terminated)
            ],
            num_observations=self.num_agents - 1,
            observation_size=_AGENT_OBSERVATION_LEN)

        new_obs = np.asarray(agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() +
                             landmark_observations + agent_observations)

        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all
            #   then set to zero. Done this way to ensure consistant array size
            new_obs = 0.0 * new_obs

        # append previous observation for velocity estimation
        if agent.previous_observation is None:
            agent.previous_observation = 0.0 * new_obs
        obs = np.append(new_obs, agent.previous_observation)
        agent.previous_observation = new_obs

        return obs