def communications_observed(other_comm_node):
            ''' communication observed from another comm node
            Notes:
             - the inverse of comm resistance (i.e. conductance) is used so
               that zero can represent out-of-range comms
             - noisy measurement of heading
             - TODO: observation of failures
            '''

            # check if node is terminated
            is_terminated = 0
            if isinstance(other_comm_node,
                          MortalAgent) and other_comm_node.terminated:
                is_terminated = 1

            dx = dy = dvx = dvy = 0.
            if not is_terminated:
                dx, dy = delta_pos(other_comm_node, agent)
                dvx, dvy = delta_vel(other_comm_node, agent)

            comms = [is_terminated, dx, dy, dvx, dvy]

            # set comms to zero if out of range
            # if distance(agent, other_comm_node) > agent.max_observation_distance:
            if not check_2way_communicability(agent, other_comm_node):
                comms = [0] * len(comms)

            return comms
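For context, a minimal sketch of what the check_2way_communicability helper
might look like; the real implementation is not shown in these snippets, and
the attribute names used below are assumptions:

import numpy as np

def check_2way_communicability(node_a, node_b):
    # hypothetical sketch: two-way comms require each node to lie within the
    # other's range (max_communication_distance is an assumed attribute)
    d = np.linalg.norm(node_a.state.p_pos - node_b.state.p_pos)
    return (d <= node_a.max_communication_distance
            and d <= node_b.max_communication_distance)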
Example #2
    def reward(self, agent, world):
        ''' reward value received by each agent
        '''
        # rew = sum([self._individual_reward(a, world) for a in world.agents if not a.terminated])

        if agent.terminated:
            # terminated agents produce no reward
            return 0.0

        total_reward = 0.0
        for ldmrk in world.landmarks:
            if isinstance(ldmrk, RiskRewardLandmark):

                # check if landmark is observable and skip if not
                if not ldmrk.is_observable():
                    continue

                # if observable, evaluate reward
                xrel, yrel = delta_pos(agent, ldmrk)
                base_reward = ldmrk.reward_fn.get_value(xrel, yrel)

                # adjust reward based on cumulative reward distributed (diminishing returns)
                adjusted_reward = base_reward * np.exp(
                    -ldmrk.cumulative_distributed_reward)
                ldmrk.update_cumulative_distributed_reward(adjusted_reward)

                # sum up rewards from all landmarks
                total_reward += adjusted_reward

        return total_reward
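The exponential discount above makes repeated reward collection at a landmark
pay out geometrically less. A standalone sketch of the bookkeeping, where the
two variables stand in for the landmark's reward_fn value and its
cumulative_distributed_reward field:

import numpy as np

base_reward = 1.0  # reward_fn.get_value(...) at a fixed relative position
cumulative = 0.0   # stands in for ldmrk.cumulative_distributed_reward
for step in range(4):
    adjusted = base_reward * np.exp(-cumulative)
    cumulative += adjusted
    print(step, round(adjusted, 4))
# payouts shrink each step: 1.0, 0.3679, 0.2546, 0.1974, ...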
Example #3
        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''

            # check if node is terminated
            is_terminated = 0
            if isinstance(other_agent, MortalAgent) and other_agent.terminated:
                is_terminated = 1

            # relative speed and position of other agent
            # dx = dy = dvx = dvy = 0.
            # if not is_terminated:
            dx, dy = delta_pos(other_agent, agent)
            dvx, dvy = delta_vel(other_agent, agent)

            # get local reward function at position of other agent
            other_landmark_sensor_reading = 0.0
            for lm in world.landmarks:
                other_landmark_sensor_reading += lm.reward_fn.get_value(
                    *other_agent.state.p_pos)

            ag_obs = [
                is_terminated, dx, dy, dvx, dvy, other_landmark_sensor_reading
            ]
            assert (len(ag_obs) == _AGENT_OBSERVATION_LEN)
            return ag_obs
Example #4
    def observation(self, agent, world):
        def communications_observed(other_agent):
            ''' fill in information communicated between agents
            '''
            comms = delta_pos(other_agent, agent).tolist()
            comms += [other_agent.state.c]
            # will only work with zero-padding formatting
            # TODO: I think non-communication should send None instead of zero, because zero has real meaning
            #   however this causes a problem with action function
            if distance(agent, other_agent) > world.max_communication_distance:
                comms = [0] * len(comms)
            return comms

        landmark_positions = format_observation(
            observe=lambda landmark: delta_pos(landmark, agent).tolist(),
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=world.dim_p)

        communications = format_observation(
            observe=communications_observed,
            objects=[
                a for a in world.agents
                if (a is not agent and not a.terminated)
            ],
            num_observations=self.num_agents,
            observation_size=world.dim_p + 1,
            sort_key=lambda o: distance(agent, o))

        return np.asarray(agent.state.p_pos.tolist() + landmark_positions +
                          communications)
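format_observation itself is not defined in these snippets. A minimal sketch
consistent with its call sites; the optional sorting and the zero-padding to a
fixed num_observations * observation_size length are assumptions:

def format_observation(observe, objects, num_observations, observation_size,
                       sort_key=None):
    # hypothetical sketch: apply `observe` to each object (optionally sorted)
    # and zero-pad the flattened result to a fixed total length
    if sort_key is not None:
        objects = sorted(objects, key=sort_key)
    obs = []
    for obj in objects[:num_observations]:
        obs += observe(obj)
    obs += [0.0] * (num_observations * observation_size - len(obs))
    return obs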
Example #5
        def communications_observed(other_comm_node):
            ''' communication observed from another comm node
            Notes:
             - the inverse of comm resistance (i.e. conductance) is used so
               that zero can represent out-of-range comms
             - noisy measurement of heading
             - TODO: observation of failures
            '''

            # check if node is terminated
            is_terminated = 0
            if isinstance(other_comm_node,
                          MortalAgent) and other_comm_node.terminated:
                is_terminated = 1

            dx = dy = dvx = dvy = 0.
            if not is_terminated:
                dx, dy = delta_pos(other_comm_node, agent)
                dvx, dvy = delta_vel(other_comm_node, agent)

            comms = [is_terminated, dx, dy, dvx, dvy]
            # dx_noisy = dx + np.random.normal(0.0, 0.01)
            # dy_noisy = dy + np.random.normal(0.0, 0.01)
            # comms = [dx_noisy, dy_noisy]
            # heading = np.arctan2(dy,dx) + np.random.normal(0.0, 0.1)
            # conductance = 1.0/self._calculate_resistance(agent, other_comm_node, world)
            # comms = [heading, conductance]

            # set comms to zero if out of range
            # if distance(agent, other_comm_node) > agent.max_observation_distance:
            #     comms = [0] * len(comms)

            return comms
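The commented-out lines above correspond to the conductance-based channel the
docstring describes. A sketch of that encoding, assuming _calculate_resistance
returns a positive resistance that becomes np.inf when the nodes are out of
range, so the conductance naturally falls to zero:

import numpy as np

def conductance_comms(resistance, dx, dy, heading_noise_std=0.1):
    # hypothetical sketch of the noisy-heading-plus-conductance message;
    # resistance == np.inf encodes an unreachable node (conductance 0.0)
    heading = np.arctan2(dy, dx) + np.random.normal(0.0, heading_noise_std)
    conductance = 1.0 / resistance
    return [heading, conductance]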
Example #6
 def observation(self, agent, world):
     # get positions of all entities in this agent's reference frame
     landmark_positions = format_observation(
         observe=lambda landmark: delta_pos(agent, landmark).tolist(),
         objects=world.landmarks,
         num_observations=len(world.landmarks),
         observation_size=world.dim_p)
     obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions)
     return obs
Example #7
 def _individual_reward(self, agent, world):
     # TODO cache calculated rewards to avoid re-calculation (this should be done in a generalized scenario class)
     rew = 0
     for l in world.landmarks:
         if isinstance(l, RiskRewardLandmark):
             rel_pos = delta_pos(agent, l)
             rew += l.reward_fn.get_value(rel_pos[0], rel_pos[1])
     return rew
Example #8
        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''
            ag_obs = delta_pos(other_agent, agent).tolist()

            # check if within observation range
            if distance(agent, other_agent) > world.max_communication_distance:
                ag_obs = [0] * len(ag_obs)

            return ag_obs
Example #9
        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            ld_obs = delta_pos(landmark, agent).tolist()

            # check if within observation range and is observable
            if (distance(landmark, agent) > world.max_communication_distance
                    or not landmark.is_observable()):
                ld_obs = [0] * len(ld_obs)

            return ld_obs
Example #10
 def communications_observed(other_agent):
     ''' fill in information communicated between agents
     '''
     comms = delta_pos(other_agent, agent).tolist()
     comms += [other_agent.state.c]
     # will only work with zero-padding formatting
     # TODO: I think non-communication should send None instead of zero, because zero has real meaning
     #   however this causes a problem with action function
     if distance(agent, other_agent) > world.max_communication_distance:
         comms = [0] * len(comms)
     return comms
Example #11
        def observe_agents(other_agent):
            ''' fill in information communicated/observed between agents
            '''
            # check if node is terminated
            is_terminated = 0
            if isinstance(other_agent, MortalAgent) and other_agent.terminated:
                is_terminated = 1

            dx = dy = dvx = dvy = 0.
            if not is_terminated:
                dx, dy = delta_pos(other_agent, agent)
                dvx, dvy = delta_vel(other_agent, agent)

            ag_obs = [is_terminated, dx, dy, dvx, dvy]
            assert (len(ag_obs) == _AGENT_OBSERVATION_LEN)
            return ag_obs
Example #12
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        def communications_observed(other_agent):
            ''' fill in information communicated between agents
            '''
            comms = delta_pos(other_agent, agent).tolist()
            comms += [
                self._calculate_landmark_resistance_gain(
                    agent, other_agent, world)
            ]
            # will only work with zero-padding formatting
            # TODO: I think non-communication should send None instead of zero, because zero has real meaning
            #   however this causes a problem with action function
            if distance(agent, other_agent) > world.max_communication_distance:
                comms = [0] * len(comms)
            return comms

        landmark_positions = format_observation(
            observe=lambda landmark: delta_pos(landmark, agent).tolist(),
            objects=world.landmarks,
            num_observations=len(world.landmarks),
            observation_size=world.dim_p)

        communications = format_observation(
            observe=communications_observed,
            objects=[
                a for a in world.agents
                if (a is not agent and not a.terminated)
            ],
            num_observations=self.num_agents,
            observation_size=world.dim_p + 1,
            sort_key=lambda o: distance(agent, o))

        obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions +
                         communications)

        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all
            #   then set to zero. Done this way to ensure consistent array size
            obs = 0.0 * obs

        return obs
Example #13
        def observe_landmarks(landmark):
            ''' fill in information observed about landmarks
            '''
            ld_obs = delta_pos(landmark, agent).tolist()

            # check if within observation range and is observable
            d = distance(landmark, agent)
            if d > world.max_communication_distance:
                ld_obs = [0.0] * len(ld_obs)

            # check if landmark is giving reward or hazard warning
            if d < landmark.size:
                if landmark.is_hazard:
                    landmark.hazard_tag = 1.0
                    landmark.color = np.array([1.1, 0, 0])
            ld_obs += [landmark.hazard_tag]

            assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN)
            return ld_obs
Example #14
    def _histogram_observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        # check reward signal received from landmark
        landmark_sensor_reading = 0.0
        for lm in world.landmarks:
            landmark_sensor_reading += lm.reward_fn.get_value(
                *agent.state.p_pos)

        # Format agent histograms
        bin_depth = _MAX_OBSERVATION_DISTANCE / 10.0
        radial_bins = np.logspace(np.log10(bin_depth),
                                  np.log10(_MAX_OBSERVATION_DISTANCE),
                                  num=_N_RADIAL_BINS)
        bin_angle = 2.0 * np.pi / float(_N_ANGULAR_BINS)
        angular_bins = np.linspace(bin_angle / 2.0,
                                   2 * np.pi - bin_angle / 2.0,
                                   num=_N_ANGULAR_BINS)
        agent_histogram_2d = np.array([[0] * _N_ANGULAR_BINS] * _N_RADIAL_BINS)
        reward_histogram_2d = np.array([[0.0] * _N_ANGULAR_BINS] *
                                       _N_RADIAL_BINS)

        # establish observation of failures
        observed_terminations_2d = []
        observed_terminations_dists = []

        # count agents in each bin
        for a in world.agents:
            dist = distance(a, agent)

            # skip the observing agent itself
            if a == agent:
                continue

            # record observed termination
            if a.terminated:
                insert_index = bisect(observed_terminations_dists, dist)
                observed_terminations_dists.insert(insert_index, dist)
                observed_terminations_2d.insert(insert_index,
                                                delta_pos(a, agent))
                # don't "continue", record terminated agent in histogram like live agent

            # find radial bin
            rad_bin = np.searchsorted(radial_bins, dist)
            if rad_bin == _N_RADIAL_BINS:
                # agent is too far away and observation is not stored
                continue

            # calculate angle
            dx, dy = delta_pos(a, agent)
            ang = np.arctan2(dy, dx)
            if ang < 0:
                ang += 2 * np.pi

            # find angular bin
            ang_bin = np.searchsorted(angular_bins, ang)
            if ang_bin == _N_ANGULAR_BINS:
                ang_bin = 0

            # add count to histogram
            agent_histogram_2d[rad_bin][ang_bin] += 1

            # add aggregate landmark sensor reading to histogram
            # Note: should not need to compute average reading over agents in bin
            #   because neural net should be able to learn to do this using agent count
            #   histogram
            for lm in world.landmarks:
                reward_histogram_2d[rad_bin][ang_bin] += (
                    lm.reward_fn.get_value(*a.state.p_pos))

        # flatten histogram to 1d list
        agent_histogram = [
            val for sublist in agent_histogram_2d for val in sublist
        ]

        # flatten reward histogram to 1d list
        reward_histogram = [
            val for sublist in reward_histogram_2d for val in sublist
        ]

        # flatten, truncate/pad observed terminations to fixed length
        observed_terminations = [
            val for sublist in observed_terminations_2d for val in sublist
        ]
        observed_terminations = truncate_or_pad(observed_terminations,
                                                2 * _N_OBSERVED_TERMINATIONS)

        # package new observation
        new_obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() +
                             [landmark_sensor_reading] + agent_histogram +
                             reward_histogram + observed_terminations)

        # append previous observation for velocity estimation
        if agent.previous_observation is None:
            agent.previous_observation = 0.0 * new_obs
        obs = np.append(new_obs, agent.previous_observation)
        agent.previous_observation = new_obs

        return obs
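truncate_or_pad is used above but not defined in these snippets. A minimal
sketch consistent with how it is called; right-padding with zeros is an
assumption:

def truncate_or_pad(lst, length, pad_value=0.0):
    # hypothetical sketch: clip lst to `length` entries, or right-pad it
    # with pad_value so the observation vector keeps a fixed size
    return list(lst[:length]) + [pad_value] * max(0, length - len(lst))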
Example #15
    def step(self):
        super().step()

        # decrement observability and reward function of certain landmarks
        for landmark in self.landmarks:
            if isinstance(landmark, TemporarilyObservableRiskRewardLandmark):
                landmark.decrement_observe_clock(self.dt)
                # landmark.update_reward_function()

        # check for casualties
        for i, agent in enumerate(self.agents):

            # skip terminated agents since already a casualty
            if agent.terminated:
                # self.render_geoms = None # force resetting of object rendering list
                continue

            # check for destruction by hazard
            for landmark in self.landmarks:
                if isinstance(landmark, RiskRewardLandmark):
                    rel_pos = delta_pos(agent, landmark)
                    agent_failure = landmark.risk_fn.sample_failure(
                        rel_pos[0], rel_pos[1])
                    if agent_failure:
                        agent.terminate_agent()

                        # If landmark caused a failure, make landmark observable
                        if isinstance(landmark,
                                      TemporarilyObservableRiskRewardLandmark):
                            landmark.set_observe_clock()
                        break

            # check for destruction by flyoff or excess velocity
            # NOTE: this is a hack. Certain scenarios were causing early training to send
            #       agents to positions and velocities that overflowed floating-point
            #       operations, which caused the policy to return NaN actions. Using the
            #       termination state means we shouldn't need to impose "walls" on agent
            #       motion; agents are simply terminated if they fly too far off
            if (np.linalg.norm(agent.state.p_pos) > self.flyoff_termination_radius
                    or np.linalg.norm(agent.state.p_vel) > self.flyoff_termination_speed):
                agent.terminate_agent()

            # check for spontaneous destruction of agent
            if np.random.rand() < self.spontaneous_termination_probability:
                agent.terminate_agent()

            # recheck for terminated agents and skip
            if agent.terminated:
                # self.render_geoms = None # force resetting of object rendering list
                continue

            # check for collisions
            for other_agent in self.agents[i + 1:]:

                if other_agent.terminated:
                    # skip if other agent already terminated
                    continue

                if is_collision(agent, other_agent):
                    if np.random.random() < self.collision_termination_probability:
                        # terminate other agent
                        other_agent.terminate_agent()
                        # logging.info('fatal crash')
                    if np.random.random() < self.collision_termination_probability:
                        # terminate current agent
                        agent.terminate_agent()
                        # logging.info('fatal crash')
                        break
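The spontaneous-termination check is an independent Bernoulli draw each step,
so an agent's survival probability decays geometrically with episode length. A
quick sanity check with an illustrative probability:

p = 0.001  # illustrative spontaneous_termination_probability
for T in (100, 1000):
    print(T, (1 - p) ** T)  # survival: ~0.905 at T=100, ~0.368 at T=1000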
Example #16
    def observation(self, agent, world):
        # get positions of all entities in this agent's reference frame

        # Observe communication terminals
        terminals = (world.origin_terminal_landmark.state.p_pos.tolist() +
                     world.destination_terminal_landmark.state.p_pos.tolist())

        # Format agent histograms
        # bin_depth = _MAX_OBSERVATION_DISTANCE/float(_N_RADIAL_BINS)
        # radial_bins = np.linspace(bin_depth, _MAX_OBSERVATION_DISTANCE, num=_N_RADIAL_BINS)
        bin_depth = _MAX_OBSERVATION_DISTANCE / 10.0
        radial_bins = np.logspace(np.log10(bin_depth),
                                  np.log10(_MAX_OBSERVATION_DISTANCE),
                                  num=_N_RADIAL_BINS)
        bin_angle = 2.0 * np.pi / float(_N_ANGULAR_BINS)
        angular_bins = np.linspace(bin_angle / 2.0,
                                   2 * np.pi - bin_angle / 2.0,
                                   num=_N_ANGULAR_BINS)
        agent_histogram_2d = np.array([[0] * _N_ANGULAR_BINS] * _N_RADIAL_BINS)

        # establish observation of failures
        observed_terminations_2d = []
        observed_terminations_dists = []

        # count agents in each bin
        for a in world.agents:
            dist = distance(a, agent)

            # skip the observing agent itself
            if a == agent:
                continue

            # record observed termination
            if a.terminated:
                insert_index = bisect(observed_terminations_dists, dist)
                observed_terminations_dists.insert(insert_index, dist)
                observed_terminations_2d.insert(insert_index,
                                                delta_pos(a, agent))
                continue

            # skip if outside of observation range
            if not agent.is_entity_observable(a):
                continue

            # find radial bin
            rad_bin = np.searchsorted(radial_bins, dist)
            if rad_bin == _N_RADIAL_BINS:
                # agent is too far away and observation is not stored
                continue

            # calculate angle
            dx, dy = delta_pos(a, agent)
            ang = np.arctan2(dy, dx)
            if ang < 0:
                ang += 2 * np.pi

            # find angular bin
            ang_bin = np.searchsorted(angular_bins, ang)
            if ang_bin == _N_ANGULAR_BINS:
                ang_bin = 0

            # add count to histogram
            agent_histogram_2d[rad_bin][ang_bin] += 1

        # flatten histogram to 1d list
        agent_histogram = [
            val for sublist in agent_histogram_2d for val in sublist
        ]

        # flatten, truncate/pad observed terminations to fixed length
        observed_terminations = [
            val for sublist in observed_terminations_2d for val in sublist
        ]
        observed_terminations = truncate_or_pad(observed_terminations,
                                                2 * _N_OBSERVED_TERMINATIONS)

        # package new observation
        new_obs = np.asarray(agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() + terminals +
                             agent_histogram + observed_terminations)
        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all
            #   then set to zero. Done this way to ensure consistent array size
            new_obs = 0.0 * new_obs

        # append previous observation for velocity estimation
        if agent.previous_observation is None:
            agent.previous_observation = 0.0 * new_obs
        obs = np.append(new_obs, agent.previous_observation)
        agent.previous_observation = new_obs

        return obs
Example #17
def landmark_histogram_observation(cur_agent, landmarks, obs_distance,
                                   n_radial_bins, n_angular_bins):
    ''' generate observation histogram of landmarks and list of hazards 
    '''

    # generate radial histogram bins based on sensing limitations
    bin_depth = obs_distance / 10.0
    radial_bins = np.logspace(np.log10(bin_depth),
                              np.log10(obs_distance),
                              num=n_radial_bins)

    # generate angular histogram bins
    bin_angle = 2.0 * np.pi / float(n_angular_bins)
    angular_bins = np.linspace(bin_angle / 2.0,
                               2 * np.pi - bin_angle / 2.0,
                               num=n_angular_bins)
    landmark_histogram_2d = np.array([[0] * n_angular_bins] * n_radial_bins)

    # establish observation of failures
    observed_hazards_2d = []
    observed_hazards_dists = []

    # count landmarks in each bin
    for lm in landmarks:
        dist = distance(lm, cur_agent)

        # check if landmark is giving reward or hazard warning
        # NOTE: This modifies the landmarks list
        if dist < lm.size:
            if lm.is_hazard:
                lm.hazard_tag = 1.0
                lm.color = np.array([1.1, 0, 0])

        # record observed hazard
        if lm.hazard_tag > 0.0:
            insert_index = bisect(observed_hazards_dists, dist)
            observed_hazards_dists.insert(insert_index, dist)
            observed_hazards_2d.insert(insert_index, delta_pos(lm, cur_agent))
            continue

        # skip if outside of observation range
        if dist > obs_distance or (isinstance(cur_agent,
                                              SensingLimitedMortalAgent)
                                   and not cur_agent.is_entity_observable(lm)):
            continue

        # find radial bin
        rad_bin = np.searchsorted(radial_bins, dist)

        # calculate angle
        dx, dy = delta_pos(lm, cur_agent)
        ang = np.arctan2(dy, dx)
        if ang < 0:
            ang += 2 * np.pi

        # find angular bin
        ang_bin = np.searchsorted(angular_bins, ang)
        if ang_bin == n_angular_bins:
            ang_bin = 0

        # add count to histogram
        landmark_histogram_2d[rad_bin][ang_bin] += 1

    return landmark_histogram_2d, observed_hazards_2d
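To make the binning concrete, here is a self-contained walk-through of how one
relative position lands in a (radial, angular) cell, using the same
np.logspace / np.searchsorted scheme as the function above:

import numpy as np

obs_distance, n_radial_bins, n_angular_bins = 10.0, 4, 8
radial_bins = np.logspace(np.log10(obs_distance / 10.0),
                          np.log10(obs_distance), num=n_radial_bins)
bin_angle = 2.0 * np.pi / float(n_angular_bins)
angular_bins = np.linspace(bin_angle / 2.0, 2 * np.pi - bin_angle / 2.0,
                           num=n_angular_bins)

dx, dy = 3.0, 3.0                       # relative position of the entity
dist = np.hypot(dx, dy)                 # ~4.24
ang = np.arctan2(dy, dx) % (2 * np.pi)  # ~0.785 rad
rad_bin = np.searchsorted(radial_bins, dist)  # 2, i.e. the third radial shell
ang_bin = np.searchsorted(angular_bins, ang)  # 1, the second angular sector
if ang_bin == n_angular_bins:           # wrap the last half-sector to bin 0
    ang_bin = 0
print(rad_bin, ang_bin)                 # -> 2 1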
Example #18
def agent_histogram_observation(cur_agent, agents, obs_distance, n_radial_bins,
                                n_angular_bins):
    ''' generate observation histogram of agents and list of terminated agents
    '''

    # generate radial histogram bins based on sensing limitations
    bin_depth = obs_distance / 10.0
    radial_bins = np.logspace(np.log10(bin_depth),
                              np.log10(obs_distance),
                              num=n_radial_bins)

    # generate angular histogram bins
    bin_angle = 2.0 * np.pi / float(n_angular_bins)
    angular_bins = np.linspace(bin_angle / 2.0,
                               2 * np.pi - bin_angle / 2.0,
                               num=n_angular_bins)
    agent_histogram_2d = np.array([[0] * n_angular_bins] * n_radial_bins)

    # establish observation of failures
    observed_terminations_2d = []
    observed_terminations_dists = []

    # count agents in each bin
    for a in agents:
        dist = distance(a, cur_agent)

        # skip the observing agent itself
        if a == cur_agent:
            continue

        # record observed termination
        if a.terminated:
            insert_index = bisect(observed_terminations_dists, dist)
            observed_terminations_dists.insert(insert_index, dist)
            observed_terminations_2d.insert(insert_index,
                                            delta_pos(a, cur_agent))
            continue

        # skip if outside of observation range
        if dist > obs_distance or (isinstance(cur_agent,
                                              SensingLimitedMortalAgent)
                                   and not cur_agent.is_entity_observable(a)):
            continue

        # find radial bin
        rad_bin = np.searchsorted(radial_bins, dist)

        # calculate angle
        dx, dy = delta_pos(a, cur_agent)
        ang = np.arctan2(dy, dx)
        if ang < 0:
            ang += 2 * np.pi

        # find angular bin
        ang_bin = np.searchsorted(angular_bins, ang)
        if ang_bin == n_angular_bins:
            ang_bin = 0

        # add count to histogram
        agent_histogram_2d[rad_bin][ang_bin] += 1

    return agent_histogram_2d, observed_terminations_2d
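A hedged usage sketch for agent_histogram_observation, assuming the module's
distance and delta_pos helpers read a state.p_pos array and that agents carry
a MortalAgent-style terminated flag; the mock objects below are illustration
only, not the project's real agent class:

import numpy as np
from types import SimpleNamespace

def _mock_agent(x, y, terminated=False):
    # stand-in exposing only the fields the function reads
    return SimpleNamespace(state=SimpleNamespace(p_pos=np.array([x, y])),
                           terminated=terminated)

me = _mock_agent(0.0, 0.0)
others = [_mock_agent(1.0, 1.0), _mock_agent(2.0, -1.0, terminated=True)]

hist_2d, terminations = agent_histogram_observation(
    me, [me] + others, obs_distance=10.0,
    n_radial_bins=4, n_angular_bins=8)
print(hist_2d.sum(), len(terminations))  # 1 live agent binned, 1 termination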