Example #1
    def reset(self, **kwargs):
        """
        Agents are given random positions in the map, targets are given random positions near a random agent.
        Return a full state dict with agent ids (keys) that refer to their observation and global state
        """
        obs_dict = {}
        locations = []
        global_state = {}
        full_state = {}

        init_pose = self.get_init_pose(**kwargs)
        # Initialize agents
        for ii in range(self.num_agents):
            self.agents[ii].reset(init_pose['agents'][ii])
            obs_dict[self.agents[ii].agent_id] = []
        # Initialize targets and beliefs
        for jj in range(self.num_targets):
            self.belief_targets[jj].reset(init_state=np.concatenate(
                (init_pose['belief_targets'][jj][:2], np.zeros(2))),
                                          init_cov=self.target_init_cov)
            self.targets[jj].reset(
                np.concatenate(
                    (init_pose['targets'][jj][:2], self.target_init_vel)))
            locations.append(self.targets[jj].state[:2])
            # For each agent, calculate the belief of all targets
            for kk in range(self.num_agents):
                r, alpha = util.relative_distance_polar(
                    self.belief_targets[jj].state[:2],
                    xy_base=self.agents[kk].state[:2],
                    theta_base=self.agents[kk].state[2])
                logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
                obs_dict[self.agents[kk].agent_id].extend(
                    [r, alpha, 0.0, 0.0, logdetcov, 0.0])
        # All target and agent locations relative to the map origin (targets first, then agents)
        for n in range(self.num_agents):
            locations.append(self.agents[n].state[:2])
        global_state = util.global_relative_measure(np.array(locations),
                                                    self.MAP.origin)
        # Full state dict
        for m, agent_id in enumerate(obs_dict):
            obs_dict[agent_id].extend([self.sensor_r, np.pi])
            # Relative location of all other agents
            for p, ids in enumerate(obs_dict):
                if agent_id != ids:
                    r, alpha = util.relative_distance_polar(
                        np.array(self.agents[p].state[:2]),
                        xy_base=self.agents[m].state[:2],
                        theta_base=self.agents[m].state[2])
                    obs_dict[agent_id].extend([r, alpha])
            full_state[agent_id] = {
                'obs': np.asarray(obs_dict[agent_id]),
                'state': np.concatenate((obs_dict[agent_id], global_state))
            }
        return full_state
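A hedged sketch of the flat observation layout this reset builds per agent: six belief features per target, two obstacle placeholders (sensor_r, pi), and two relative-position features per other agent. The length formula below is an inference from the code above, not part of the original repository; the function name is hypothetical.

# Sketch (assumption): expected length of full_state[agent_id]['obs'] in Example #1.
def expected_obs_len(num_targets, num_agents):
    # 6 belief features per target + 2 obstacle placeholders
    # + (r, alpha) for every other agent.
    return 6 * num_targets + 2 + 2 * (num_agents - 1)

# e.g. with 3 targets and 2 agents: 6*3 + 2 + 2*1 = 22 entries per agent.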
Example #2
 def reset(self, **kwargs):
     """
     Agents are given random positions in the map, targets are given random positions near a random agent.
     Return an observation state dict with agent ids (keys) that refer to their observation
     """
     obs_dict = {}
     init_pose = self.get_init_pose(**kwargs)
     # Initialize agents
     for ii in range(self.num_agents):
         self.agents[ii].reset(init_pose['agents'][ii])
         obs_dict[self.agents[ii].agent_id] = []
     # Initialize targets and beliefs
     for jj in range(self.num_targets):
         self.belief_targets[jj].reset(init_state=np.concatenate(
             (init_pose['belief_targets'][jj][:2], np.zeros(2))),
                                       init_cov=self.target_init_cov)
         self.targets[jj].reset(
             np.concatenate(
                 (init_pose['targets'][jj][:2], self.target_init_vel)))
         # For each agent, calculate the belief of all targets
         for kk in range(self.num_agents):
             r, alpha = util.relative_distance_polar(
                 self.belief_targets[jj].state[:2],
                 xy_base=self.agents[kk].state[:2],
                 theta_base=self.agents[kk].state[2])
             logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
             obs_dict[self.agents[kk].agent_id].extend(
                 [r, alpha, 0.0, 0.0, logdetcov, 0.0])
     for agent_id in obs_dict:
         obs_dict[agent_id].extend([self.sensor_r, np.pi])
     return obs_dict
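Every snippet above leans on util.relative_distance_polar. As a point of reference, here is a minimal sketch of what such a helper is assumed to compute (range and body-frame bearing of a point, with the bearing wrapped to [-pi, pi]); this is inferred from how the snippets use it and is not the original util implementation.

import numpy as np

def relative_distance_polar_sketch(xy_target, xy_base, theta_base):
    # Range and bearing of xy_target as seen from a base at xy_base with heading theta_base.
    diff = np.asarray(xy_target) - np.asarray(xy_base)
    r = np.linalg.norm(diff)
    alpha = np.arctan2(diff[1], diff[0]) - theta_base
    # Wrap the bearing into [-pi, pi).
    alpha = (alpha + np.pi) % (2 * np.pi) - np.pi
    return r, alpha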
Example #3
 def reset(self, **kwargs):
     """
     Randomly initialize the number of targets at the reset of the env episode.
     Agents are given random positions in the map; targets are given random positions near a random agent.
     Return an observation dict keyed by agent id, where each entry holds that agent's observation.
     """
     try:
         self.nb_targets = kwargs['nb_targets']
     except KeyError:
         # Sample how many targets are active this episode (upper bound inclusive);
         # np.random.randint replaces the deprecated np.random.random_integers.
         self.nb_targets = np.random.randint(1, self.num_targets + 1)
     obs_dict = {}
     init_pose = self.get_init_pose(**kwargs)
     # Initialize agents
     for ii in range(self.num_agents):
         self.agents[ii].reset(init_pose['agents'][ii])
         obs_dict[self.agents[ii].agent_id] = []
     # Initialize all targets and beliefs
     for nn in range(self.num_targets):
         self.belief_targets[nn].reset(
                     init_state=np.concatenate((init_pose['belief_targets'][nn][:2], np.zeros(2))),
                     init_cov=self.target_init_cov)
         self.targets[nn].reset(np.concatenate((init_pose['targets'][nn][:2], self.target_init_vel)))
     # For each agent, calculate the belief of the assigned targets
     for jj in range(self.nb_targets):
         for kk in range(self.num_agents):
             r, alpha = util.relative_distance_polar(self.belief_targets[jj].state[:2],
                                         xy_base=self.agents[kk].state[:2], 
                                         theta_base=self.agents[kk].state[2])
             logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
             obs_dict[self.agents[kk].agent_id].append([r, alpha, 0.0, 0.0, logdetcov, 
                                                        0.0, self.sensor_r, np.pi])
     for agent_id in obs_dict:
         obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
     return obs_dict
Example #4
    def step(self, action_dict):
        obs_dict = {}
        reward_dict = {}
        done_dict = {'__all__': False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.nb_targets):
            self.targets[n].update()
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]
            _ = self.agents[ii].update(
                action_vw,
                [t.state[:2] for t in self.targets[:self.nb_targets]])

            observed = []
            # Update beliefs of all targets
            for jj in range(self.num_targets):
                # Observe
                obs = self.observation(self.targets[jj], self.agents[ii])
                observed.append(obs[0])
                self.belief_targets[jj].predict()  # Belief state at t+1
                if obs[0]:  # if observed, update the target belief.
                    self.belief_targets[jj].update(obs[1],
                                                   self.agents[ii].state)

            obstacles_pt = map_utils.get_closest_obstacle(
                self.MAP, self.agents[ii].state)

            if obstacles_pt is None:
                obstacles_pt = (self.sensor_r, np.pi)
            # Calculate beliefs on only assigned targets
            for kk in range(self.nb_targets):
                r_b, alpha_b = util.relative_distance_polar(
                    self.belief_targets[kk].state[:2],
                    xy_base=self.agents[ii].state[:2],
                    theta_base=self.agents[ii].state[-1])
                r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                    self.belief_targets[kk].state[:2],
                    self.belief_targets[kk].state[2:],
                    self.agents[ii].state[:2], self.agents[ii].state[-1],
                    action_vw[0], action_vw[1])
                obs_dict[agent_id].append([
                    r_b, alpha_b, r_dot_b, alpha_dot_b,
                    np.log(LA.det(self.belief_targets[kk].cov)),
                    float(observed[kk]), obstacles_pt[0], obstacles_pt[1]
                ])
            obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        reward_dict['__all__'] = reward
        done_dict['__all__'] = done
        info_dict['mean_nlogdetcov'] = mean_nlogdetcov
        return obs_dict, reward_dict, done_dict, info_dict
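A hedged usage sketch of how the reset/step dicts from these snippets would be consumed in a rollout loop. The environment constructor make_env(), the step budget num_steps, and the random-action sampling are hypothetical; only the dict structure (per-agent observations plus the '__all__' reward/done entries) comes from the snippets above, and action_map is assumed to be indexed by consecutive integers.

import numpy as np

env = make_env()                      # hypothetical constructor for this environment
obs_dict = env.reset(nb_targets=2)    # per-agent observations keyed by agent_id
num_steps = 100                       # hypothetical episode budget
for _ in range(num_steps):
    # Pick a random discrete action per agent from the environment's action map.
    action_dict = {agent_id: np.random.randint(len(env.action_map))
                   for agent_id in obs_dict}
    obs_dict, reward_dict, done_dict, info_dict = env.step(action_dict)
    if done_dict['__all__']:
        break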
Example #5
 def reset(self, **kwargs):
     """
     Random initialization a number of agents and targets at the reset of the env epsiode.
     Agents are given random positions in the map, targets are given random positions near a random agent.
     Return an observation state dict with agent ids (keys) that refer to their observation
     """
     try:
         self.nb_agents = kwargs['nb_agents']
         self.nb_targets = kwargs['nb_targets']
     except:
         self.nb_agents = np.random.random_integers(1, self.num_agents)
         self.nb_targets = np.random.random_integers(1, self.num_targets)
         # self.nb_targets=1
     obs_dict = {}
     init_pose = self.get_init_pose(**kwargs)
     # Initialize all agents
     for ii in range(self.num_agents):
         self.agents[ii].reset(init_pose['agents'][ii])
         # Only for nb agents in this episode
         if ii < self.nb_agents:
             obs_dict[self.agents[ii].agent_id] = []
     # Initialize all targets and beliefs
     for nn in range(self.num_targets):
         self.belief_targets[nn].reset(init_state=np.concatenate(
             (init_pose['belief_targets'][nn][:2], np.zeros(2))),
                                       init_cov=self.target_init_cov)
         self.targets[nn].reset(
             np.concatenate(
                 (init_pose['targets'][nn][:2], self.target_init_vel)))
     # For nb agents calculate belief of targets assigned
     for jj in range(self.nb_targets):
         for kk in range(self.nb_agents):
             r, alpha = util.relative_distance_polar(
                 self.belief_targets[jj].state[:2],
                 xy_base=self.agents[kk].state[:2],
                 theta_base=self.agents[kk].state[2])
             logdetcov = np.log(LA.det(self.belief_targets[jj].cov))
             obs_dict[self.agents[kk].agent_id].append(
                 [r, alpha, 0.0, 0.0, logdetcov, 0.0, self.sensor_r, np.pi])
     # Greedily assign each agent to its closest unassigned target, in order; if there are more agents than targets, all targets end up assigned
     mask = np.ones(self.nb_targets, bool)
     if self.nb_targets > self.nb_agents:
         oracle = 1
     else:
         oracle = 0
     for agent_id in obs_dict:
         obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
         if np.sum(mask) != np.maximum(
                 0, self.nb_targets - self.nb_agents + oracle):
             idx = np.flatnonzero(mask)
             close = idx[np.argmin(obs_dict[agent_id][:, 0][mask])]
             obs_dict[agent_id] = obs_dict[agent_id][None, close]
             mask[close] = False
     return obs_dict
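The masking logic above is easier to follow as a standalone function. Below is a hedged re-statement of the same greedy assignment; the function name is hypothetical, while the per-agent arrays and the column-0 = range convention are taken from the snippet.

import numpy as np

def greedy_assign_sketch(obs_by_agent, nb_agents, nb_targets):
    # obs_by_agent: dict of agent_id -> array of shape (nb_targets, obs_dim),
    # where column 0 is the range to each target (matching the snippets above).
    mask = np.ones(nb_targets, dtype=bool)       # targets not yet assigned
    oracle = 1 if nb_targets > nb_agents else 0  # mirrors the original rule
    stop_at = max(0, nb_targets - nb_agents + oracle)
    for agent_id, obs in obs_by_agent.items():
        if mask.sum() != stop_at:
            idx = np.flatnonzero(mask)
            closest = idx[np.argmin(obs[:, 0][mask])]
            obs_by_agent[agent_id] = obs[None, closest]  # keep only the assigned target's row
            mask[closest] = False
    return obs_by_agent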
Example #6
 def observation(self, target, agent):
     r, alpha = util.relative_distance_polar(target.state[:2],
                                             xy_base=agent.state[:2],
                                             theta_base=agent.state[2])
     observed = (r <= self.sensor_r) \
                 & (abs(alpha) <= self.fov/2/180*np.pi) \
                 & (not(map_utils.is_blocked(self.MAP, agent.state, target.state)))
     z = None
     if observed:
         z = np.array([r, alpha])
         # z += np.random.multivariate_normal(np.zeros(2,), self.observation_noise(z))
         # Note: self.np_random (rather than np.random) must be used here for the RNG
         # seed to take effect; if used in the gen_rand_pose functions, the seed will
         # NOT work.
         z += self.np_random.multivariate_normal(np.zeros(2),
                                                 self.observation_noise(z))
     return observed, z
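A hedged sketch of how self.np_random is typically set up so that the seed referred to in the note above actually takes effect. It assumes the classic gym pattern (gym.utils.seeding); the original class may wire this differently.

from gym.utils import seeding

class SeededEnvSketch:
    def seed(self, seed=None):
        # Keep a per-environment random generator; drawing noise from self.np_random
        # (instead of the global np.random) is what makes runs reproducible.
        self.np_random, seed = seeding.np_random(seed)
        return [seed]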
Example #7
    def step(self, action_dict):
        obs_dict = {}
        reward_dict = {}
        done_dict = {'__all__': False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.nb_targets):
            self.targets[n].update()
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]

            # Locations of all targets and agents in order to maintain a margin between them
            margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]]
            for p, ids in enumerate(action_dict):
                if agent_id != ids:
                    margin_pos.append(np.array(self.agents[p].state[:2]))
            _ = self.agents[ii].update(action_vw, margin_pos)
            # _ = self.agents[ii].update(action_vw, [t.state[:2] for t in self.targets[:self.nb_targets]])

            observed = []
            # Update beliefs of all targets
            for jj in range(self.num_targets):
                # Observe
                obs = self.observation(self.targets[jj], self.agents[ii])
                observed.append(obs[0])
                self.belief_targets[jj].predict()  # Belief state at t+1
                if obs[0]:  # if observed, update the target belief.
                    self.belief_targets[jj].update(obs[1],
                                                   self.agents[ii].state)

            # obstacles_pt = map_utils.get_closest_obstacle(self.MAP, self.agents[ii].state)

            # if obstacles_pt is None:
            obstacles_pt = (self.sensor_r, np.pi)
            # Calculate beliefs on only assigned targets
            for kk in range(self.nb_targets):
                r_b, alpha_b = util.relative_distance_polar(
                    self.belief_targets[kk].state[:2],
                    xy_base=self.agents[ii].state[:2],
                    theta_base=self.agents[ii].state[-1])
                r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                    self.belief_targets[kk].state[:2],
                    self.belief_targets[kk].state[2:],
                    self.agents[ii].state[:2], self.agents[ii].state[-1],
                    action_vw[0], action_vw[1])
                obs_dict[agent_id].append([
                    r_b, alpha_b, r_dot_b, alpha_dot_b,
                    np.log(LA.det(self.belief_targets[kk].cov)),
                    float(observed[kk]), obstacles_pt[0], obstacles_pt[1]
                ])
        # Greedily assign each agent to its closest unassigned target, in order; if there are more agents than targets, all targets end up assigned
        mask = np.ones(self.nb_targets, bool)
        if self.nb_targets > self.nb_agents:
            oracle = 1
        else:
            oracle = 0
        for agent_id in obs_dict:
            obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
            if np.sum(mask) != np.maximum(
                    0, self.nb_targets - self.nb_agents + oracle):
                idx = np.flatnonzero(mask)
                close = idx[np.argmin(obs_dict[agent_id][:, 0][mask])]
                obs_dict[agent_id] = obs_dict[agent_id][None, close]
                mask[close] = False
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        reward_dict['__all__'] = reward
        done_dict['__all__'] = done
        info_dict['mean_nlogdetcov'] = mean_nlogdetcov
        return obs_dict, reward_dict, done_dict, info_dict
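The step snippets also rely on util.relative_velocity_polar. Below is a minimal sketch of the range-rate / bearing-rate computation such a helper is assumed to perform, with the argument order copied from how the snippets call it (target position, target velocity, agent position, agent heading, agent linear and angular commands); the real util implementation may differ.

import numpy as np

def relative_velocity_polar_sketch(xy_target, vel_target, xy_base, theta_base, v_base, w_base):
    # Rates of change of the range and body-frame bearing of the target as seen from the agent.
    diff = np.asarray(xy_target) - np.asarray(xy_base)
    r = np.linalg.norm(diff)
    # Relative velocity in the global frame (the agent moves along its heading at speed v_base).
    v_rel = np.asarray(vel_target) - v_base * np.array([np.cos(theta_base), np.sin(theta_base)])
    r_dot = diff.dot(v_rel) / r
    # Bearing rate: rate of the global line-of-sight angle minus the agent's turn rate.
    alpha_dot = (diff[0] * v_rel[1] - diff[1] * v_rel[0]) / r**2 - w_base
    return r_dot, alpha_dot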
Example #8
    def step(self, action_dict):
        obs_dict = {}
        locations = []
        full_state = {}
        reward_dict = {}
        done_dict = {'__all__': False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.num_targets):
            self.targets[n].update()
            locations.append(self.targets[n].state[:2])
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]
            _ = self.agents[ii].update(action_vw,
                                       [t.state[:2] for t in self.targets])
            locations.append(self.agents[ii].state[:2])

            observed = []
            for jj in range(self.num_targets):
                # Observe
                obs = self.observation(self.targets[jj], self.agents[ii])
                observed.append(obs[0])
                self.belief_targets[jj].predict()  # Belief state at t+1
                if obs[0]:  # if observed, update the target belief.
                    self.belief_targets[jj].update(obs[1],
                                                   self.agents[ii].state)

            obstacles_pt = map_utils.get_closest_obstacle(
                self.MAP, self.agents[ii].state)

            if obstacles_pt is None:
                obstacles_pt = (self.sensor_r, np.pi)
            for kk in range(self.num_targets):
                r_b, alpha_b = util.relative_distance_polar(
                    self.belief_targets[kk].state[:2],
                    xy_base=self.agents[ii].state[:2],
                    theta_base=self.agents[ii].state[-1])
                r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                    self.belief_targets[kk].state[:2],
                    self.belief_targets[kk].state[2:],
                    self.agents[ii].state[:2], self.agents[ii].state[-1],
                    action_vw[0], action_vw[1])
                obs_dict[agent_id].extend([
                    r_b, alpha_b, r_dot_b, alpha_dot_b,
                    np.log(LA.det(self.belief_targets[kk].cov)),
                    float(observed[kk])
                ])
            obs_dict[agent_id].extend([obstacles_pt[0], obstacles_pt[1]])
        # Global state for each agent (reference is the map origin)
        global_state = util.global_relative_measure(np.array(locations),
                                                    self.MAP.origin)
        # Full state dict
        for m, agent_id in enumerate(obs_dict):
            for p, ids in enumerate(obs_dict):
                if agent_id != ids:
                    # Relative location of all other agents
                    r, alpha = util.relative_distance_polar(
                        np.array(self.agents[p].state[:2]),
                        xy_base=self.agents[m].state[:2],
                        theta_base=self.agents[m].state[2])
                    obs_dict[agent_id].extend([r, alpha])
            full_state[agent_id] = {
                'obs': np.asarray(obs_dict[agent_id]),
                'state': np.concatenate((obs_dict[agent_id], global_state))
            }
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        reward_dict['__all__'] = reward
        done_dict['__all__'] = done
        info_dict['mean_nlogdetcov'] = mean_nlogdetcov
        return full_state, reward_dict, done_dict, info_dict
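Examples #1 and #8 also call util.global_relative_measure to build the shared global state. Purely as a labelled assumption (the real helper may compute something else), one reading consistent with the "locations relative to map origin" comments is the polar coordinates of every location with respect to the origin, flattened into a single vector:

import numpy as np

def global_relative_measure_sketch(locations, origin):
    # Placeholder assumption: range and bearing of each (x, y) location relative to the
    # map origin, flattened into one vector, to illustrate the shape of the 'global_state'
    # that gets concatenated onto each agent's observation.
    diff = np.asarray(locations) - np.asarray(origin)
    r = np.linalg.norm(diff, axis=1)
    alpha = np.arctan2(diff[:, 1], diff[:, 0])
    return np.stack([r, alpha], axis=1).ravel()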
Example #9
    def step(self, action_dict):
        obs_dict = {}
        reward_dict = {}
        done_dict = {'__all__': False}
        info_dict = {}

        # Targets move (t -> t+1)
        for n in range(self.nb_targets):
            self.targets[n].update()
            self.belief_targets[n].predict()  # Belief state at t+1
        # Agents move (t -> t+1) and observe the targets
        for ii, agent_id in enumerate(action_dict):
            obs_dict[self.agents[ii].agent_id] = []
            reward_dict[self.agents[ii].agent_id] = []
            done_dict[self.agents[ii].agent_id] = []

            action_vw = self.action_map[action_dict[agent_id]]

            # Locations of all targets and agents in order to maintain a margin between them
            margin_pos = [t.state[:2] for t in self.targets[:self.nb_targets]]
            for p, ids in enumerate(action_dict):
                if agent_id != ids:
                    margin_pos.append(np.array(self.agents[p].state[:2]))
            _ = self.agents[ii].update(action_vw, margin_pos)
            # _ = self.agents[ii].update(action_vw, [t.state[:2] for t in self.targets[:self.nb_targets]])

            observed = np.zeros(self.nb_targets, dtype=bool)
            obstacles_pt = (self.sensor_r, np.pi)
            # Update beliefs of all targets
            for jj in range(self.nb_targets):
                # Observe
                obs, z_t = self.observation(self.targets[jj], self.agents[ii])
                observed[jj] = obs

                if obs:  # if observed, update the target belief.
                    self.belief_targets[jj].update(z_t, self.agents[ii].state)

                # Belief features for the current target, relative to this agent
                # (the per-step obstacle lookup used in other step variants is skipped
                #  here; obstacles_pt stays fixed at (sensor_r, pi))
                r_b, alpha_b = util.relative_distance_polar(
                    self.belief_targets[jj].state[:2],
                    xy_base=self.agents[ii].state[:2],
                    theta_base=self.agents[ii].state[-1])
                r_dot_b, alpha_dot_b = util.relative_velocity_polar(
                    self.belief_targets[jj].state[:2],
                    self.belief_targets[jj].state[2:],
                    self.agents[ii].state[:2], self.agents[ii].state[-1],
                    action_vw[0], action_vw[1])
                obs_dict[agent_id].append([
                    r_b, alpha_b, r_dot_b, alpha_dot_b,
                    np.log(LA.det(self.belief_targets[jj].cov)),
                    float(obs), obstacles_pt[0], obstacles_pt[1]
                ])
            obs_dict[agent_id] = np.asarray(obs_dict[agent_id])
        # Get all rewards after all agents and targets move (t -> t+1)
        reward, done, mean_nlogdetcov = self.get_reward(
            obstacles_pt, observed, self.is_training)
        reward_dict['__all__'] = reward
        done_dict['__all__'] = done
        info_dict['mean_nlogdetcov'] = mean_nlogdetcov
        return obs_dict, reward_dict, done_dict, info_dict