def generator(rail: GridTransitionMap, num_agents: int, hints: Any = None, num_resets: int = 0) -> Schedule:
    """Build a Schedule from a previously saved environment file.

    Reads the serialized environment either from a package resource
    (``load_from_package``) or from ``filename`` on disk — both captured
    from the enclosing scope — rebuilds the agents, and returns their
    initial state as a Schedule.

    Parameters
    ----------
    rail : GridTransitionMap
        Unused; present to satisfy the schedule-generator interface.
    num_agents : int
        Unused; the agent count comes from the loaded file.
    hints : Any, optional
        Unused.
    num_resets : int, optional
        Unused.

    Returns
    -------
    Schedule
    """
    if load_from_package is not None:
        from importlib_resources import read_binary
        load_data = read_binary(load_from_package, filename)
    else:
        with open(filename, "rb") as file_in:
            load_data = file_in.read()

    # NOTE(review): the `encoding` kwarg was removed in msgpack >= 1.0;
    # this assumes an older msgpack release — confirm the pinned version.
    data = msgpack.unpackb(load_data, use_list=False, encoding='utf-8')

    if "agents_static" in data:
        # Legacy save format.
        agents = EnvAgent.load_legacy_static_agent(data["agents_static"])
    else:
        # Current format: rebuild agents from the first 12 tuple fields.
        agents = [EnvAgent(*d[0:12]) for d in data["agents"]]

    # setup with loaded data
    agents_position = [a.initial_position for a in agents]
    agents_direction = [a.direction for a in agents]
    agents_target = [a.target for a in agents]
    agents_speed = [a.speed_data['speed'] for a in agents]
    agents_malfunction = [
        a.malfunction_data['malfunction_rate'] for a in agents
    ]

    # BUG FIX: the malfunction rates were computed above but then discarded
    # (agent_malfunction_rates was hard-coded to None); pass them through.
    return Schedule(agent_positions=agents_position,
                    agent_directions=agents_direction,
                    agent_targets=agents_target,
                    agent_speeds=agents_speed,
                    agent_malfunction_rates=agents_malfunction)
def load_env_dict(cls, filename, load_from_package=None):
    """Load a saved environment dict from a ``.mpk`` or ``.pkl`` file.

    Parameters
    ----------
    filename : str
        Path (or package resource name) ending in "mpk" or "pkl".
    load_from_package : str, optional
        If given, read `filename` as a resource of this package instead
        of from the filesystem.

    Returns
    -------
    dict
        The deserialized environment dict with "agents" rebuilt as
        EnvAgent objects; an empty dict when the extension is unknown.
    """
    if load_from_package is not None:
        from importlib_resources import read_binary
        load_data = read_binary(load_from_package, filename)
    else:
        with open(filename, "rb") as file_in:
            load_data = file_in.read()

    if filename.endswith("mpk"):
        # NOTE(review): `encoding` was removed in msgpack >= 1.0 — this
        # assumes an older msgpack; confirm the pinned version.
        env_dict = msgpack.unpackb(load_data, use_list=False, encoding="utf-8")
    elif filename.endswith("pkl"):
        # SECURITY: pickle.loads executes arbitrary code — only ever load
        # trusted environment files here.
        env_dict = pickle.loads(load_data)
    else:
        # BUG FIX: the f-string had no placeholder, so the offending
        # filename was never shown; interpolate it into the message.
        print(f"filename {filename} must end with either pkl or mpk")
        env_dict = {}

    # Replace the agents tuple with EnvAgent objects
    if "agents_static" in env_dict:
        env_dict["agents"] = EnvAgent.load_legacy_static_agent(
            env_dict["agents_static"])
        # remove the legacy key
        del env_dict["agents_static"]
    elif "agents" in env_dict:
        env_dict["agents"] = [
            EnvAgent(*d[0:12]) for d in env_dict["agents"]
        ]

    return env_dict
def test_dead_end():
    """Build 1-D railways terminated by dead ends (horizontal and vertical)
    and place a single agent at several starting positions/directions.

    NOTE(review): as visible here the test only constructs the envs and
    assigns agents after reset(); no stepping or assertions follow.
    """
    transitions = RailEnvTransitions()
    # 16-bit transition masks for the two cell types used below.
    straight_vertical = int('1000000000100000', 2)  # Case 1 - straight
    straight_horizontal = transitions.rotate_transition(straight_vertical,
                                                        90)
    dead_end_from_south = int('0010000000000000', 2)  # Case 7 - dead end

    # We instantiate the following railway
    # O->-- where > is the train and O the target. After 6 steps,
    # the train should be done.
    rail_map = np.array(
        [[transitions.rotate_transition(dead_end_from_south, 270)] +
         [straight_horizontal] * 3 +
         [transitions.rotate_transition(dead_end_from_south, 90)]],
        dtype=np.uint16)
    rail = GridTransitionMap(width=rail_map.shape[1],
                             height=rail_map.shape[0],
                             transitions=transitions)
    rail.grid = rail_map
    rail_env = RailEnv(width=rail_map.shape[1],
                       height=rail_map.shape[0],
                       rail_generator=rail_from_grid_transition_map(rail),
                       schedule_generator=random_schedule_generator(),
                       number_of_agents=1,
                       obs_builder_object=GlobalObsForRailEnv())

    # We try the configuration in the 4 directions:
    rail_env.reset()
    rail_env.agents = [EnvAgent(initial_position=(0, 2), initial_direction=1,
                                direction=1, target=(0, 0), moving=False)]

    rail_env.reset()
    rail_env.agents = [EnvAgent(initial_position=(0, 2), initial_direction=3,
                                direction=3, target=(0, 4), moving=False)]

    # In the vertical configuration:
    rail_map = np.array(
        [[dead_end_from_south]] + [[straight_vertical]] * 3 +
        [[transitions.rotate_transition(dead_end_from_south, 180)]],
        dtype=np.uint16)
    rail = GridTransitionMap(width=rail_map.shape[1],
                             height=rail_map.shape[0],
                             transitions=transitions)
    rail.grid = rail_map
    rail_env = RailEnv(width=rail_map.shape[1],
                       height=rail_map.shape[0],
                       rail_generator=rail_from_grid_transition_map(rail),
                       schedule_generator=random_schedule_generator(),
                       number_of_agents=1,
                       obs_builder_object=GlobalObsForRailEnv())

    rail_env.reset()
    rail_env.agents = [EnvAgent(initial_position=(2, 0), initial_direction=2,
                                direction=2, target=(0, 0), moving=False)]

    rail_env.reset()
    rail_env.agents = [EnvAgent(initial_position=(2, 0), initial_direction=0,
                                direction=0, target=(4, 0), moving=False)]
def _remove_agent_from_scene(self, agent: EnvAgent): """ Remove the agent from the scene. Updates the agent object and the position of the agent inside the global agent_position numpy array Parameters ------- agent: EnvAgent object """ self.agent_positions[agent.position] = -1 if self.remove_agents_at_target: agent.position = None agent.status = RailAgentStatus.DONE_REMOVED
def click_agent(self, cell_row_col):
    """ The user has clicked on a cell -
        * If there is an agent, select it
        * If that agent was already selected, then deselect it
        * If there is no agent selected, and no agent in the cell, create one
        * If there is an agent selected, and no agent in the cell, move the selected agent to the cell
    """

    # Has the user clicked on an existing agent?
    agent_idx = self.find_agent_at(cell_row_col)

    # This is in case we still have a selected agent even though the env has been recreated
    # with no agents.
    # BUG FIX: the guard used `>` — an index equal to len(agents) is also
    # out of range and would crash the "move selected agent" branch below.
    if (self.selected_agent is not None) and (self.selected_agent >= len(self.env.agents)):
        self.selected_agent = None

    # Defensive coding below - for cell_row_col to be a tuple, not a numpy array:
    # numpy array breaks various things when loading the env.
    if agent_idx is None:  # No agent in the clicked cell
        if self.selected_agent is None:
            # Create a new agent and select it.
            agent = EnvAgent(initial_position=tuple(cell_row_col),
                             initial_direction=0,
                             direction=0,
                             target=tuple(cell_row_col),
                             moving=False,
                             )
            self.selected_agent = self.env.add_agent(agent)
            # self.env.set_agent_active(agent)
            self.view.oRT.update_background()
        else:
            # Move the selected agent to this cell
            agent = self.env.agents[self.selected_agent]
            agent.initial_position = tuple(cell_row_col)
            agent.position = tuple(cell_row_col)
            agent.old_position = tuple(cell_row_col)
    else:  # Clicked on an existing agent
        # Have they clicked on the agent already selected?
        if self.selected_agent is not None and agent_idx == self.selected_agent:
            # Yes - deselect the agent
            self.selected_agent = None
        else:
            # No - select the agent
            self.selected_agent = agent_idx

    self.redraw()
def test_load_env():
    """Load a bundled 10x10 env resource, then add one agent and verify the count."""
    env = RailEnv(10, 10)
    env.reset()
    env.load_resource('env_data.tests', 'test-10x10.mpk')

    # EnvAgent is constructed positionally here; presumably
    # (position, direction, target, moving) — TODO confirm against EnvAgent.
    agent_static = EnvAgent((0, 0), 2, (5, 5), False)
    env.add_agent(agent_static)

    # Only the manually added agent is expected to be counted.
    assert env.get_num_agents() == 1
def _break_agent(self, agent: EnvAgent): """ Malfunction generator that breaks agents at a given rate. Parameters ---------- agent """ malfunction: Malfunction = self.malfunction_generator(agent, self.np_random) if malfunction.num_broken_steps > 0: agent.malfunction_data['malfunction'] = malfunction.num_broken_steps agent.malfunction_data['moving_before_malfunction'] = agent.moving agent.malfunction_data['nr_malfunctions'] += 1 return
def test_load_env():
    """Load the bundled 10x10 test env via RailEnvPersister, add one agent
    and verify that exactly that one agent is counted.

    (The superseded RailEnv.load_resource-based variant that used to live
    here as commented-out code has been removed.)
    """
    env, env_dict = RailEnvPersister.load_resource("env_data.tests", "test-10x10.mpk")

    agent_static = EnvAgent((0, 0), 2, (5, 5), False)
    env.add_agent(agent_static)
    assert env.get_num_agents() == 1
def set_full_state_msg(self, msg_data):
    """
    Sets environment state with msgdata object passed as argument

    Parameters
    -------
    msg_data: msgpack object
    """
    # NOTE(review): the `encoding` kwarg was removed in msgpack >= 1.0 —
    # this assumes an older msgpack release; confirm the pinned version.
    data = msgpack.unpackb(msg_data, use_list=False, encoding='utf-8')
    self.rail.grid = np.array(data["grid"])

    # agents are always reset as not moving
    if "agents_static" in data:
        # Legacy save format: agents stored under "agents_static".
        self.agents = EnvAgent.load_legacy_static_agent(data["agents_static"])
    else:
        # Current format: rebuild agents from the first 12 tuple fields.
        self.agents = [EnvAgent(*d[0:12]) for d in data["agents"]]

    # setup with loaded data
    self.height, self.width = self.rail.grid.shape
    self.rail.height = self.height
    self.rail.width = self.width
    self.dones = dict.fromkeys(list(range(self.get_num_agents())) + ["__all__"], False)
def _set_agent_to_initial_position(self, agent: EnvAgent, new_position: IntVector2D): """ Sets the agent to its initial position. Updates the agent object and the position of the agent inside the global agent_position numpy array Parameters ------- agent: EnvAgent object new_position: IntVector2D """ agent.position = new_position self.agent_positions[agent.position] = agent.handle
def _move_agent_to_new_position(self, agent: EnvAgent, new_position: IntVector2D): """ Move the agent to the a new position. Updates the agent object and the position of the agent inside the global agent_position numpy array Parameters ------- agent: EnvAgent object new_position: IntVector2D """ agent.position = new_position self.agent_positions[agent.old_position] = -1 self.agent_positions[agent.position] = agent.handle
def click_agent(self, cell_row_col): """ The user has clicked on a cell - * If there is an agent, select it * If that agent was already selected, then deselect it * If there is no agent selected, and no agent in the cell, create one * If there is an agent selected, and no agent in the cell, move the selected agent to the cell """ # Has the user clicked on an existing agent? agent_idx = self.find_agent_at(cell_row_col) if agent_idx is None: # No if self.selected_agent is None: # Create a new agent and select it. agent = EnvAgent(position=cell_row_col, direction=0, target=cell_row_col, moving=False) self.selected_agent = self.env.add_agent(agent) self.view.oRT.update_background() else: # Move the selected agent to this cell agent = self.env.agents[self.selected_agent] agent.position = cell_row_col agent.old_position = cell_row_col else: # Yes # Have they clicked on the agent already selected? if self.selected_agent is not None and agent_idx == self.selected_agent: # Yes - deselect the agent self.selected_agent = None else: # No - select the agent self.selected_agent = agent_idx self.redraw()
def _fix_agent_after_malfunction(self, agent: EnvAgent): """ Updates agent malfunction variables and fixes broken agents Parameters ---------- agent """ # Ignore agents that are OK if self._is_agent_ok(agent): return # Reduce number of malfunction steps left if agent.malfunction_data['malfunction'] > 1: agent.malfunction_data['malfunction'] -= 1 return # Restart agents at the end of their malfunction agent.malfunction_data['malfunction'] -= 1 if 'moving_before_malfunction' in agent.malfunction_data: agent.moving = agent.malfunction_data['moving_before_malfunction'] return
def reset(self, regenerate_rail=True, regenerate_schedule=True, activate_agents=False, random_seed=None):
    ''' Reset the environment.

    Optionally regenerates the rail and/or the schedule, resets the
    agents (including malfunction state), rebuilds the cell orientation
    graph and deadlock detector, and returns the first observations
    together with the per-agent info dict.
    '''
    # Get a random seed
    if random_seed:
        self._seed(random_seed)

    # Regenerate the rail, if necessary
    optionals = {}
    if regenerate_rail or self.rail is None:
        rail, optionals = self._generate_rail()
        self.rail = rail
        self.height, self.width = self.rail.grid.shape
        # Re-bind the observation builder to the (possibly resized) env.
        self.obs_builder.set_env(self)

    # Set the distance map
    if optionals and 'distance_map' in optionals:
        self.distance_map.set(optionals['distance_map'])

    # Regenerate the schedule, if necessary
    if regenerate_schedule or regenerate_rail or self.get_num_agents(
    ) == 0:
        agents_hints = None
        if optionals and 'agents_hints' in optionals:
            agents_hints = optionals['agents_hints']
        schedule = self.schedule_generator(self.rail, self.number_of_agents,
                                           agents_hints, self.num_resets,
                                           self.np_random)
        self.agents = EnvAgent.from_schedule(schedule)
        self._max_episode_steps = schedule.max_episode_steps

    # Reset agents positions (-1 marks an empty cell in the grid).
    self.agent_positions = np.full((self.height, self.width), -1, dtype=int)
    self.reset_agents()
    for i, agent in enumerate(self.agents):
        if activate_agents:
            self.set_agent_active(agent)
        # Possibly induce a fresh malfunction on this agent.
        self._break_agent(agent)
        if agent.malfunction_data["malfunction"] > 0:
            # A broken agent must not carry over a pending cell-exit action.
            agent.speed_data[
                'transition_action_on_cellexit'] = RailEnvActions.DO_NOTHING
        self._fix_agent_after_malfunction(agent)
        # Reset partial rewards
        self.partial_rewards[i] = 0.0

    # Reset common variables
    self.num_resets += 1
    self._elapsed_steps = 0
    self.dones = dict.fromkeys(
        list(range(self.get_num_agents())) + ["__all__"], False)
    self.arrived_turns = [None] * self.get_num_agents()
    self.stop_actions = [0] * self.get_num_agents()

    # Build the cell orientation graph
    self.railway_encoding = CellOrientationGraph(grid=self.rail.grid,
                                                 agents=self.agents)

    # Reset the state of the observation builder with the new environment
    self.obs_builder.reset()
    self.distance_map.reset(self.agents, self.rail)

    # Reset the malfunction generator; newer generators expose a
    # `generate` method, older ones are called directly.
    if "generate" in dir(self.malfunction_generator):
        self.malfunction_generator.generate(reset=True)
    else:
        self.malfunction_generator(reset=True)

    # Empty the episode store of agent positions
    self.cur_episode = []

    # Compute deadlocks
    self.deadlocks_detector.reset(self.get_num_agents())

    # Build the info dict
    self.current_info = {
        'action_required': {},
        'malfunction': {},
        'speed': {},
        'status': {},
        'deadlocks': {},
        'deadlock_turns': {},
        'finished': {},
        'first_time_deadlock': {},
        'first_time_finished': {}
    }
    for i, agent in enumerate(self.agents):
        self.current_info['action_required'][i] = self.action_required(
            agent)
        self.current_info['malfunction'][i] = agent.malfunction_data[
            'malfunction']
        self.current_info['speed'][i] = agent.speed_data['speed']
        self.current_info['status'][i] = agent.status
        self.current_info["deadlocks"][
            i] = self.deadlocks_detector.deadlocks[i]
        self.current_info["deadlock_turns"][
            i] = self.deadlocks_detector.deadlock_turns[i]
        # An agent counts as finished if it is done or deadlocked.
        self.current_info["finished"][
            i] = self.dones[i] or self.deadlocks_detector.deadlocks[i]
        self.current_info["first_time_deadlock"][i] = (
            self.deadlocks_detector.deadlocks[i]
            and 0 == self.deadlocks_detector.deadlock_turns[i])
        self.current_info["first_time_finished"][i] = (
            self.dones[i] and 0 == self.arrived_turns[i])

    # Return the new observation vectors for each agent
    observation_dict = self._get_observations()
    return (self._normalize_obs(observation_dict), self.current_info)
def reset(self, regenerate_rail: bool = True, regenerate_schedule: bool = True, activate_agents: bool = False,
          random_seed: int = None) -> (Dict, Dict):
    """
    reset(regenerate_rail, regenerate_schedule, activate_agents, random_seed)

    The method resets the rail environment

    Parameters
    ----------
    regenerate_rail : bool, optional
        regenerate the rails
    regenerate_schedule : bool, optional
        regenerate the schedule and the static agents
    activate_agents : bool, optional
        activate the agents
    random_seed : int, optional
        random seed for environment

    Returns
    -------
    observation_dict: Dict
        Dictionary with an observation for each agent
    info_dict: Dict with agent specific information
    """
    if random_seed:
        self._seed(random_seed)

    optionals = {}
    if regenerate_rail or self.rail is None:
        rail, optionals = self.rail_generator(self.width, self.height,
                                              self.number_of_agents,
                                              self.num_resets,
                                              self.np_random)
        self.rail = rail
        self.height, self.width = self.rail.grid.shape

        # Do a new set_env call on the obs_builder to ensure
        # that obs_builder specific instantiations are made according to the
        # specifications of the current environment : like width, height, etc
        self.obs_builder.set_env(self)

    if optionals and 'distance_map' in optionals:
        self.distance_map.set(optionals['distance_map'])

    if regenerate_schedule or regenerate_rail or self.get_num_agents(
    ) == 0:
        agents_hints = None
        if optionals and 'agents_hints' in optionals:
            agents_hints = optionals['agents_hints']
        schedule = self.schedule_generator(self.rail, self.number_of_agents,
                                           agents_hints, self.num_resets,
                                           self.np_random)
        self.agents = EnvAgent.from_schedule(schedule)

        # The episode-step budget scales with the agents-per-city ratio
        # when city hints are available.
        if agents_hints and 'city_orientations' in agents_hints:
            ratio_nr_agents_to_nr_cities = self.get_num_agents() / len(
                agents_hints['city_orientations'])
            self._max_episode_steps = self.compute_max_episode_steps(
                width=self.width, height=self.height,
                ratio_nr_agents_to_nr_cities=ratio_nr_agents_to_nr_cities)
        else:
            self._max_episode_steps = self.compute_max_episode_steps(
                width=self.width, height=self.height)

    # -1 marks an empty cell in the global occupancy grid.
    self.agent_positions = np.zeros(
        (self.height, self.width), dtype=int) - 1

    # Reset agents to initial
    self.reset_agents()

    for agent in self.agents:
        # Induce malfunctions
        if activate_agents:
            self.set_agent_active(agent)

        self._break_agent(agent)

        if agent.malfunction_data["malfunction"] > 0:
            # A broken agent must not carry over a pending cell-exit action.
            agent.speed_data[
                'transition_action_on_cellexit'] = RailEnvActions.DO_NOTHING

        # Fix agents that finished their malfunction
        self._fix_agent_after_malfunction(agent)

    self.num_resets += 1
    self._elapsed_steps = 0

    # TODO perhaps dones should be part of each agent.
    self.dones = dict.fromkeys(
        list(range(self.get_num_agents())) + ["__all__"], False)

    # Reset the state of the observation builder with the new environment
    self.obs_builder.reset()
    self.distance_map.reset(self.agents, self.rail)

    # Reset the malfunction generator
    self.malfunction_generator(reset=True)

    info_dict: Dict = {
        'action_required': {
            i: self.action_required(agent)
            for i, agent in enumerate(self.agents)
        },
        'malfunction': {
            i: agent.malfunction_data['malfunction']
            for i, agent in enumerate(self.agents)
        },
        'speed': {
            i: agent.speed_data['speed']
            for i, agent in enumerate(self.agents)
        },
        'status': {i: agent.status for i, agent in enumerate(self.agents)}
    }
    # Return the new observation vectors for each agent
    observation_dict: Dict = self._get_observations()
    return observation_dict, info_dict
def set_agent_active(self, agent: EnvAgent):
    """Move a READY_TO_DEPART agent onto the grid if its start cell is
    free: the agent becomes ACTIVE and is written into the global
    occupancy grid via `_set_agent_to_initial_position`.
    """
    # Only agents waiting to depart can be activated.
    if agent.status != RailAgentStatus.READY_TO_DEPART:
        return
    start = agent.initial_position
    # The starting cell may still be occupied by another agent.
    if not self.cell_free(start):
        return
    agent.status = RailAgentStatus.ACTIVE
    self._set_agent_to_initial_position(agent, start)