Example #1
    def reset(self,
              regenerate_rail=True,
              regenerate_schedule=True,
              activate_agents=False,
              random_seed=None):
        '''
        Reset the environment and return the initial observations
        and info dict for each agent
        '''
        # Seed the random number generator, if a seed was given
        if random_seed is not None:
            self._seed(random_seed)

        # Regenerate the rail, if necessary
        optionals = {}
        if regenerate_rail or self.rail is None:
            rail, optionals = self._generate_rail()
            self.rail = rail
            self.height, self.width = self.rail.grid.shape
            self.obs_builder.set_env(self)

        # Set the distance map
        if optionals and 'distance_map' in optionals:
            self.distance_map.set(optionals['distance_map'])

        # Regenerate the schedule, if necessary
        if (regenerate_schedule or regenerate_rail
                or self.get_num_agents() == 0):
            agents_hints = None
            if optionals and 'agents_hints' in optionals:
                agents_hints = optionals['agents_hints']

            schedule = self.schedule_generator(self.rail,
                                               self.number_of_agents,
                                               agents_hints, self.num_resets,
                                               self.np_random)
            self.agents = EnvAgent.from_schedule(schedule)
            self._max_episode_steps = schedule.max_episode_steps

        # Reset agent positions
        self.agent_positions = np.full((self.height, self.width),
                                       -1,
                                       dtype=int)
        self.reset_agents()
        for i, agent in enumerate(self.agents):
            if activate_agents:
                self.set_agent_active(agent)
            self._break_agent(agent)
            if agent.malfunction_data['malfunction'] > 0:
                agent.speed_data['transition_action_on_cellexit'] = \
                    RailEnvActions.DO_NOTHING
            self._fix_agent_after_malfunction(agent)

            # Reset partial rewards
            self.partial_rewards[i] = 0.0

        # Reset common variables
        self.num_resets += 1
        self._elapsed_steps = 0
        self.dones = dict.fromkeys(
            list(range(self.get_num_agents())) + ["__all__"], False)
        self.arrived_turns = [None] * self.get_num_agents()
        self.stop_actions = [0] * self.get_num_agents()

        # Build the cell orientation graph
        self.railway_encoding = CellOrientationGraph(grid=self.rail.grid,
                                                     agents=self.agents)

        # Reset the state of the observation builder with the new environment
        self.obs_builder.reset()
        self.distance_map.reset(self.agents, self.rail)

        # Reset the malfunction generator (both the newer object API with
        # a generate() method and the older plain-callable API are supported)
        if hasattr(self.malfunction_generator, 'generate'):
            self.malfunction_generator.generate(reset=True)
        else:
            self.malfunction_generator(reset=True)

        # Empty the episode store of agent positions
        self.cur_episode = []

        # Reset the deadlocks detector
        self.deadlocks_detector.reset(self.get_num_agents())

        # Build the info dict
        self.current_info = {
            'action_required': {},
            'malfunction': {},
            'speed': {},
            'status': {},
            'deadlocks': {},
            'deadlock_turns': {},
            'finished': {},
            'first_time_deadlock': {},
            'first_time_finished': {}
        }
        for i, agent in enumerate(self.agents):
            self.current_info['action_required'][i] = \
                self.action_required(agent)
            self.current_info['malfunction'][i] = \
                agent.malfunction_data['malfunction']
            self.current_info['speed'][i] = agent.speed_data['speed']
            self.current_info['status'][i] = agent.status
            self.current_info['deadlocks'][i] = \
                self.deadlocks_detector.deadlocks[i]
            self.current_info['deadlock_turns'][i] = \
                self.deadlocks_detector.deadlock_turns[i]
            self.current_info['finished'][i] = \
                self.dones[i] or self.deadlocks_detector.deadlocks[i]
            self.current_info['first_time_deadlock'][i] = (
                self.deadlocks_detector.deadlocks[i]
                and self.deadlocks_detector.deadlock_turns[i] == 0)
            self.current_info['first_time_finished'][i] = (
                self.dones[i] and self.arrived_turns[i] == 0)

        # Return the new observation vectors for each agent
        observation_dict = self._get_observations()
        return (self._normalize_obs(observation_dict), self.current_info)
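
A minimal usage sketch for this variant. The class name CustomRailEnv is hypothetical (any environment class defining the reset() above would do), and the constructor arguments are assumptions:

# Hypothetical environment class exposing the reset() shown above
env = CustomRailEnv(width=40, height=40, number_of_agents=5)

# Reset with a fixed seed for reproducibility; this variant returns
# normalized observations plus the extended info dict
obs, info = env.reset(regenerate_rail=True,
                      regenerate_schedule=True,
                      random_seed=42)

# The info dict carries per-agent flags, including the deadlock
# bookkeeping this variant adds on top of the standard fields
for handle in range(env.get_num_agents()):
    print(handle, info['status'][handle], info['deadlocks'][handle])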
Example #2
    def reset(self,
              regenerate_rail: bool = True,
              regenerate_schedule: bool = True,
              activate_agents: bool = False,
              random_seed: int = None) -> Tuple[Dict, Dict]:
        """
        reset(regenerate_rail, regenerate_schedule, activate_agents, random_seed)

        Reset the rail environment

        Parameters
        ----------
        regenerate_rail : bool, optional
            regenerate the rails
        regenerate_schedule : bool, optional
            regenerate the schedule and the static agents
        activate_agents : bool, optional
            activate the agents
        random_seed : int, optional
            random seed for the environment

        Returns
        -------
        observation_dict : Dict
            Dictionary with an observation for each agent
        info_dict : Dict
            Dictionary with agent-specific information
        """

        if random_seed is not None:
            self._seed(random_seed)

        optionals = {}
        if regenerate_rail or self.rail is None:
            rail, optionals = self.rail_generator(self.width, self.height,
                                                  self.number_of_agents,
                                                  self.num_resets,
                                                  self.np_random)

            self.rail = rail
            self.height, self.width = self.rail.grid.shape

            # Call set_env on the obs_builder again so that any
            # obs_builder-specific instantiations match the current
            # environment's specifications (e.g. width and height)
            self.obs_builder.set_env(self)

        if optionals and 'distance_map' in optionals:
            self.distance_map.set(optionals['distance_map'])

        if (regenerate_schedule or regenerate_rail
                or self.get_num_agents() == 0):
            agents_hints = None
            if optionals and 'agents_hints' in optionals:
                agents_hints = optionals['agents_hints']

            schedule = self.schedule_generator(self.rail,
                                               self.number_of_agents,
                                               agents_hints, self.num_resets,
                                               self.np_random)
            self.agents = EnvAgent.from_schedule(schedule)

            if agents_hints and 'city_orientations' in agents_hints:
                ratio_nr_agents_to_nr_cities = self.get_num_agents() / len(
                    agents_hints['city_orientations'])
                self._max_episode_steps = self.compute_max_episode_steps(
                    width=self.width,
                    height=self.height,
                    ratio_nr_agents_to_nr_cities=ratio_nr_agents_to_nr_cities)
            else:
                self._max_episode_steps = self.compute_max_episode_steps(
                    width=self.width, height=self.height)

        # -1 marks a grid cell that is not occupied by any agent
        self.agent_positions = np.full((self.height, self.width),
                                       -1,
                                       dtype=int)

        # Reset agents to their initial states
        self.reset_agents()

        for agent in self.agents:
            if activate_agents:
                self.set_agent_active(agent)

            # Induce malfunctions
            self._break_agent(agent)

            if agent.malfunction_data['malfunction'] > 0:
                agent.speed_data['transition_action_on_cellexit'] = \
                    RailEnvActions.DO_NOTHING

            # Fix agents that finished their malfunction
            self._fix_agent_after_malfunction(agent)

        self.num_resets += 1
        self._elapsed_steps = 0

        # TODO perhaps dones should be part of each agent.
        self.dones = dict.fromkeys(
            list(range(self.get_num_agents())) + ["__all__"], False)

        # Reset the state of the observation builder with the new environment
        self.obs_builder.reset()
        self.distance_map.reset(self.agents, self.rail)

        # Reset the malfunction generator
        self.malfunction_generator(reset=True)

        info_dict: Dict = {
            'action_required': {
                i: self.action_required(agent)
                for i, agent in enumerate(self.agents)
            },
            'malfunction': {
                i: agent.malfunction_data['malfunction']
                for i, agent in enumerate(self.agents)
            },
            'speed': {
                i: agent.speed_data['speed']
                for i, agent in enumerate(self.agents)
            },
            'status': {
                i: agent.status
                for i, agent in enumerate(self.agents)
            }
        }
        # Return the new observation vectors for each agent
        observation_dict: Dict = self._get_observations()
        return observation_dict, info_dict
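
A short usage sketch for this version. The imports assume the flatland-rl 2.x package layout (where schedule generators still exist); treat the exact paths and generator parameters as assumptions:

from flatland.envs.observations import GlobalObsForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator

env = RailEnv(width=30,
              height=30,
              rail_generator=sparse_rail_generator(max_num_cities=2),
              schedule_generator=sparse_schedule_generator(),
              number_of_agents=3,
              obs_builder_object=GlobalObsForRailEnv())

# reset() returns one observation per agent plus the info dict
obs, info = env.reset(random_seed=7)
for handle in info['status']:
    print(handle, info['status'][handle], info['speed'][handle])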