def __init__(self, actor_id, game, seed, env_class=None, visualize=False, agent_history_length=1, random_start=False, partially_observed=False):
    """Create and wrap the gym environment for a single actor.

    :param actor_id: index of this actor; combined with ``seed`` so each
        actor gets a distinct environment seed
    :param game: gym environment id to instantiate
    :param seed: base random seed
    :param env_class: fallback environment class used when ``game`` is not a
        registered gym environment
    :param visualize: whether rendering was requested (stored, not acted on here)
    :param agent_history_length: number of stacked observations; values > 1
        wrap the env in ``ObsStack``
    :param random_start: stored nowhere here -- presumably consumed elsewhere;
        TODO confirm against the rest of the class
    :param partially_observed: wrap with ``PartiallyObservedCorridor`` instead
        of ``OneHotObservation``
    """
    try:
        self.env = gym.make(game)
        try:
            self.desc = self.env.unwrapped.desc
        # BUG FIX: was a bare `except:`, which also swallows
        # KeyboardInterrupt/SystemExit; narrowed to Exception.
        except Exception:
            # Not every env exposes a `desc` grid layout.
            self.desc = None
    except (NameError, ImportError):
        assert env_class is not None, ("The specified environment does not seem to be a registered Gym environment: env_class cannot be None.")
        # Build the env manually from its registered spec and re-apply the
        # TimeLimit wrapper that gym.make() would normally add.
        spec = registry.spec(game)
        self.env = env_class(**spec._kwargs)
        self.env.unwrapped._spec = spec
        self.desc = self.env.desc
        self.env = TimeLimit(self.env,
                             max_episode_steps=self.env.spec.max_episode_steps,
                             max_episode_seconds=self.env.spec.max_episode_seconds)
    # Distinct seed per actor (actor_id + 1 keeps actor 0 from zeroing it out).
    self.env.seed(seed * (actor_id + 1))
    if partially_observed:
        self.env = PartiallyObservedCorridor(self.env)
    else:
        self.env = OneHotObservation(self.env)
    if agent_history_length > 1:
        self.env = ObsStack(self.env, agent_history_length)
    self.agent_history_length = agent_history_length
    self.num_actions = self.env.action_space.n
    self.gym_actions = list(range(self.env.action_space.n))
    self.visualize = visualize
    # NOTE(review): grid_shape dereferences self.desc, which may be None on
    # the gym.make() path -- confirm callers only use grid envs here.
    self.grid_shape = self.desc.shape
    self.game = game
    self.np_random, seed = seeding.np_random(seed)
def __init__(
    self,
    name: str,
    clone_seeds: bool = True,
    n_repeat_action: int = 1,
    min_dt: int = 1,
    obs_ram: bool = False,
    episodic_live: bool = False,
    autoreset: bool = True,
):
    """Set up an Atari gym environment without the undocumented TimeLimit wrapper."""
    super(AtariEnvironment, self).__init__(name=name, n_repeat_action=n_repeat_action)
    # Plain configuration flags.
    self.min_dt = min_dt
    self.clone_seeds = clone_seeds
    self.obs_ram = obs_ram
    self.episodic_life = episodic_live
    self.autoreset = autoreset
    # This is for removing undocumented wrappers: clearing the episode caps
    # on the spec before make() prevents the TimeLimit wrapper.
    spec = gym_registry.spec(name)
    spec.max_episode_steps = None  # not actually needed, but we feel safer
    spec.max_episode_time = None
    self._env = spec.make()
    # Mirror the wrapped env's standard gym interface on this object.
    for attr in ("action_space", "observation_space", "reward_range", "metadata"):
        setattr(self, attr, getattr(self._env, attr))
def __init__(
    self,
    check_death: bool = True,
    unprocessed_state: bool = False,
    score_objects: bool = False,
    x_repeat=2,
    objects_from_pixels=False,
    objects_remember_rooms=False,
    only_keys=False,
):
    """Build the Montezuma's Revenge wrapper around the deterministic ALE env.

    NOTE(review): assumes ``self.unwrapped`` delegates to ``self.env`` --
    confirm against the rest of the class.
    """
    # TODO: version that also considers the room objects were found in
    spec = gym_registry.spec("MontezumaRevengeDeterministic-v4")
    # Clearing the caps strips the undocumented TimeLimit wrapper.
    spec.max_episode_steps = None  # not actually needed, but we feel safer
    spec.max_episode_time = None
    self.env = spec.make()
    self.env.reset()
    self.unwrapped.seed(0)
    # Behaviour flags taken straight from the constructor arguments.
    self.check_death = check_death
    self.score_objects = score_objects
    self.unprocessed_state = unprocessed_state
    self.objects_from_pixels = objects_from_pixels
    self.objects_remember_rooms = objects_remember_rooms
    self.only_keys = only_keys
    self.x_repeat = x_repeat
    self.ignore_ram_death = False
    # Per-episode bookkeeping.
    self.ram = None
    self.cur_steps = 0
    self.cur_score = 0
    self.cur_lives = 5
    self.state = []
    self.ram_death_state = -1
    # Room exploration tracking.
    self.rooms = {}
    self.room_time = (None, None)
    self.room_threshold = 40
    self.pos = MontezumaPosLevel(0, 0, 0, 0, 0)
def dynamicEnvLoad(env_id):
    """
    Get from Gym, the module where the environment is stored

    :param env_id: (str) environment identity
    :return: (module, str, str) module_env, class_name, env_module_path
    :raises AssertionError: if the entry point module cannot be imported
    """
    # From the env_id, fetch the entry_point and distinguish whether it is a
    # callable or a "module.path:ClassName" string.
    entry_point = registry.spec(env_id)._entry_point
    if callable(entry_point):
        class_name = entry_point.__name__
        env_module_path = entry_point.__module__
    else:
        # Split once instead of twice (old code called split(':') two times).
        parts = entry_point.split(':')
        env_module_path = parts[0]
        class_name = parts[1]
    # Try to dynamically load the module_env in order to fetch its globals.
    # If it fails, it means that it was unable to load the path from the
    # entry_point; should this occur, some parameters will not be correctly saved.
    try:
        module_env = importlib.import_module(env_module_path)
    except ImportError as err:
        # BUG FIX: chain the original ImportError (`from err`) so the root
        # cause is not lost, and fix the "environement" typo in the message.
        raise AssertionError(
            "Error: could not import module {}, ".format(env_module_path) +
            "Halting execution. Are you sure this is a valid environment?") from err
    return module_env, class_name, env_module_path
def make_atari(env_id):
    """Build a NoFrameskip Atari env with noop resets and max-and-skip frames.

    NOTE(review): ``frame_skip`` is a free variable resolved from the
    enclosing module scope -- confirm it is defined there.
    """
    spec = gym_registry.spec(env_id)
    # Clearing the episode caps removes the undocumented TimeLimit wrapper.
    spec.max_episode_steps = None  # not actually needed, but we feel safer
    spec.max_episode_time = None
    env = spec.make()
    assert 'NoFrameskip' in env.spec.id
    wrapped = NoopResetEnv(env, noop_max=30)
    return MaxAndSkipEnv(wrapped, skip=frame_skip)
def __init__(self, name: str, clone_seeds: bool = True, n_repeat_action: int = 1):
    """Instantiate an Atari env, bypassing the undocumented TimeLimit wrapper."""
    super(AtariEnvironment, self).__init__(name=name, n_repeat_action=n_repeat_action)
    self.clone_seeds = clone_seeds
    # Null out the episode caps on the registered spec so make() does not
    # attach the TimeLimit wrapper.
    spec = gym_registry.spec(name)
    spec.max_episode_steps = None  # not actually needed, but we feel safer
    spec.max_episode_time = None
    self._env = spec.make()
    # Expose the wrapped env's standard gym interface on this object.
    for attr in ("action_space", "observation_space", "reward_range", "metadata"):
        setattr(self, attr, getattr(self._env, attr))
def init_env(self):
    """Initialize the target :class:`gym.Env` instance."""
    spec = gym_registry.spec(self.name)
    # Stash the documented episode limits under private names, then clear
    # them so spec.make() does not attach the undocumented TimeLimit wrapper.
    for limit in ("max_episode_steps", "max_episode_time"):
        if hasattr(spec, limit):
            setattr(spec, "_" + limit, getattr(spec, limit))
    spec.max_episode_steps = None
    spec.max_episode_time = None
    self.gym_env: gym.Env = spec.make()
    if self._wrappers is not None:
        self.apply_wrappers(self._wrappers)
    # Mirror the wrapped env's public gym interface.
    self.action_space = self.gym_env.action_space
    self.observation_space = self.gym_env.observation_space
    self.reward_range = self.gym_env.reward_range
    self.metadata = self.gym_env.metadata
def __init__(
    self,
    check_death: bool = True,
    obs_type: str = "rgb",
    score_objects: bool = False,
    objects_from_pixels: bool = False,
    objects_remember_rooms: bool = False,
    only_keys: bool = False,
    death_room_8: bool = True,
):
    """Initialize a :class:`CustomMontezuma`."""
    # TODO: version that also considers the room objects were found in
    spec = gym_registry.spec("MontezumaRevengeDeterministic-v4")
    # Effectively-infinite limits neutralize the undocumented TimeLimit wrapper.
    spec.max_episode_steps = int(1e100)  # not actually needed, but we feel safer
    spec.max_episode_time = int(1e100)
    self.env = spec.make()
    self.env.reset()
    self.unwrapped.seed(0)
    # Behaviour flags from the constructor arguments.
    self.check_death = check_death
    self.score_objects = score_objects
    self.objects_from_pixels = objects_from_pixels
    self.objects_remember_rooms = objects_remember_rooms
    self.only_keys = only_keys
    self.coords_obs = obs_type == "coords"
    self._death_room_8 = death_room_8
    self._x_repeat = 2
    self.ignore_ram_death = False
    # Per-episode bookkeeping.
    self.ram = None
    self.cur_steps = 0
    self.cur_score = 0
    self.cur_lives = 5
    self.state = []
    self.ram_death_state = -1
    # Room exploration tracking.
    self.rooms = {}
    self.room_time = (None, None)
    self.room_threshold = 40
    self.pos = MontezumaPosLevel(0, 0, 0, 0, 0)
    if self.coords_obs:
        # Coordinate observations: an unbounded float vector whose length is
        # taken from one sample returned by get_coords().
        shape = self.get_coords().shape
        self.observation_space = gym.spaces.Box(
            low=-numpy.inf,
            high=numpy.inf,
            dtype=numpy.float32,
            shape=shape,
        )
def __init__(
    self,
    state: AtariState = None,
    name: str = "MsPacman-v0",
    clone_seeds: bool = True,
    env: AtariEnv = None,
    fixed_steps: int = 1,
):
    """
    Environment class used for managing Atari games. It can be used as a
    perfect simulation, or as an imperfect one. It can handle rgb images,
    or ram as observations.

    :param state: Initial state; when None, a fresh state is taken from reset().
    :param name: Name of the atari environment to be created.
        See: https://gym.openai.com/envs#atari works also with
        "GameName-ram-v0" like environments.
    :param clone_seeds: bool; If true, clone the pseudo random number
        generators of the emulator for a perfect simulation. False provides
        an stochastic simulation.
    :param env: Openai AtariEnv, optional; Use an already existing env
        instead of creating one.
    :param fixed_steps: The number of consecutive times that the action will
        be applied. This allows us to set the frequency at which the policy
        will play.
    :raises ValueError: when neither an env nor an env name is given.
    """
    self._clone_seeds = clone_seeds
    self._cum_reward = 0
    if env is not None:
        # Adopt the caller-supplied env and take its registered id as name.
        self._env = env
        self._name = env.spec.id
    elif name:
        # Clear the episode caps so make() skips the TimeLimit wrapper.
        spec = gym_registry.spec(name)
        spec.max_episode_steps = None  # not actually needed, but we feel safer
        spec.max_episode_time = None
        self._env = spec.make()
        self._name = name
    else:
        raise ValueError("An env or an env name must be specified")
    if state is not None:
        self._state = state
    else:
        self._state = AtariState()  # placeholder; replaced by the reset state
        self._state = self.reset()
    super(AtariEnvironment, self).__init__(
        name=name, state=self.state, fixed_steps=fixed_steps
    )
def __init__(self, name: str = "CartPole-v0", env: gym.Env = None):
    """This initializes the state of the environment to match the desired
    environment. It should deal with the undocumented wrappers that gym has
    so we avoid random resets when simulating.

    :param name: gym environment id to build when no env is supplied.
    :param env: already-constructed gym env to adopt instead of building one.
    :raises ValueError: when neither an env nor an env name is given.
    """
    super(OpenAIEnvironment, self).__init__(name=name)
    if env is not None:
        # Adopt the caller-supplied env and take its registered id as name.
        self._env = env
        self._name = env.spec.id
    elif name:
        # Clear the episode caps so make() skips the TimeLimit wrapper.
        spec = gym_registry.spec(name)
        spec.max_episode_steps = None  # not actually needed, but we feel safer
        spec.max_episode_time = None
        self._env = spec.make()
        self._name = name
    else:
        raise ValueError("An env or an env name must be specified")
    self._state = self.reset()
def __init__(
    self,
    name: str,
    clone_seeds: bool = True,
    n_repeat_action: int = 1,
    min_dt: int = 1,
    obs_ram: bool = False,
    episodic_live: bool = False,
    autoreset: bool = True,
):
    """Create an environment to play OpenAI gym Atari Games.

    :param name: Name of the environment. Follows standard gym syntax rules.
    :param clone_seeds: Clone the random seed of the ALE emulator when
        reading/setting the state.
    :param n_repeat_action: Consecutive number of times a given action will
        be applied.
    :param min_dt: Internal number of times an action will be applied for
        each step in n_repeat_action.
    :param obs_ram: Use ram as observations even though it is not specified
        in the name parameter.
    :param episodic_live: Return end = True when losing a live.
    :param autoreset: Restart environment when reaching a terminal state.
    """
    super(AtariEnvironment, self).__init__(name=name, n_repeat_action=n_repeat_action)
    # Plain configuration flags.
    self.min_dt = min_dt
    self.clone_seeds = clone_seeds
    self.obs_ram = obs_ram
    self.episodic_life = episodic_live
    self.autoreset = autoreset
    # This is for removing undocumented wrappers: null the episode caps on
    # the spec so make() does not attach TimeLimit.
    spec = gym_registry.spec(name)
    spec.max_episode_steps = None  # not actually needed, but we feel safer
    spec.max_episode_time = None
    self._env = spec.make()
    # Mirror the wrapped env's standard gym interface on this object.
    for attr in ("action_space", "observation_space", "reward_range", "metadata"):
        setattr(self, attr, getattr(self._env, attr))
def _make(id_, env_kwargs=None):
    """
    Recreating the gym make function from gym/envs/registration.py
    as such as it can support extra arguments for the environment

    :param id_: (str) The environment ID
    :param env_kwargs: (dict) The extra arguments for the environment
    :return: (gym.Env) the instantiated (and possibly TimeLimit-wrapped) env
    """
    if env_kwargs is None:
        env_kwargs = {}
    # getting the spec from the ID we want
    spec = registry.spec(id_)
    # Keeping the checks and safe guards of the old code.
    # BUG FIX: the message used `spec.id_`, but the spec attribute is `id`
    # (`id_` is this function's parameter); the old code raised
    # AttributeError instead of the intended AssertionError message.
    assert spec._entry_point is not None, \
        'Attempting to make deprecated env {}. ' \
        '(HINT: is there a newer registered version of this env?)'.format(spec.id)
    if callable(spec._entry_point):
        env = spec._entry_point(**env_kwargs)
    else:
        cls = load(spec._entry_point)
        # create the env, with the original kwargs, and the new ones overriding them if needed
        env = cls(**{**spec._kwargs, **env_kwargs})
    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = spec
    # Keeping the old patching system for _reset, _step and timestep limit
    if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(
            env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)
    if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
        from gym.wrappers.time_limit import TimeLimit
        env = TimeLimit(env,
                        max_episode_steps=env.spec.max_episode_steps,
                        max_episode_seconds=env.spec.max_episode_seconds)
    return env
dict_actions[agent] = action observations, rewards, dones, infos = self._env.step(dict_actions) obs = tuple([observations[k] for k in self._env.agents]) rewards = [rewards[k] for k in self._env.agents] dones = [dones[k] for k in self._env.agents] info = {} return obs, rewards, dones, info def close(self): return self._env.close() envs = Path(os.path.dirname(os.path.realpath(__file__))).glob("**/*_v?.py") for e in envs: name = e.stem.replace("_", "-") lib = e.parent.stem filename = e.stem gymkey = f"pz-{lib}-{name}" register( gymkey, entry_point="pettingzoo:PettingZooWrapper", kwargs={ "lib_name": lib, "env_name": filename, }, ) registry.spec(gymkey).gymma_wrappers = tuple()