Example 1
    def __init__(self, actor_id, game, seed, env_class=None, visualize=False, agent_history_length=1, random_start=False,
                 partially_observed=False):
        try:
            self.env = gym.make(game)
            try:
                self.desc = self.env.unwrapped.desc
            except AttributeError:
                # Not every environment exposes a grid description.
                self.desc = None
        except (NameError, ImportError):
            assert env_class is not None, "The specified environment does not seem to be a registered Gym environment: env_class cannot be None."
            spec = registry.spec(game)
            self.env = env_class(**spec._kwargs)
            self.env.unwrapped._spec = spec
            self.desc = self.env.desc
            self.env = TimeLimit(self.env,
                                 max_episode_steps=self.env.spec.max_episode_steps,
                                 max_episode_seconds=self.env.spec.max_episode_seconds)
        self.env.seed(seed * (actor_id + 1))
        if partially_observed:
            self.env = PartiallyObservedCorridor(self.env)
        else:
            self.env = OneHotObservation(self.env)
        if agent_history_length > 1:
            self.env = ObsStack(self.env, agent_history_length)

        self.agent_history_length = agent_history_length

        self.num_actions = self.env.action_space.n
        self.gym_actions = list(range(self.env.action_space.n))
        self.visualize = visualize

        self.grid_shape = self.desc.shape

        self.game = game
        self.np_random, seed = seeding.np_random(seed)
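
The seed handed to the emulator above is derived as seed * (actor_id + 1), so each actor gets a distinct, reproducible stream from one shared base seed. A minimal sketch of the scheme (plain Python, names illustrative):

# Minimal sketch of the per-actor seeding scheme used above: each actor
# derives a distinct, reproducible seed from a shared base seed.
base_seed = 42
for actor_id in range(4):
    actor_seed = base_seed * (actor_id + 1)
    print(actor_id, actor_seed)  # -> (0, 42), (1, 84), (2, 126), (3, 168)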
Example 2
    def __init__(
        self,
        name: str,
        clone_seeds: bool = True,
        n_repeat_action: int = 1,
        min_dt: int = 1,
        obs_ram: bool = False,
        episodic_live: bool = False,
        autoreset: bool = True,
    ):

        super(AtariEnvironment, self).__init__(name=name, n_repeat_action=n_repeat_action)
        self.min_dt = min_dt
        self.clone_seeds = clone_seeds
        # This is for removing undocumented wrappers.
        spec = gym_registry.spec(name)
        # not actually needed, but we feel safer
        spec.max_episode_steps = None
        spec.max_episode_time = None
        self._env = spec.make()
        self.action_space = self._env.action_space
        self.observation_space = self._env.observation_space
        self.reward_range = self._env.reward_range
        self.metadata = self._env.metadata
        self.obs_ram = obs_ram
        self.episodic_life = episodic_live
        self.autoreset = autoreset
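
The spec-nulling lines above implement a pattern that recurs throughout these examples: building the raw environment through spec.make() so that gym's implicit TimeLimit wrapper is never applied. A standalone sketch of the idea, assuming a legacy gym (pre-0.21) registry:

# Standalone sketch of the "remove undocumented wrappers" pattern, assuming a
# legacy gym (pre-0.21) where registry.spec() and spec.make() exist.
from gym.envs.registration import registry

spec = registry.spec("CartPole-v0")
spec.max_episode_steps = None  # belt and braces: disable the implicit TimeLimit
env = spec.make()              # spec.make() returns the raw, unwrapped env
print(type(env).__name__)      # CartPoleEnv, not TimeLimit
env.close()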
Example 3
 def __init__(
     self,
     check_death: bool = True,
     unprocessed_state: bool = False,
     score_objects: bool = False,
     x_repeat=2,
     objects_from_pixels=False,
     objects_remember_rooms=False,
     only_keys=False,
 ):  # TODO: version that also considers the room objects were found in
     spec = gym_registry.spec("MontezumaRevengeDeterministic-v4")
     # not actually needed, but we feel safer
     spec.max_episode_steps = None
     spec.max_episode_time = None
     self.env = spec.make()
     self.env.reset()
     self.score_objects = score_objects
     self.ram = None
     self.check_death = check_death
     self.cur_steps = 0
     self.cur_score = 0
     self.rooms = {}
     self.room_time = (None, None)
     self.room_threshold = 40
     self.unwrapped.seed(0)
     self.unprocessed_state = unprocessed_state
     self.state = []
     self.ram_death_state = -1
     self.x_repeat = x_repeat
     self.cur_lives = 5
     self.ignore_ram_death = False
     self.objects_from_pixels = objects_from_pixels
     self.objects_remember_rooms = objects_remember_rooms
     self.only_keys = only_keys
     self.pos = MontezumaPosLevel(0, 0, 0, 0, 0)
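
This example targets the Deterministic-v4 ROM, which fixes the frameskip instead of sampling it per step as the plain v4 variant does. A quick check of that property, assuming a legacy gym with atari-py installed:

# Quick check of the deterministic variant used above (assumes a legacy gym
# with atari-py installed): Deterministic ROMs fix the frameskip at 4.
import gym

env = gym.make("MontezumaRevengeDeterministic-v4")
print(env.unwrapped.frameskip)  # 4
env.close()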
Example 4
def dynamicEnvLoad(env_id):
    """
    Get from Gym the module where the environment is stored.
    :param env_id: (str) environment ID
    :return: (module, str, str) module_env, class_name, env_module_path
    """
    # From the env_id, get the entry_point and determine whether it is a callable or a string
    entry_point = registry.spec(env_id)._entry_point
    if callable(entry_point):
        class_name = entry_point.__name__
        env_module_path = entry_point.__module__
    else:
        class_name = entry_point.split(':')[1]
        env_module_path = entry_point.split(':')[0]
    # Let's try to dynamically import module_env in order to fetch its globals.
    # If this fails, the path from the entry_point could not be loaded;
    # should this occur, some parameters will not be saved correctly.
    try:
        module_env = importlib.import_module(env_module_path)
    except ImportError:
        raise AssertionError(
            "Error: could not import module {}. ".format(env_module_path) +
            "Halting execution. Are you sure this is a valid environment?")

    return module_env, class_name, env_module_path
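
For a registered environment whose entry point is a string, the helper splits it on ":" into a module path and a class name. A hedged usage sketch, assuming a legacy gym where "CartPole-v0" has the entry point "gym.envs.classic_control:CartPoleEnv":

# Hedged usage sketch for dynamicEnvLoad, assuming the legacy gym entry point
# "gym.envs.classic_control:CartPoleEnv" for CartPole-v0.
module_env, class_name, env_module_path = dynamicEnvLoad("CartPole-v0")
print(class_name)       # CartPoleEnv
print(env_module_path)  # gym.envs.classic_control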
Example 5
def make_atari(env_id, frame_skip=4):
    # frame_skip: number of frames each action is repeated for; 4 is the usual
    # default (frame_skip was a free variable in the original snippet).
    spec = gym_registry.spec(env_id)
    # not actually needed, but we feel safer
    spec.max_episode_steps = None
    spec.max_episode_time = None
    env = spec.make()
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=frame_skip)
    return env
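
A hedged usage sketch for make_atari, assuming a legacy gym with atari-py and the baselines-style NoopResetEnv/MaxAndSkipEnv wrappers already imported in the surrounding module:

# Hedged usage sketch: builds a raw NoFrameskip env, applies noop resets, and
# repeats each action frame_skip times via MaxAndSkipEnv.
env = make_atari("PongNoFrameskip-v4", frame_skip=4)
obs = env.reset()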
Example 6
 def __init__(self,
              name: str,
              clone_seeds: bool = True,
              n_repeat_action: int = 1):
     super(AtariEnvironment, self).__init__(name=name,
                                            n_repeat_action=n_repeat_action)
     self.clone_seeds = clone_seeds
     spec = gym_registry.spec(name)
     # not actually needed, but we feel safer
     spec.max_episode_steps = None
     spec.max_episode_time = None
     self._env = spec.make()
     self.action_space = self._env.action_space
     self.observation_space = self._env.observation_space
     self.reward_range = self._env.reward_range
     self.metadata = self._env.metadata
Example 7
 def init_env(self):
     """Initialize the target :class:`gym.Env` instance."""
     # Remove any undocumented wrappers
     spec = gym_registry.spec(self.name)
     if hasattr(spec, "max_episode_steps"):
         setattr(spec, "_max_episode_steps", spec.max_episode_steps)
     if hasattr(spec, "max_episode_time"):
         setattr(spec, "_max_episode_time", spec.max_episode_time)
     spec.max_episode_steps = None
     spec.max_episode_time = None
     self.gym_env: gym.Env = spec.make()
     if self._wrappers is not None:
         self.apply_wrappers(self._wrappers)
     self.action_space = self.gym_env.action_space
     self.observation_space = self.gym_env.observation_space
     self.reward_range = self.gym_env.reward_range
     self.metadata = self.gym_env.metadata
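
Unlike the other examples, init_env stashes the original limits on private attributes before nulling them, so they stay recoverable. A sketch of that round trip, assuming the same legacy registry aliased as gym_registry:

# Sketch of the save-then-disable round trip used in init_env above
# (assumes a legacy gym registry, aliased as in the surrounding code).
from gym.envs.registration import registry as gym_registry

spec = gym_registry.spec("CartPole-v0")
spec._max_episode_steps = spec.max_episode_steps  # stash the original limit (200)
spec.max_episode_steps = None                     # disable the implicit TimeLimit
# ...later, if the limit is needed again:
spec.max_episode_steps = spec._max_episode_steps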
Example 8
 def __init__(
     self,
     check_death: bool = True,
     obs_type: str = "rgb",
     score_objects: bool = False,
     objects_from_pixels: bool = False,
     objects_remember_rooms: bool = False,
     only_keys: bool = False,
     death_room_8: bool = True,
 ):  # TODO: version that also considers the room objects were found in
     """Initialize a :class:`CustomMontezuma`."""
     spec = gym_registry.spec("MontezumaRevengeDeterministic-v4")
     # not actually needed, but we feel safer
     spec.max_episode_steps = int(1e100)
     spec.max_episode_time = int(1e100)
     self.env = spec.make()
     self.env.reset()
     self.score_objects = score_objects
     self.ram = None
     self.check_death = check_death
     self.cur_steps = 0
     self.cur_score = 0
     self.rooms = {}
     self.room_time = (None, None)
     self.room_threshold = 40
     self.unwrapped.seed(0)
     self.coords_obs = obs_type == "coords"
     self.state = []
     self.ram_death_state = -1
     self._x_repeat = 2
     self._death_room_8 = death_room_8
     self.cur_lives = 5
     self.ignore_ram_death = False
     self.objects_from_pixels = objects_from_pixels
     self.objects_remember_rooms = objects_remember_rooms
     self.only_keys = only_keys
     self.pos = MontezumaPosLevel(0, 0, 0, 0, 0)
     if self.coords_obs:
         shape = self.get_coords().shape
         self.observation_space = gym.spaces.Box(
             low=-numpy.inf,
             high=numpy.inf,
             dtype=numpy.float32,
             shape=shape,
         )
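
The coords branch above swaps the image observation space for an unbounded float32 Box shaped like the coordinate vector. A minimal standalone illustration; the shape here is hypothetical, since the real one comes from get_coords():

# Minimal illustration of the coords observation space built above.
# The shape is hypothetical; the real one comes from get_coords().
import gym
import numpy

shape = (5,)
space = gym.spaces.Box(low=-numpy.inf, high=numpy.inf, dtype=numpy.float32, shape=shape)
print(space.contains(numpy.zeros(shape, dtype=numpy.float32)))  # True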
Example 9
    def __init__(
        self,
        state: AtariState = None,
        name: str = "MsPacman-v0",
        clone_seeds: bool = True,
        env: AtariEnv = None,
        fixed_steps: int = 1,
    ):
        """
        Environment class used for managing Atari games. It can be used as a perfect simulation, or
        as an imperfect one. It can handle rgb images, or ram as observations.
        :param name: Name of the atari environment to be created.
                     See: https://gym.openai.com/envs#atari works also with "GameName-ram-v0" like
                     environments.
        :param clone_seeds:  bool;
                 If true, clone the pseudo random number generators of the emulator for a
                 perfect simulation. False provides an stochastic simulation.
        :param env: Openai AtariEnv, optional; Use an already existing env instead of creating one.
        :param fixed_steps: The number of consecutive times that the action will be applied. This
                            allows us to set the frequency at which the policy will play.
        """
        self._clone_seeds = clone_seeds
        self._cum_reward = 0
        if env is None and name:
            spec = gym_registry.spec(name)
            # not actually needed, but we feel safer
            spec.max_episode_steps = None
            spec.max_episode_time = None

            self._env = spec.make()
            self._name = name
        elif env is not None:
            self._env = env
            self._name = env.spec.id
        else:
            raise ValueError("An env or an env name must be specified")
        self._state = AtariState() if state is None else state
        if state is None:
            self._state = self.reset()

        super(AtariEnvironment, self).__init__(name=name,
                                               state=self.state,
                                               fixed_steps=fixed_steps)
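
A hedged usage sketch for this constructor, assuming AtariEnvironment and AtariState are importable from the surrounding library (they are not part of gym itself):

# Hedged usage sketch; AtariEnvironment/AtariState come from the surrounding
# library, not from gym. Passing no state makes the constructor reset for us.
env = AtariEnvironment(name="MsPacman-v0", clone_seeds=True, fixed_steps=4)
state = env.reset()  # returns a fresh AtariState, as in the constructor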
Example 10
 def __init__(self, name: str = "CartPole-v0", env: gym.Env = None):
     """This initializes the state of the environment to match the desired environment.
     It should deal with the undocumented wrappers that gym has so we avoid random resets when
     simulating.
     """
     super(OpenAIEnvironment, self).__init__(name=name)
     if env is None and name:
         spec = gym_registry.spec(name)
         # not actually needed, but we feel safer
         spec.max_episode_steps = None
         spec.max_episode_time = None
         self._env = spec.make()
         self._name = name
     elif env is not None:
         self._env = env
         self._name = env.spec.id
     else:
         raise ValueError("An env or an env name must be specified")
     self._state = self.reset()
Example 11
 def __init__(
     self,
     name: str,
     clone_seeds: bool = True,
     n_repeat_action: int = 1,
     min_dt: int = 1,
     obs_ram: bool = False,
     episodic_live: bool = False,
     autoreset: bool = True,
 ):
     """Create an environment to play OpenAI gym Atari Games.
     :param name: Name of the environment. Follows standard gym syntax rules.
     :param clone_seeds: Clone the random seed of the ALE emulator when
      reading/setting the state.
     :param n_repeat_action: Consecutive number of times a given action will be applied.
     :param min_dt: Internal number of times an action will be applied for each step
     in n_repeat_action.
     :param obs_ram: Use ram as observations even though it is not specified in the
     name parameter.
     :param episodic_live: Return end = True when losing a live.
     :param autoreset: Restart environment when reaching a terminal state.
     """
     super(AtariEnvironment, self).__init__(name=name, n_repeat_action=n_repeat_action)
     self.min_dt = min_dt
     self.clone_seeds = clone_seeds
     # This is for removing undocumented wrappers.
     spec = gym_registry.spec(name)
     # not actually needed, but we feel safer
     spec.max_episode_steps = None
     spec.max_episode_time = None
     self._env = spec.make()
     self.action_space = self._env.action_space
     self.observation_space = self._env.observation_space
     self.reward_range = self._env.reward_range
     self.metadata = self._env.metadata
     self.obs_ram = obs_ram
     self.episodic_life = episodic_live
     self.autoreset = autoreset
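
The docstring maps directly onto a call site. A hedged sketch using the documented parameters (the class itself comes from the surrounding library, not from gym):

# Hedged usage sketch based on the docstring above.
env = AtariEnvironment(
    name="MsPacman-v0",
    clone_seeds=True,    # clone the ALE RNG state for a perfect simulation
    n_repeat_action=4,   # apply each chosen action 4 consecutive times
    min_dt=1,            # internal repeats per step inside n_repeat_action
    obs_ram=True,        # force RAM observations regardless of the name
    episodic_live=True,  # report a terminal state when a life is lost
)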
Example 12
def _make(id_, env_kwargs=None):
    """
    Recreates the gym make function from gym/envs/registration.py
    so that it can support extra arguments for the environment.
    :param id_: (str) The environment ID
    :param env_kwargs: (dict) The extra arguments for the environment
    """
    if env_kwargs is None:
        env_kwargs = {}

    # getting the spec from the ID we want
    spec = registry.spec(id_)

    # Keeping the checks and safe guards of the old code
    assert spec._entry_point is not None, 'Attempting to make deprecated env {}. ' \
                                          '(HINT: is there a newer registered version of this env?)'.format(id_)

    if callable(spec._entry_point):
        env = spec._entry_point(**env_kwargs)
    else:
        cls = load(spec._entry_point)
        # create the env, with the original kwargs, and the new ones overriding them if needed
        env = cls(**{**spec._kwargs, **env_kwargs})

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = spec

    # Keeping the old patching system for _reset, _step and timestep limit
    if hasattr(env, "_reset") and hasattr(env, "_step") and not getattr(
            env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)
    if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
        from gym.wrappers.time_limit import TimeLimit
        env = TimeLimit(env,
                        max_episode_steps=env.spec.max_episode_steps,
                        max_episode_seconds=env.spec.max_episode_seconds)
    return env
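
The point of _make over plain gym.make is the kwargs merge: the registered kwargs are combined with caller-supplied ones, the latter taking precedence. A hedged usage sketch (in legacy gym, "FrozenLake-v0" registers map_name="4x4", and is_slippery is a real FrozenLakeEnv constructor parameter):

# Hedged usage sketch: the merge below yields
# FrozenLakeEnv(map_name="4x4", is_slippery=False).
env = _make("FrozenLake-v0", env_kwargs={"is_slippery": False})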
Example 13
            dict_actions[agent] = action

        observations, rewards, dones, infos = self._env.step(dict_actions)

        obs = tuple([observations[k] for k in self._env.agents])
        rewards = [rewards[k] for k in self._env.agents]
        dones = [dones[k] for k in self._env.agents]
        info = {}
        return obs, rewards, dones, info

    def close(self):
        return self._env.close()


envs = Path(os.path.dirname(os.path.realpath(__file__))).glob("**/*_v?.py")
for e in envs:
    name = e.stem.replace("_", "-")
    lib = e.parent.stem
    filename = e.stem

    gymkey = f"pz-{lib}-{name}"
    register(
        gymkey,
        entry_point="pettingzoo:PettingZooWrapper",
        kwargs={
            "lib_name": lib,
            "env_name": filename,
        },
    )
    registry.spec(gymkey).gymma_wrappers = tuple()
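
After the loop runs, every PettingZoo module found by the glob is creatable through the gym registry under a "pz-<lib>-<name>" key. A hedged sketch; the exact key depends on which files exist, and "pz-mpe-simple-spread-v2" is only an illustrative example:

# Hedged usage sketch: pettingzoo/mpe/simple_spread_v2.py would register as
# "pz-mpe-simple-spread-v2" (illustrative; depends on the files the glob finds).
import gym

env = gym.make("pz-mpe-simple-spread-v2")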