Пример #1
0
    def __init__(self,
                 venv: VecEnv,
                 n_stack: int,
                 channels_order: Optional[Union[str, Dict[str, str]]] = None):
        """
        Wrap ``venv`` and create the helper object that stacks its observations.

        :param venv: vectorized environment to wrap; its ``observation_space``
            must be a ``spaces.Box`` or a ``spaces.Dict``
        :param n_stack: stack size, forwarded to the stacking helper
        :param channels_order: forwarded unchanged to the stacking helper;
            must not be a dict when the observation space is a ``spaces.Box``
        :raises Exception: if the observation space is neither Box nor Dict
        """
        self.venv = venv
        self.n_stack = n_stack

        inner_space = venv.observation_space

        # Guard clause: bail out early on unsupported observation spaces.
        if not isinstance(inner_space, (spaces.Box, spaces.Dict)):
            raise Exception(
                "VecFrameStack only works with gym.spaces.Box and gym.spaces.Dict observation spaces"
            )

        if isinstance(inner_space, spaces.Box):
            # A per-key mapping makes no sense for a single Box space.
            assert not isinstance(channels_order, dict), (
                f"Expected None or string for channels_order but received {channels_order}"
            )
            stacker_cls = StackedObservations
        else:
            stacker_cls = StackedDictObservations

        self.stackedobs = stacker_cls(venv.num_envs, n_stack, inner_space,
                                      channels_order)

        # The helper derives the stacked space from the wrapped one.
        stacked_space = self.stackedobs.stack_observation_space(inner_space)
        VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
Пример #2
0
 def __init__(
     self,
     venv: VecEnv,
     training: bool = True,
     norm_obs: bool = True,
     norm_reward: bool = True,
     clip_obs: float = 10.0,
     clip_reward: float = 10.0,
     gamma: float = 0.99,
     epsilon: float = 1e-8,
 ):
     """
     Wrap ``venv`` and set up the running statistics and settings.

     :param venv: vectorized environment to wrap
     :param training: stored as ``self.training``
     :param norm_obs: stored as ``self.norm_obs``
     :param norm_reward: stored as ``self.norm_reward``
     :param clip_obs: stored as ``self.clip_obs``
     :param clip_reward: stored as ``self.clip_reward``
     :param gamma: stored as ``self.gamma``
     :param epsilon: stored as ``self.epsilon``
     """
     # Must run first: ``observation_space`` and ``num_envs`` are read below
     # (presumably populated by the base initializer -- confirm).
     VecEnvWrapper.__init__(self, venv)

     # Running statistics: one shaped like an observation, one scalar.
     self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
     self.ret_rms = RunningMeanStd(shape=())

     # Settings are stored verbatim.
     self.clip_obs, self.clip_reward = clip_obs, clip_reward
     self.gamma, self.epsilon = gamma, epsilon
     self.training = training
     self.norm_obs, self.norm_reward = norm_obs, norm_reward

     # Returns: discounted rewards, one accumulator per environment.
     self.ret = np.zeros(self.num_envs)

     # Empty placeholders for the previous observation and reward.
     self.old_obs = np.array([])
     self.old_reward = np.array([])
    def __init__(self, venv, video_folder, record_video_trigger,
                 video_length=200, name_prefix='rl-video'):
        """
        Wrap ``venv`` and prepare the state needed to record videos.

        :param venv: vectorized environment to wrap
        :param video_folder: output directory; converted to an absolute path
            and created if it does not exist
        :param record_video_trigger: stored as ``self.record_video_trigger``
        :param video_length: stored as ``self.video_length``
        :param name_prefix: stored as ``self.name_prefix``
        """
        VecEnvWrapper.__init__(self, venv)
        self.env = venv

        # Walk down through the known wrappers to reach the environment that
        # carries the metadata dict used by the gym recorder.
        base_env = venv
        while isinstance(base_env, (VecNormalize, VecFrameStack)):
            base_env = base_env.venv

        if isinstance(base_env, (DummyVecEnv, SubprocVecEnv)):
            # Take the metadata of the first sub-environment.
            self.env.metadata = base_env.get_attr('metadata')[0]
        else:
            self.env.metadata = base_env.metadata

        self.record_video_trigger = record_video_trigger
        self.video_recorder = None

        # Resolve the output folder and create it if needed.
        self.video_folder = os.path.abspath(video_folder)
        os.makedirs(self.video_folder, exist_ok=True)

        self.name_prefix = name_prefix
        self.video_length = video_length

        # Counters and flags all start from a "not recording" state.
        self.step_id = 0
        self.recorded_frames = 0
        self.recording = False
Пример #4
0
 def __init__(self, venv, raise_exception=False, warn_once=True, check_inf=True):
     """
     Wrap ``venv`` and store the checking options.

     :param venv: vectorized environment to wrap
     :param raise_exception: stored as ``self.raise_exception``
     :param warn_once: stored as ``self.warn_once``
     :param check_inf: stored as ``self.check_inf``
     """
     VecEnvWrapper.__init__(self, venv)

     # User-facing options, kept verbatim.
     self.raise_exception, self.warn_once, self.check_inf = (
         raise_exception,
         warn_once,
         check_inf,
     )

     # Internal state: nothing recorded yet and no warning issued so far.
     self._actions = self._observations = None
     self._user_warned = False
 def __init__(self, venv: VecEnv, n_stack: int):
     """
     Wrap ``venv`` and allocate the buffer that holds stacked observations.

     The bounds of the wrapped observation space are repeated ``n_stack``
     times along the last axis to build the new ``spaces.Box``.

     :param venv: vectorized environment to wrap
     :param n_stack: number of repetitions along the last axis
     """
     self.venv = venv
     self.n_stack = n_stack

     base_space = venv.observation_space
     repeat_kwargs = dict(repeats=self.n_stack, axis=-1)
     stacked_low = np.repeat(base_space.low, **repeat_kwargs)
     stacked_high = np.repeat(base_space.high, **repeat_kwargs)

     # One zero-initialised slot per environment, shaped like the bounds.
     buffer_shape = (venv.num_envs,) + stacked_low.shape
     self.stackedobs = np.zeros(buffer_shape, stacked_low.dtype)

     stacked_space = spaces.Box(
         low=stacked_low,
         high=stacked_high,
         dtype=venv.observation_space.dtype,
     )
     VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
Пример #6
0
 def __init__(self, venv: VecEnv, n_stack: int):
     """
     Wrap ``venv`` and allocate the buffer that holds stacked observations.

     The wrapped observation space must be a ``spaces.Box``; its bounds are
     repeated ``n_stack`` times along the last axis to build the new space.

     :param venv: vectorized environment to wrap
     :param n_stack: number of repetitions along the last axis
     """
     self.venv = venv
     self.n_stack = n_stack

     base_space = venv.observation_space
     is_box = isinstance(base_space, spaces.Box)
     assert is_box, "VecFrameStack only work with gym.spaces.Box observation space"

     repeat_kwargs = dict(repeats=self.n_stack, axis=-1)
     stacked_low = np.repeat(base_space.low, **repeat_kwargs)
     stacked_high = np.repeat(base_space.high, **repeat_kwargs)

     # One zero-initialised slot per environment, shaped like the bounds.
     buffer_shape = (venv.num_envs,) + stacked_low.shape
     self.stackedobs = np.zeros(buffer_shape, stacked_low.dtype)

     stacked_space = spaces.Box(
         low=stacked_low,
         high=stacked_high,
         dtype=venv.observation_space.dtype,
     )
     VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
Пример #7
0
    def set_venv(self, venv: VecEnv) -> None:
        """
        Attach ``venv`` to a wrapper that currently wraps nothing.

        Re-runs ``VecEnvWrapper.__init__`` with ``venv`` and resets
        ``self.ret`` to zeros, one entry per environment.

        :param venv: the vectorized environment to wrap
        :raises ValueError: if a venv is already set, or if the shape of the
            stored observation statistics differs from the shape of the
            observation space
        """
        already_attached = self.venv is not None
        if already_attached:
            raise ValueError(
                "Trying to set venv of already initialized VecNormalize wrapper."
            )

        VecEnvWrapper.__init__(self, venv)

        # The stored statistics must match the observation shape.
        stats_shape = self.obs_rms.mean.shape
        space_shape = self.observation_space.shape
        if stats_shape != space_shape:
            raise ValueError("venv is incompatible with current statistics.")

        self.ret = np.zeros(self.num_envs)
Пример #8
0
    def __init__(
            self,
            venv: VecEnv,
            filename: Optional[str] = None,
            info_keywords: Tuple[str, ...] = (),
    ):
        """
        Wrap ``venv`` and initialise the episode-statistics bookkeeping.

        :param venv: vectorized environment to wrap
        :param filename: if truthy, a ``ResultsWriter`` is created for it;
            otherwise ``self.results_writer`` is ``None``
        :param info_keywords: passed to the ``ResultsWriter`` as ``extra_keys``
            and stored as ``self.info_keywords``
        """
        # Avoid circular import
        from stable_baselines3.common.monitor import Monitor, ResultsWriter

        # This check is not valid for special `VecEnv`
        # like the ones created by Procgen, that do not completely follow
        # the `VecEnv` interface
        try:
            is_wrapped_with_monitor = venv.env_is_wrapped(Monitor)[0]
        except AttributeError:
            is_wrapped_with_monitor = False

        if is_wrapped_with_monitor:
            # Adjacent literals are concatenated as-is, so each fragment
            # carries its own separating space.
            warnings.warn(
                "The environment is already wrapped with a `Monitor` wrapper "
                "but you are wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics will be "
                "overwritten by the `VecMonitor` ones.",
                UserWarning,
            )

        VecEnvWrapper.__init__(self, venv)
        self.episode_returns = None
        self.episode_lengths = None
        self.episode_count = 0
        self.t_start = time.time()

        # The env id is only recorded when the venv exposes a non-None spec.
        env_id = None
        if hasattr(venv, "spec") and venv.spec is not None:
            env_id = venv.spec.id

        if filename:
            self.results_writer = ResultsWriter(filename,
                                                header={
                                                    "t_start": self.t_start,
                                                    "env_id": env_id
                                                },
                                                extra_keys=info_keywords)
        else:
            self.results_writer = None
        self.info_keywords = info_keywords
Пример #9
0
    def set_venv(self, venv: VecEnv) -> None:
        """
        Attach ``venv`` to a wrapper that currently wraps nothing.

        Re-runs ``VecEnvWrapper.__init__`` with ``venv``, validates the
        spaces through ``utils.check_for_correct_spaces`` and resets
        ``self.returns`` to zeros, one entry per environment.

        :param venv: the vectorized environment to wrap
        :raises ValueError: if a venv is already set
        """
        already_attached = self.venv is not None
        if already_attached:
            raise ValueError(
                "Trying to set venv of already initialized VecNormalize wrapper."
            )

        VecEnvWrapper.__init__(self, venv)

        # Only the observation space is really compared: the action space
        # handed to the check is the venv's own.
        expected_obs_space = self.observation_space
        utils.check_for_correct_spaces(venv, expected_obs_space,
                                       venv.action_space)

        self.returns = np.zeros(self.num_envs)
Пример #10
0
    def __init__(
        self,
        venv: VecEnv,
        training: bool = True,
        norm_obs: bool = True,
        norm_reward: bool = True,
        clip_obs: float = 10.0,
        clip_reward: float = 10.0,
        gamma: float = 0.99,
        epsilon: float = 1e-8,
        norm_obs_keys: Optional[List[str]] = None,
    ):
        """
        Wrap ``venv`` and set up the running statistics and settings.

        Observation statistics (``self.obs_rms`` / ``self.obs_spaces``) are
        only created when ``norm_obs`` is true.

        :param venv: vectorized environment to wrap
        :param training: stored as ``self.training``
        :param norm_obs: stored as ``self.norm_obs``; enables the observation
            checks and statistics
        :param norm_reward: stored as ``self.norm_reward``
        :param clip_obs: stored as ``self.clip_obs``
        :param clip_reward: stored as ``self.clip_reward``
        :param gamma: stored as ``self.gamma``
        :param epsilon: stored as ``self.epsilon``
        :param norm_obs_keys: stored as ``self.norm_obs_keys``; for Dict
            observation spaces, the keys that get their own statistics
        """
        VecEnvWrapper.__init__(self, venv)

        self.norm_obs, self.norm_obs_keys = norm_obs, norm_obs_keys

        # Check observation spaces
        if self.norm_obs:
            self._sanity_checks()

            if not isinstance(self.observation_space, gym.spaces.Dict):
                self.obs_spaces = None
                self.obs_rms = RunningMeanStd(
                    shape=self.observation_space.shape)
            else:
                self.obs_spaces = self.observation_space.spaces
                # One statistics tracker per selected key.
                # NOTE(review): iterating ``self.norm_obs_keys`` presumably
                # relies on ``_sanity_checks`` handling the ``None`` default
                # -- confirm.
                per_key_rms = {}
                for key in self.norm_obs_keys:
                    per_key_rms[key] = RunningMeanStd(
                        shape=self.obs_spaces[key].shape)
                self.obs_rms = per_key_rms

        self.ret_rms = RunningMeanStd(shape=())
        self.clip_obs, self.clip_reward = clip_obs, clip_reward

        # Returns: discounted rewards, one accumulator per environment.
        self.returns = np.zeros(self.num_envs)

        self.gamma, self.epsilon = gamma, epsilon
        self.training = training
        # Stored again after the checks above, keeping the original
        # assignment sequence.
        self.norm_obs = norm_obs
        self.norm_reward = norm_reward

        # Empty placeholders for the previous observation and reward.
        self.old_obs = np.array([])
        self.old_reward = np.array([])
Пример #11
0
    def __init__(
        self,
        venv: VecEnv,
        training: bool = True,
        norm_obs: bool = True,
        norm_reward: bool = True,
        clip_obs: float = 10.0,
        clip_reward: float = 10.0,
        gamma: float = 0.99,
        epsilon: float = 1e-8,
    ):
        """
        Wrap ``venv`` and set up the running statistics and settings.

        For a ``gym.spaces.Dict`` observation space one statistics tracker is
        created per key; otherwise a single tracker shaped like the
        observation space is used.

        :param venv: vectorized environment to wrap
        :param training: stored as ``self.training``
        :param norm_obs: stored as ``self.norm_obs``; when true the
            observation space type is validated
        :param norm_reward: stored as ``self.norm_reward``
        :param clip_obs: stored as ``self.clip_obs``
        :param clip_reward: stored as ``self.clip_reward``
        :param gamma: stored as ``self.gamma``
        :param epsilon: stored as ``self.epsilon``
        :raises ValueError: if ``norm_obs`` is true and the observation space
            is neither ``gym.spaces.Box`` nor ``gym.spaces.Dict``
        """
        VecEnvWrapper.__init__(self, venv)

        supported_spaces = (gym.spaces.Box, gym.spaces.Dict)
        if norm_obs and not isinstance(self.observation_space,
                                       supported_spaces):
            raise ValueError(
                "VecNormalize only supports `gym.spaces.Box` and `gym.spaces.Dict` observation spaces"
            )

        if not isinstance(self.observation_space, gym.spaces.Dict):
            self.obs_keys, self.obs_spaces = None, None
            self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        else:
            self.obs_keys = set(self.observation_space.spaces.keys())
            self.obs_spaces = self.observation_space.spaces
            # One statistics tracker per sub-space.
            per_key_rms = {}
            for key, space in self.obs_spaces.items():
                per_key_rms[key] = RunningMeanStd(shape=space.shape)
            self.obs_rms = per_key_rms

        self.ret_rms = RunningMeanStd(shape=())
        self.clip_obs, self.clip_reward = clip_obs, clip_reward

        # Returns: discounted rewards, one accumulator per environment.
        self.returns = np.zeros(self.num_envs)

        self.gamma, self.epsilon = gamma, epsilon
        self.training = training
        self.norm_obs, self.norm_reward = norm_obs, norm_reward

        # Empty placeholders for the previous observation and reward.
        self.old_obs = np.array([])
        self.old_reward = np.array([])
Пример #12
0
    def __init__(self,
                 venv: VecEnv,
                 n_stack: int,
                 channels_order: Optional[str] = None):
        """
        Wrap ``venv`` and allocate the buffer that holds stacked observations.

        The wrapped observation space must be a ``spaces.Box``. Its bounds are
        repeated ``n_stack`` times along the first axis (channels first) or
        the last axis (otherwise) to build the new space.

        :param venv: vectorized environment to wrap
        :param n_stack: number of repetitions along the stacking axis
        :param channels_order: ``"first"``, ``"last"`` or ``None``. With
            ``None`` the layout is detected for image spaces and the last
            axis is used for every other space.
        """
        self.venv = venv
        self.n_stack = n_stack

        base_space = venv.observation_space
        is_box = isinstance(base_space, spaces.Box)
        assert is_box, "VecFrameStack only work with gym.spaces.Box observation space"

        if channels_order is not None:
            valid_orders = {"last", "first"}
            assert channels_order in valid_orders, (
                "`channels_order` must be one of following: 'last', 'first'"
            )
            self.channels_first = channels_order == "first"
        elif is_image_space(base_space):
            # Detect channel location automatically for images
            self.channels_first = is_image_space_channels_first(base_space)
        else:
            # Default behavior for non-image space, stack on the last axis
            self.channels_first = False

        if self.channels_first:
            # Axis 1 in the buffer (axis 0 is the vec-env dimension),
            # axis 0 in a single observation.
            self.stack_dimension, repeat_axis = 1, 0
        else:
            self.stack_dimension, repeat_axis = -1, -1

        stacked_low = np.repeat(base_space.low, self.n_stack,
                                axis=repeat_axis)
        stacked_high = np.repeat(base_space.high, self.n_stack,
                                 axis=repeat_axis)

        # One zero-initialised slot per environment, shaped like the bounds.
        buffer_shape = (venv.num_envs, ) + stacked_low.shape
        self.stackedobs = np.zeros(buffer_shape, stacked_low.dtype)

        stacked_space = spaces.Box(low=stacked_low,
                                   high=stacked_high,
                                   dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
 def close(self):
     """
     Close the wrapper, then close the video recorder.

     The recorder is closed even when ``VecEnvWrapper.close`` raises, so a
     failure there does not leave the recorder open. The original exception
     still propagates.
     """
     try:
         VecEnvWrapper.close(self)
     finally:
         self.close_video_recorder()