def __init__(self, venv, video_folder, record_video_trigger, video_length=200, name_prefix='rl-video'):
    """
    Wrap ``venv`` and set up the bookkeeping used for video recording.

    The environment ``metadata`` is looked up on the innermost environment
    (below any ``VecNormalize`` / ``VecFrameStack`` layers) and copied onto
    the wrapped env. The output folder is created if it does not exist.

    :param venv: the vectorized environment to wrap
    :param video_folder: output directory; stored as an absolute path
    :param record_video_trigger: stored as ``self.record_video_trigger`` (not called here)
    :param video_length: stored as ``self.video_length``
    :param name_prefix: stored as ``self.name_prefix``
    """
    VecEnvWrapper.__init__(self, venv)
    self.env = venv

    # Walk down through normalization / frame-stacking wrappers to reach
    # the env holding the metadata dict that the gym recorder will use.
    innermost = venv
    while isinstance(innermost, (VecNormalize, VecFrameStack)):
        innermost = innermost.venv

    if isinstance(innermost, (DummyVecEnv, SubprocVecEnv)):
        # Take the value reported for the first entry of ``get_attr``
        env_metadata = innermost.get_attr('metadata')[0]
    else:
        env_metadata = innermost.metadata
    self.env.metadata = env_metadata

    # Output location: create the folder if needed
    self.video_folder = os.path.abspath(video_folder)
    os.makedirs(self.video_folder, exist_ok=True)
    self.name_prefix = name_prefix

    # Recording configuration and state
    self.record_video_trigger = record_video_trigger
    self.video_length = video_length
    self.video_recorder = None
    self.recording = False
    self.step_id = 0
    self.recorded_frames = 0
def __init__(self, venv: VecEnv, n_stack: int, channels_order: Optional[Union[str, Dict[str, str]]] = None):
    """
    Wrap ``venv`` so that its observations are stacked ``n_stack`` times.

    The stacking helper is chosen from the type of the wrapped observation
    space (``spaces.Box`` or ``spaces.Dict``); any other space is rejected.

    :param venv: the vectorized environment to wrap
    :param n_stack: forwarded to the stacking helper and stored as ``self.n_stack``
    :param channels_order: forwarded to the stacking helper; must not be a
        dict when the observation space is a ``spaces.Box``
    """
    self.venv = venv
    self.n_stack = n_stack

    base_space = venv.observation_space
    if not isinstance(base_space, (spaces.Box, spaces.Dict)):
        raise Exception(
            "VecFrameStack only works with gym.spaces.Box and gym.spaces.Dict observation spaces"
        )

    if isinstance(base_space, spaces.Box):
        # A per-key mapping is not accepted for a single Box space
        assert not isinstance(
            channels_order, dict
        ), f"Expected None or string for channels_order but received {channels_order}"
        stacker = StackedObservations(venv.num_envs, n_stack, base_space, channels_order)
    else:
        stacker = StackedDictObservations(venv.num_envs, n_stack, base_space, channels_order)
    self.stackedobs = stacker

    # The wrapper exposes the stacked space rather than the wrapped one
    stacked_space = self.stackedobs.stack_observation_space(base_space)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def __init__(
    self,
    venv: VecEnv,
    training: bool = True,
    norm_obs: bool = True,
    norm_reward: bool = True,
    clip_obs: float = 10.0,
    clip_reward: float = 10.0,
    gamma: float = 0.99,
    epsilon: float = 1e-8,
):
    """
    Wrap ``venv`` and create the running statistics for observations and returns.

    :param venv: the vectorized environment to wrap
    :param training: stored as ``self.training``
    :param norm_obs: stored as ``self.norm_obs``
    :param norm_reward: stored as ``self.norm_reward``
    :param clip_obs: stored as ``self.clip_obs``
    :param clip_reward: stored as ``self.clip_reward``
    :param gamma: stored as ``self.gamma``
    :param epsilon: stored as ``self.epsilon``
    """
    VecEnvWrapper.__init__(self, venv)

    # Configuration flags and constants, kept verbatim
    self.training = training
    self.norm_obs = norm_obs
    self.norm_reward = norm_reward
    self.clip_obs = clip_obs
    self.clip_reward = clip_reward
    self.gamma = gamma
    self.epsilon = epsilon

    # Running statistics: one sized like an observation, one scalar
    self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
    self.ret_rms = RunningMeanStd(shape=())

    # Returns: discounted rewards, one slot per environment
    self.ret = np.zeros(self.num_envs)

    # Placeholders, empty until filled elsewhere
    self.old_obs = np.array([])
    self.old_reward = np.array([])
def __init__(self, venv, raise_exception=False, warn_once=True, check_inf=True):
    """
    Wrap ``venv`` and store the checking options.

    :param venv: the vectorized environment to wrap
    :param raise_exception: stored as ``self.raise_exception``
    :param warn_once: stored as ``self.warn_once``
    :param check_inf: stored as ``self.check_inf``
    """
    VecEnvWrapper.__init__(self, venv)

    # Options supplied by the caller
    self.check_inf = check_inf
    self.warn_once = warn_once
    self.raise_exception = raise_exception

    # Internal state: nothing recorded and no warning issued yet
    self._user_warned = False
    self._observations = None
    self._actions = None
def __init__(self, venv: VecEnv, n_stack: int):
    """
    Wrap ``venv`` with an observation space repeated ``n_stack`` times on the last axis.

    :param venv: the vectorized environment to wrap; its observation space
        must expose ``low`` and ``high`` arrays
    :param n_stack: number of repetitions along the last axis
    """
    self.venv = venv
    self.n_stack = n_stack

    base_space = venv.observation_space
    # Repeat both bounds along the last axis to get the stacked bounds
    stacked_low, stacked_high = (
        np.repeat(bound, self.n_stack, axis=-1) for bound in (base_space.low, base_space.high)
    )

    # Zero-filled buffer with a leading per-environment dimension
    self.stackedobs = np.zeros((venv.num_envs, *stacked_low.shape), stacked_low.dtype)

    stacked_space = spaces.Box(low=stacked_low, high=stacked_high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def __init__(self, venv: VecEnv, n_stack: int):
    """
    Wrap ``venv`` with a ``spaces.Box`` observation space repeated ``n_stack``
    times on the last axis.

    :param venv: the vectorized environment to wrap; its observation space
        must be a ``spaces.Box``
    :param n_stack: number of repetitions along the last axis
    """
    self.venv = venv
    self.n_stack = n_stack

    base_space = venv.observation_space
    assert isinstance(
        base_space, spaces.Box
    ), "VecFrameStack only work with gym.spaces.Box observation space"

    # Repeat both bounds along the last axis to get the stacked bounds
    stacked_low, stacked_high = (
        np.repeat(bound, self.n_stack, axis=-1) for bound in (base_space.low, base_space.high)
    )

    # Zero-filled buffer with a leading per-environment dimension
    self.stackedobs = np.zeros((venv.num_envs, *stacked_low.shape), stacked_low.dtype)

    stacked_space = spaces.Box(low=stacked_low, high=stacked_high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def set_venv(self, venv: VecEnv) -> None:
    """
    Attach ``venv`` to a wrapper that does not have one yet.

    The base wrapper initialisation is re-run on ``venv``, then the shape of
    the stored observation statistics is compared with the observation space
    and the per-environment return buffer is reset to zeros.

    :param venv: the vectorized environment to wrap
    :raises ValueError: if a venv is already set, or if the observation
        statistics do not have the shape of the observation space
    """
    already_attached = self.venv is not None
    if already_attached:
        raise ValueError(
            "Trying to set venv of already initialized VecNormalize wrapper."
        )

    VecEnvWrapper.__init__(self, venv)

    # The stored running mean must have the shape of an observation
    stats_shape = self.obs_rms.mean.shape
    space_shape = self.observation_space.shape
    if stats_shape != space_shape:
        raise ValueError("venv is incompatible with current statistics.")

    self.ret = np.zeros(self.num_envs)
def __init__(
    self,
    venv: VecEnv,
    filename: Optional[str] = None,
    info_keywords: Tuple[str, ...] = (),
):
    """
    Wrap ``venv`` and initialise the episode bookkeeping.

    A ``UserWarning`` is emitted when ``venv`` reports that it is already
    wrapped with a ``Monitor``. A ``ResultsWriter`` is created only when
    ``filename`` is truthy.

    :param venv: the vectorized environment to wrap
    :param filename: passed to ``ResultsWriter``; if ``None`` or empty, no
        writer is created and ``self.results_writer`` is ``None``
    :param info_keywords: passed to ``ResultsWriter`` as ``extra_keys`` and
        stored as ``self.info_keywords``
    """
    # Avoid circular import
    from stable_baselines3.common.monitor import Monitor, ResultsWriter

    # This check is not valid for special `VecEnv`
    # like the ones created by Procgen, that do not completely follow
    # the `VecEnv` interface
    try:
        is_wrapped_with_monitor = venv.env_is_wrapped(Monitor)[0]
    except AttributeError:
        is_wrapped_with_monitor = False

    if is_wrapped_with_monitor:
        # Adjacent literals are joined as-is, so each fragment must carry
        # its own trailing space.
        warnings.warn(
            "The environment is already wrapped with a `Monitor` wrapper "
            "but you are wrapping it with a `VecMonitor` wrapper, the `Monitor` statistics will be "
            "overwritten by the `VecMonitor` ones.",
            UserWarning,
        )

    VecEnvWrapper.__init__(self, venv)
    self.episode_returns = None
    self.episode_lengths = None
    self.episode_count = 0
    self.t_start = time.time()

    # The env id goes into the results header when the venv exposes a spec
    env_id = None
    if hasattr(venv, "spec") and venv.spec is not None:
        env_id = venv.spec.id

    if filename:
        self.results_writer = ResultsWriter(
            filename,
            header={"t_start": self.t_start, "env_id": env_id},
            extra_keys=info_keywords,
        )
    else:
        self.results_writer = None
    self.info_keywords = info_keywords
def set_venv(self, venv: VecEnv) -> None:
    """
    Attach ``venv`` to a wrapper that does not have one yet.

    The base wrapper initialisation is re-run on ``venv``, the spaces are
    passed to ``utils.check_for_correct_spaces`` and the per-environment
    return buffer is reset to zeros.

    :param venv: the vectorized environment to wrap
    :raises ValueError: if a venv is already set
    """
    already_attached = self.venv is not None
    if already_attached:
        raise ValueError(
            "Trying to set venv of already initialized VecNormalize wrapper."
        )

    VecEnvWrapper.__init__(self, venv)

    # Check only that the observation_space match: the action space given
    # here is the venv's own.
    # NOTE(review): if the base initialiser above rebinds
    # ``self.observation_space`` to the venv's space, this compares the
    # space with itself - confirm against VecEnvWrapper.__init__.
    expected_obs_space = self.observation_space
    utils.check_for_correct_spaces(venv, expected_obs_space, venv.action_space)

    self.returns = np.zeros(self.num_envs)
def __init__(
    self,
    venv: VecEnv,
    training: bool = True,
    norm_obs: bool = True,
    norm_reward: bool = True,
    clip_obs: float = 10.0,
    clip_reward: float = 10.0,
    gamma: float = 0.99,
    epsilon: float = 1e-8,
    norm_obs_keys: Optional[List[str]] = None,
):
    """
    Wrap ``venv`` and create the running statistics for observations and returns.

    Observation statistics are only created when ``norm_obs`` is true: one
    ``RunningMeanStd`` per entry of ``self.norm_obs_keys`` for a
    ``gym.spaces.Dict`` space, a single one otherwise.

    :param venv: the vectorized environment to wrap
    :param training: stored as ``self.training``
    :param norm_obs: stored as ``self.norm_obs``; also enables the sanity
        checks and the creation of the observation statistics
    :param norm_reward: stored as ``self.norm_reward``
    :param clip_obs: stored as ``self.clip_obs``
    :param clip_reward: stored as ``self.clip_reward``
    :param gamma: stored as ``self.gamma``
    :param epsilon: stored as ``self.epsilon``
    :param norm_obs_keys: stored as ``self.norm_obs_keys``
    """
    VecEnvWrapper.__init__(self, venv)

    self.norm_obs = norm_obs
    self.norm_obs_keys = norm_obs_keys

    # Check observation spaces
    if self.norm_obs:
        self._sanity_checks()

        obs_space = self.observation_space
        if not isinstance(obs_space, gym.spaces.Dict):
            self.obs_spaces = None
            self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        else:
            self.obs_spaces = self.observation_space.spaces
            # One statistics tracker per selected key
            self.obs_rms = {
                name: RunningMeanStd(shape=self.obs_spaces[name].shape)
                for name in self.norm_obs_keys
            }

    self.ret_rms = RunningMeanStd(shape=())

    self.clip_obs = clip_obs
    self.clip_reward = clip_reward
    self.gamma = gamma
    self.epsilon = epsilon
    self.training = training
    # NOTE(review): second assignment of norm_obs; redundant unless
    # _sanity_checks() changes it - confirm.
    self.norm_obs = norm_obs
    self.norm_reward = norm_reward

    # Returns: discounted rewards, one slot per environment
    self.returns = np.zeros(self.num_envs)

    # Placeholders, empty until filled elsewhere
    self.old_obs = np.array([])
    self.old_reward = np.array([])
def __init__(
    self,
    venv: VecEnv,
    training: bool = True,
    norm_obs: bool = True,
    norm_reward: bool = True,
    clip_obs: float = 10.0,
    clip_reward: float = 10.0,
    gamma: float = 0.99,
    epsilon: float = 1e-8,
):
    """
    Wrap ``venv`` and create the running statistics for observations and returns.

    When ``norm_obs`` is true the observation space must be a
    ``gym.spaces.Box`` or a ``gym.spaces.Dict``; a ``Dict`` space gets one
    ``RunningMeanStd`` per key, a ``Box`` space a single one.

    :param venv: the vectorized environment to wrap
    :param training: stored as ``self.training``
    :param norm_obs: stored as ``self.norm_obs``; also enables the space
        check and the creation of the observation statistics
    :param norm_reward: stored as ``self.norm_reward``
    :param clip_obs: stored as ``self.clip_obs``
    :param clip_reward: stored as ``self.clip_reward``
    :param gamma: stored as ``self.gamma``
    :param epsilon: stored as ``self.epsilon``
    :raises ValueError: if ``norm_obs`` is true and the observation space is
        neither a ``gym.spaces.Box`` nor a ``gym.spaces.Dict``
    """
    VecEnvWrapper.__init__(self, venv)

    # NOTE(review): observation statistics are only set up when norm_obs is
    # enabled - confirm no other method reads obs_rms / obs_keys otherwise.
    if norm_obs:
        supported_spaces = (gym.spaces.Box, gym.spaces.Dict)
        if not isinstance(self.observation_space, supported_spaces):
            raise ValueError(
                "VecNormalize only supports `gym.spaces.Box` and `gym.spaces.Dict` observation spaces"
            )

        if not isinstance(self.observation_space, gym.spaces.Dict):
            self.obs_keys, self.obs_spaces = None, None
            self.obs_rms = RunningMeanStd(shape=self.observation_space.shape)
        else:
            self.obs_spaces = self.observation_space.spaces
            self.obs_keys = set(self.obs_spaces.keys())
            # One statistics tracker per sub-space
            self.obs_rms = {
                name: RunningMeanStd(shape=sub_space.shape)
                for name, sub_space in self.obs_spaces.items()
            }

    self.ret_rms = RunningMeanStd(shape=())

    self.clip_obs = clip_obs
    self.clip_reward = clip_reward
    self.gamma = gamma
    self.epsilon = epsilon
    self.training = training
    self.norm_obs = norm_obs
    self.norm_reward = norm_reward

    # Returns: discounted rewards, one slot per environment
    self.returns = np.zeros(self.num_envs)

    # Placeholders, empty until filled elsewhere
    self.old_obs = np.array([])
    self.old_reward = np.array([])
def __init__(self, venv: VecEnv, n_stack: int, channels_order: Optional[str] = None):
    """
    Wrap ``venv`` with a ``spaces.Box`` observation space repeated ``n_stack``
    times along the channel axis.

    :param venv: the vectorized environment to wrap; its observation space
        must be a ``spaces.Box``
    :param n_stack: number of repetitions along the stacking axis
    :param channels_order: ``"first"`` or ``"last"``. If ``None``, the order
        is detected for image spaces and the last axis is used otherwise.
    """
    self.venv = venv
    self.n_stack = n_stack

    base_space = venv.observation_space
    assert isinstance(
        base_space, spaces.Box
    ), "VecFrameStack only work with gym.spaces.Box observation space"

    if channels_order is not None:
        assert channels_order in {
            "last", "first"
        }, "`channels_order` must be one of following: 'last', 'first'"
        self.channels_first = channels_order == "first"
    elif is_image_space(base_space):
        # Detect channel location automatically for images
        self.channels_first = is_image_space_channels_first(base_space)
    else:
        # Default behavior for non-image space, stack on the last axis
        self.channels_first = False

    if self.channels_first:
        # This includes the vec-env dimension (first), hence axis 1 for the
        # buffer but axis 0 for the bounds of a single observation
        self.stack_dimension, bounds_axis = 1, 0
    else:
        self.stack_dimension, bounds_axis = -1, -1

    stacked_low, stacked_high = (
        np.repeat(bound, self.n_stack, axis=bounds_axis)
        for bound in (base_space.low, base_space.high)
    )

    # Zero-filled buffer with a leading per-environment dimension
    self.stackedobs = np.zeros((venv.num_envs, *stacked_low.shape), stacked_low.dtype)

    stacked_space = spaces.Box(low=stacked_low, high=stacked_high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)