def wrap_environment(wrapped_class, wrappers=None, **kwargs):
    """Helper for wrapping environment classes.

    The environment class is resolved with ``load(wrapped_class)`` and
    instantiated with ``**kwargs``. Each entry of ``wrappers`` is a
    ``(wrapper, wrapper_kwargs)`` pair: the wrapper is resolved with ``load``,
    called with its keyword arguments, and the object this returns is then
    called with the current environment to produce the next one.

    :param wrapped_class: value handed to ``load`` to obtain the env class
    :param wrappers: iterable of ``(wrapper, wrapper_kwargs)`` pairs, or
        ``None`` when no wrapper should be applied
    :param kwargs: keyword arguments for the environment constructor
    :return: the environment after every wrapper has been applied, in order
    """
    wrapper_specs = [] if wrappers is None else wrappers

    env = load(wrapped_class)(**kwargs)
    for wrapper_path, wrapper_kwargs in wrapper_specs:
        # Two-step application: configure the wrapper from its kwargs first,
        # then apply the configured callable to the environment.
        configured_wrapper = load(wrapper_path)(**wrapper_kwargs)
        env = configured_wrapper(env)
    return env
def mujoco_wrapper(entry_point, **kwargs):
    """Create the environment behind ``entry_point`` and wrap it.

    The class is resolved with ``load(entry_point)``, instantiated with
    ``**kwargs`` and returned wrapped in ``NormalizedActionWrapper``.
    """
    environment_class = load(entry_point)
    raw_env = environment_class(**kwargs)
    return NormalizedActionWrapper(raw_env)
def make_discrete_task_by_id(
    env: str,
    **kwargs,
) -> Union[Dict[str, Any], Any]:
    """Create a discrete task for the registered environment id `env`.

    The entry point of the registered spec is loaded and passed to
    `make_discrete_task_from_type`. If that raises a `RuntimeError`, a
    `RuntimeWarning` is emitted and a temporary environment is created with
    `gym.make(env)` so that the task can be built from the instance instead.

    Raises:
        RuntimeError: if `env` is not a registered env id.
    """
    # TODO: Actually instantiate the env here? or just dispatch based on the
    # env class?
    registered_specs = env_registry.env_specs
    if env not in registered_specs:
        raise RuntimeError(
            f"Can't create a task for env id {env}, since it isn't a registered env id."
        )

    spec = registered_specs[env]
    entry_point = load(spec.entry_point)

    try:
        # First attempt: dispatch on the loaded entry point alone, without
        # creating an environment.
        return make_discrete_task_from_type(entry_point, **kwargs)
    except RuntimeError as exc:
        warnings.warn(
            RuntimeWarning(
                f"A temporary environment will have to be created in order to make a task: {exc}"
            ))

    # IDEA: Could avoid re-creating the env between calls to this function, for
    # instance by saving a single temp env in a global variable and overwriting
    # it if `env` is of a different type.
    with gym.make(env) as temp_env:
        return make_task_fn(temp_env, **kwargs)
def universe_wrapper(entry_point, **kwargs):
    """Create the environment behind ``entry_point`` with preprocessing.

    The class is resolved with ``load(entry_point)`` and instantiated with
    ``**kwargs``. The instance is then wrapped, innermost first, in
    ``MaxAndSkipEnv(skip=4)``, ``WarpFrame(dim=84, rgb=True)`` and
    ``ScaledFloatFrame``.
    """
    base_env = load(entry_point)(**kwargs)

    # Preprocessing wrappers, applied from the inside out.
    skipped = MaxAndSkipEnv(base_env, skip=4)
    warped = WarpFrame(skipped, dim=84, rgb=True)
    return ScaledFloatFrame(warped)
def of(
    cls,
    original: EnvSpec,
    *,
    new_id: str,
    new_reward_threshold: Optional[float] = None,
    new_nondeterministic: Optional[bool] = None,
    new_max_episode_steps: Optional[int] = None,
    new_kwargs: Dict[str, Any] = None,
    new_entry_point: Union[str, Callable[..., gym.Env]] = None,
    wrappers: Optional[List[Callable[[gym.Env], gym.Env]]] = None,
) -> "VariantEnvSpec":
    """Returns a new env spec based on `original`, optionally using
    additional wrappers.

    Every `new_*` argument left at `None` falls back to the corresponding
    value of `original`.

    NOTE: The `new_kwargs` update a *copy* of the current kwargs, rather than
    replacing them. The kwargs dict of `original` is left untouched.

    NOTE: When both `new_entry_point` and `wrappers` are given, the wrappers
    are applied on top of `new_entry_point`.

    Args:
        original: The spec to derive the new spec from.
        new_id: Id of the new spec.
        new_reward_threshold: Replacement reward threshold, if any.
        new_nondeterministic: Replacement nondeterministic flag, if any.
        new_max_episode_steps: Replacement episode step limit, if any.
        new_kwargs: Extra kwargs merged over those of `original`.
        new_entry_point: Replacement entry point (string or callable), if any.
        wrappers: Callables applied, in order, to the created env.

    Returns:
        The result of calling `cls` with the new id, `base_spec=original` and
        the resolved values.
    """
    # Copy before updating: `original._kwargs` belongs to the original spec
    # and must not pick up the kwargs of the variant.
    new_spec_kwargs = dict(original._kwargs)
    new_spec_kwargs.update(new_kwargs or {})

    # Replace the entry-point if desired:
    new_spec_entry_point: Union[str, Callable[..., EnvType]] = (
        new_entry_point or original.entry_point)

    if new_reward_threshold is None:
        new_reward_threshold = original.reward_threshold
    if new_nondeterministic is None:
        new_nondeterministic = original.nondeterministic
    if new_max_episode_steps is None:
        new_max_episode_steps = original.max_episode_steps

    # Add wrappers if desired.
    if wrappers:
        # Get the callable that creates the env. This starts from the entry
        # point selected above, so that a `new_entry_point` is not discarded
        # when wrappers are also requested.
        if callable(new_spec_entry_point):
            env_fn = new_spec_entry_point
        else:
            env_fn = load(new_spec_entry_point)

        def _new_entry_point(**kwargs) -> gym.Env:
            env = env_fn(**kwargs)
            for wrapper in wrappers:
                env = wrapper(env)
            return env

        new_spec_entry_point = _new_entry_point

    return cls(
        new_id,
        base_spec=original,
        entry_point=new_spec_entry_point,
        reward_threshold=new_reward_threshold,
        nondeterministic=new_nondeterministic,
        max_episode_steps=new_max_episode_steps,
        kwargs=new_spec_kwargs,
    )
def rand_wrapper(entry_point, **kwargs):
    """Create the environment behind ``entry_point`` and wrap it.

    The class is resolved with ``load(entry_point)`` and instantiated with
    ``**kwargs``. The instance is wrapped in ``RandomizedEnvWrapper`` and the
    result in ``NormalizedActionWrapper``.
    """
    base_env = load(entry_point)(**kwargs)

    # Randomization wrapper goes inside, action normalization outside.
    randomized_env = RandomizedEnvWrapper(base_env)
    return NormalizedActionWrapper(randomized_env)
def mujoco_wrapper(entry_point, **kwargs):
    """Create the environment behind ``entry_point`` and wrap it.

    Two options are removed from ``kwargs`` before the environment is built:
    ``normalization_scale`` (default ``1.``) is forwarded as ``scale`` to
    ``NormalizedActionWrapper`` and ``max_episode_steps`` (default ``200``)
    to ``TimeLimit``. The remaining keyword arguments go to the environment
    constructor.
    """
    # Wrapper-only options must not reach the environment constructor.
    action_scale = kwargs.pop('normalization_scale', 1.)
    step_limit = kwargs.pop('max_episode_steps', 200)

    environment_class = load(entry_point)
    normalized_env = NormalizedActionWrapper(environment_class(**kwargs),
                                             scale=action_scale)
    return TimeLimit(normalized_env, max_episode_steps=step_limit)
def get_env_class(
    env: Union[str, gym.Env, Type[gym.Env], Callable[[], gym.Env]]
) -> Type[gym.Env]:
    """Return the environment class associated with `env`.

    Supported inputs:
    - a `functools.partial`: when it wraps `gym.make` and carries a string
      id (as first positional argument, or as the `id` keyword), the id is
      resolved recursively; otherwise the wrapped callable is resolved;
    - a string, which is passed to `load`;
    - a `gym.Wrapper` instance, for which the type of `env.unwrapped` is
      returned;
    - any other `gym.Env` instance, for which its type is returned;
    - a subclass of `gym.Env`, which is returned as-is.

    Raises:
        NotImplementedError: if `env` is none of the above.
    """
    if isinstance(env, partial):
        if env.func is gym.make:
            # The id may be bound positionally or by keyword. Indexing
            # `env.args[0]` unconditionally would raise IndexError for
            # `partial(gym.make, id=...)`.
            env_id = env.args[0] if env.args else env.keywords.get("id")
            if isinstance(env_id, str):
                return get_env_class(env_id)
        return get_env_class(env.func)

    if isinstance(env, str):
        # NOTE(review): the string is handed to `load` unchanged, including
        # ids extracted from a `gym.make` partial above - confirm that `load`
        # accepts the kind of string callers pass here.
        return load(env)

    # Wrappers must be checked before plain envs, so that the class of the
    # innermost env is reported rather than the wrapper's.
    if isinstance(env, gym.Wrapper):
        return type(env.unwrapped)
    if isinstance(env, gym.Env):
        return type(env)

    if inspect.isclass(env) and issubclass(env, gym.Env):
        return env

    raise NotImplementedError(
        f"Don't know how to get the class of env being used by {env}!")
def _make(id_, env_kwargs=None):
    """
    Recreating the gym make function from gym/envs/registration.py
    as such as it can support extra arguments for the environment

    :param id_: (str) The environment ID
    :param env_kwargs: (dict) The extra arguments for the environment
    :return: the created environment, wrapped in TimeLimit when its spec
        defines a timestep limit and is not tagged 'vnc'
    """
    if env_kwargs is None:
        env_kwargs = {}

    # getting the spec from the ID we want
    spec = registry.spec(id_)

    # Keeping the checks and safe guards of the old code.
    # The message is formatted with the requested id: `spec.id_` is not an
    # attribute of the spec, so formatting it would raise AttributeError and
    # hide this message.
    assert spec._entry_point is not None, (
        'Attempting to make deprecated env {}. '
        '(HINT: is there a newer registered version of this env?)'.format(id_))

    if callable(spec._entry_point):
        # NOTE: a callable entry point only receives `env_kwargs`; the kwargs
        # stored on the spec are not forwarded in this branch.
        env = spec._entry_point(**env_kwargs)
    else:
        cls = load(spec._entry_point)
        # create the env, with the original kwargs, and the new ones
        # overriding them if needed
        env = cls(**{**spec._kwargs, **env_kwargs})

    # Make the environment aware of which spec it came from.
    env.unwrapped.spec = spec

    # Keeping the old patching system for _reset, _step and timestep limit
    uses_underscore_api = hasattr(env, "_reset") and hasattr(env, "_step")
    if uses_underscore_api and not getattr(
            env, "_gym_disable_underscore_compat", False):
        patch_deprecated_methods(env)

    if (env.spec.timestep_limit is not None) and not spec.tags.get('vnc'):
        from gym.wrappers.time_limit import TimeLimit
        env = TimeLimit(env,
                        max_episode_steps=env.spec.max_episode_steps,
                        max_episode_seconds=env.spec.max_episode_seconds)
    return env
def create_test_env(env_id, n_envs=1, is_atari=False,
                    stats_path=None, seed=0,
                    log_dir='', should_render=True, hyperparams=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param is_atari: (bool) Atari creation is disabled in this function;
        passing True raises NotImplementedError
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards (None skips the logger setup)
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams (ex: frame_stack).
        It is passed to `get_wrapper_class` and its 'env_wrapper' entry,
        if present, is removed from the dict.
    :return: (gym.Env)
    :raises NotImplementedError: if is_atari is True
    """
    # The default (None) must behave like "no extra hyperparams".
    if hyperparams is None:
        hyperparams = {}

    # HACK to save logs
    if log_dir is not None:
        # Create the absolute path: os.makedirs('') raises
        # FileNotFoundError, which made the default log_dir='' unusable.
        abs_log_dir = os.path.abspath(log_dir)
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = abs_log_dir
        os.makedirs(abs_log_dir, exist_ok=True)
        logger.configure()

    # Create the environment and wrap it if necessary
    env_wrapper = get_wrapper_class(hyperparams)
    if 'env_wrapper' in hyperparams:
        del hyperparams['env_wrapper']

    if is_atari:
        print("Using Atari wrapper")
        # The Atari creation code (make_atari_env + VecFrameStack) is
        # disabled here, so no env would ever be assigned in this branch.
        # Fail with an explicit error instead of an UnboundLocalError later.
        raise NotImplementedError(
            "Atari environments are not supported by create_test_env: "
            "the Atari wrapper is disabled.")
    elif n_envs > 1:
        # start_method = 'spawn' for thread safe
        env = SubprocVecEnv([
            make_env(env_id, i, seed, log_dir, wrapper_class=env_wrapper)
            for i in range(n_envs)
        ])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        try:
            class_ = load(spec.entry_point)
        except AttributeError:
            # Backward compatibility with gym
            class_ = load(spec._entry_point)

        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        use_subproc = 'renders' not in inspect.getfullargspec(
            class_.__init__).args
        render_name = None if use_subproc else 'renders'

        # Create the env, with the original kwargs, and the new ones
        # overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            env = class_(**{**spec._kwargs}, **{render_name: should_render})
            # NOTE(review): the seed is hard-coded to 0 here and `seed` is
            # not used on this path - confirm whether that is intended.
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env, os.path.join(log_dir, "0"),
                              allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([
                make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)
            ])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([
            make_env(env_id, 0, seed, log_dir, wrapper_class=env_wrapper)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams.get('normalize', False):
            normalize_kwargs = hyperparams.get('normalize_kwargs', {})
            print("Loading running average")
            print("with params: {}".format(normalize_kwargs))
            env = VecNormalize(env, training=False, **normalize_kwargs)
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('frame_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
def mujoco_wrapper(entry_point, **kwargs):
    """Create the environment behind ``entry_point``, without any wrapper.

    The class is resolved with ``load(entry_point)`` and instantiated with
    ``**kwargs``.
    """
    environment_class = load(entry_point)
    return environment_class(**kwargs)
def get_class_and_kwargs(spec_or_id):
    """Return ``(loaded entry point, kwargs)`` for an env spec or env id.

    A ``registration.EnvSpec`` instance is used directly; any other value is
    looked up with ``registration.spec``. The first element of the result is
    ``registration.load`` applied to the spec's ``_entry_point``, the second
    is the spec's ``_kwargs`` object itself (not a copy).
    """
    spec = (spec_or_id
            if isinstance(spec_or_id, registration.EnvSpec)
            else registration.spec(spec_or_id))
    env_class = registration.load(spec._entry_point)
    return env_class, spec._kwargs
def create_test_env(env_id,
                    n_envs=1,
                    stats_path=None,
                    seed=0,
                    log_dir=None,
                    should_render=True,
                    hyperparams=None,
                    env_params=None):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param stats_path: (str) path to folder containing saved running averaged
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards
    :param should_render: (bool) For Pybullet env, display the GUI
    :param hyperparams: (dict) Additional hyperparams for the env (ex: n_stack)
    :param env_params: (dict) the parameters to change in env
    :return: (gym.Env)
    :raises ValueError: if env_id is not found in the gym registry
    """
    # Avoid mutable default arguments; None means "nothing to change".
    if env_params is None:
        env_params = {}
    if hyperparams is None:
        hyperparams = {}

    # If the environment is not found, suggest the closest match
    registered_envs = set(gym.envs.registry.env_specs.keys())
    if env_id not in registered_envs:
        # get_close_matches may return an empty list; indexing it blindly
        # would raise IndexError instead of the intended ValueError.
        close_matches = difflib.get_close_matches(env_id, registered_envs,
                                                  n=1)
        if close_matches:
            raise ValueError(
                '{} not found in gym registry, you maybe meant {}?'.format(
                    env_id, close_matches[0]))
        raise ValueError(
            '{} not found in gym registry, and no close match was '
            'found.'.format(env_id))

    is_atari = 'NoFrameskip' in env_id

    # HACK to save logs
    if log_dir is not None:
        os.environ["OPENAI_LOG_FORMAT"] = 'log'
        os.environ["OPENAI_LOGDIR"] = os.path.abspath(log_dir)
        os.makedirs(log_dir, exist_ok=True)
        logger.configure()

    # Create the environment and wrap it if necessary
    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif n_envs > 1:
        env = SubprocVecEnv([
            make_env(env_id,
                     i,
                     seed,
                     log_dir,
                     env_params=env_params,
                     params_path=stats_path) for i in range(n_envs)
        ])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        use_subproc = 'renders' not in inspect.getfullargspec(
            class_.__init__).args
        render_name = None if use_subproc else 'renders'

        # Create the env, with the original kwargs, and the new ones
        # overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            env = class_(**{**spec._kwargs}, **{render_name: should_render})
            if env_params:
                env = modify_env_params(env, stats_path, **env_params)
            # NOTE(review): the seed is hard-coded to 0 here and `seed` is
            # not used on this path - confirm whether that is intended.
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env,
                              os.path.join(log_dir, "0"),
                              allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([
                make_env(env_id,
                         0,
                         seed,
                         log_dir,
                         env_params=env_params,
                         params_path=stats_path)
            ])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([
            make_env(env_id,
                     0,
                     seed,
                     log_dir,
                     env_params=env_params,
                     params_path=stats_path)
        ])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        if hyperparams.get('normalize', False):
            saved_kwargs = hyperparams.get('normalize_kwargs', {})
            print("Loading running average")
            print("with params: {}".format(saved_kwargs))
            # Rewards are never normalized at test time. Drop any saved
            # 'norm_reward' from a copy, so the caller's dict is not modified.
            normalize_kwargs = {
                key: value
                for key, value in saved_kwargs.items()
                if key != 'norm_reward'
            }
            env = VecNormalize(env,
                               training=False,
                               norm_reward=False,
                               **normalize_kwargs)
            env.load_running_average(stats_path)

        n_stack = hyperparams.get('n_stack', 0)
        if n_stack > 0:
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env
def create_test_env(env_id,
                    n_envs=1,
                    is_atari=False,
                    stats_path=None,
                    norm_reward=False,
                    seed=0,
                    log_dir='',
                    should_render=True):
    """
    Create environment for testing a trained agent

    :param env_id: (str)
    :param n_envs: (int) number of processes
    :param is_atari: (bool)
    :param stats_path: (str) path to folder containing saved running averaged
    :param norm_reward: (bool) Whether to normalize rewards or not when using Vecnormalize
    :param seed: (int) Seed for random number generator
    :param log_dir: (str) Where to log rewards (None skips the logger setup)
    :param should_render: (bool) For Pybullet env, display the GUI
    :return: (gym.Env)
    """
    # HACK to save logs
    if log_dir is not None:
        # Create the absolute path: os.makedirs('') raises
        # FileNotFoundError, which made the default log_dir='' unusable.
        abs_log_dir = os.path.abspath(log_dir)
        os.environ["OPENAI_LOG_FORMAT"] = 'csv'
        os.environ["OPENAI_LOGDIR"] = abs_log_dir
        os.makedirs(abs_log_dir, exist_ok=True)
        logger.configure()

    # Create the environment and wrap it if necessary
    if is_atari:
        print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif n_envs > 1:
        env = SubprocVecEnv(
            [make_env(env_id, i, seed, log_dir) for i in range(n_envs)])
    # Pybullet envs does not follow gym.render() interface
    elif "Bullet" in env_id:
        spec = gym.envs.registry.env_specs[env_id]
        class_ = load(spec._entry_point)
        # HACK: force SubprocVecEnv for Bullet env that does not
        # have a render argument
        use_subproc = 'renders' not in inspect.getfullargspec(
            class_.__init__).args

        # Create the env, with the original kwargs, and the new ones
        # overriding them if needed
        def _init():
            # TODO: fix for pybullet locomotion envs
            env = class_(**spec._kwargs, renders=should_render)
            # NOTE(review): the seed is hard-coded to 0 here and `seed` is
            # not used on this path - confirm whether that is intended.
            env.seed(0)
            if log_dir is not None:
                env = Monitor(env,
                              os.path.join(log_dir, "0"),
                              allow_early_resets=True)
            return env

        if use_subproc:
            env = SubprocVecEnv([make_env(env_id, 0, seed, log_dir)])
        else:
            env = DummyVecEnv([_init])
    else:
        env = DummyVecEnv([make_env(env_id, 0, seed, log_dir)])

    # Load saved stats for normalizing input and rewards
    # And optionally stack frames
    if stats_path is not None:
        # Only normalize when saved statistics actually exist. Testing the
        # joined path string itself would always be true.
        if os.path.isfile(os.path.join(stats_path, 'obs_rms.pkl')):
            print("Loading running average")
            env = VecNormalize(env, training=False, norm_reward=norm_reward)
            env.load_running_average(stats_path)

        n_stack_file = os.path.join(stats_path, 'n_stack')
        if os.path.isfile(n_stack_file):
            with open(n_stack_file, 'r') as f:
                n_stack = int(f.read())
            print("Stacking {} frames".format(n_stack))
            env = VecFrameStack(env, n_stack)
    return env