def make_vec_envs(
    env_name,
    seed,
    num_processes,
    gamma,
    log_dir,
    device,
    allow_early_resets,
    training=True,
    num_frame_stack=None,
    red=False,
):
    """Build a vectorized stack of gym environments wrapped for PyTorch.

    Creates ``num_processes`` copies of ``env_name`` (shared-memory workers
    when more than one), optionally normalizes observations/returns, moves
    tensors to ``device``, and frame-stacks image observations.

    Args:
        env_name: Gym environment id.
        seed: Base random seed; each worker gets ``seed + i``.
        num_processes: Number of parallel environment copies.
        gamma: Discount factor used by return normalization; ``None``
            disables return normalization.
        log_dir: Directory passed to ``make_env`` for monitoring logs.
        device: Torch device the wrapped env places tensors on.
        allow_early_resets: Forwarded to ``make_env``.
        training: When False, normalization statistics are frozen
            (``envs.eval()``).
        num_frame_stack: Explicit frame-stack depth; when ``None``, 3-D
            (image) observations default to a stack of 4.
        red: When True, skip observation filtering in ``VecNormalize``.

    Returns:
        The fully wrapped vectorized environment.
    """
    envs = [
        make_env(env_name, seed, i, log_dir, allow_early_resets)
        for i in range(num_processes)
    ]

    # Shared-memory vectorization only pays off with multiple workers.
    if len(envs) > 1:
        envs = ShmemVecEnv(envs, context='fork')
    else:
        envs = DummyVecEnv(envs)

    # Don't filter observations if RED.
    obfilt = not red
    if len(envs.observation_space.shape) == 1:
        # Flat (vector) observations: normalize obs, and returns when a
        # discount factor is available.
        if gamma is None:
            envs = VecNormalize(envs, ob=obfilt, ret=False)
        else:
            envs = VecNormalize(envs, ob=obfilt, gamma=gamma)
        if not training:
            # Freeze running statistics during evaluation.
            envs.eval()
    elif env_name.startswith('CarRacing'):
        # Car Racing: normalize rewards only (obs left untouched; clipob is
        # effectively disabled via the huge threshold).
        envs = VecNormalize(envs, ob=False, ret=training, clipob=1e10, cliprew=1.0)
        if not training:
            envs.eval()

    envs = VecPyTorch(envs, device)

    # NOTE(review): the original computed an `is_atari` flag here
    # ("Hack for now") that was never used; removed as dead code.
    if num_frame_stack is not None:
        envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    elif len(envs.observation_space.shape) == 3:
        # Image observations default to the standard 4-frame stack.
        envs = VecPyTorchFrameStack(envs, 4, device)

    return envs
def make_vec_envs(env_id,
                  seed,
                  num_processes,
                  gamma,
                  log_dir=None,
                  device=torch.device('cpu'),
                  obs_keys=None,
                  allow_early_resets=False,
                  save_video=False,
                  num_frame_stack=None,
                  max_steps=None,
                  evaluating=False):
    """Construct a vectorized, torch-wrapped environment stack.

    Spawns ``num_processes`` workers for ``env_id`` (shared memory when
    parallel), applies observation/return normalization to flat observation
    spaces, moves tensors to ``device``, and frame-stacks image observations.

    NOTE(review): another ``make_vec_envs`` with a different signature appears
    earlier in this file; if both live in one module this definition shadows
    it — confirm intent.

    Args:
        env_id: Gym environment id.
        seed: Base seed; worker ``rank`` is seeded with ``seed + rank``.
        num_processes: Number of parallel environment copies.
        gamma: Discount for return normalization; ``None`` disables it.
        log_dir: Optional monitor/log directory forwarded to ``make_env``.
        device: Torch device for the wrapped env's tensors.
        obs_keys: Forwarded to ``make_env``.
        allow_early_resets: Forwarded to ``make_env``.
        save_video: Forwarded to ``make_env``.
        num_frame_stack: Explicit stack depth; 3-D observations default to 4.
        max_steps: Forwarded to ``make_env``.
        evaluating: When True, freeze normalization statistics.

    Returns:
        The fully wrapped vectorized environment.
    """
    thunks = [
        make_env(env_id, seed, rank, log_dir, obs_keys, allow_early_resets,
                 save_video, max_steps=max_steps)
        for rank in range(num_processes)
    ]

    # Shared-memory workers only make sense with more than one env.
    venv = (ShmemVecEnv(thunks, context='fork')
            if len(thunks) > 1 else DummyVecEnv(thunks))

    if len(venv.observation_space.shape) == 1:
        # Flat observation space: normalize, with returns only when a
        # discount factor is supplied.
        venv = (VecNormalize(venv, ret=False)
                if gamma is None else VecNormalize(venv, gamma=gamma))
        # since our network is frozen, an online normalization would make
        # observations diverge from what it learned
        if evaluating:
            venv.eval()

    venv = VecPyTorch(venv, device)

    if num_frame_stack is not None:
        venv = VecPyTorchFrameStack(venv, num_frame_stack, device)
    elif len(venv.observation_space.shape) == 3:
        # Image observations get the conventional 4-frame stack by default.
        venv = VecPyTorchFrameStack(venv, 4, device)

    return venv