def test_vec_env(tmpdir):
    """Check VecNormalize clipping during a rollout and its save/load round trip.

    Steps a normalized single-env DummyVecEnv with sampled actions until the
    first environment reports done, asserting after every step that the
    observation and reward magnitudes stay within the configured clip values.
    The wrapper is then saved under ``tmpdir``, loaded back against the same
    underlying venv, and compared with ``check_vec_norm_equal``.
    """
    clip_obs = 0.5
    clip_reward = 5.0

    orig_venv = DummyVecEnv([make_env])
    norm_venv = VecNormalize(
        orig_venv,
        norm_obs=True,
        norm_reward=True,
        clip_obs=clip_obs,
        clip_reward=clip_reward,
    )

    norm_venv.reset()
    while True:
        sampled = [norm_venv.action_space.sample()]
        obs, rew, done, _ = norm_venv.step(sampled)
        # Bounds must hold on every step, not only the final one.
        assert np.abs(obs).max() <= clip_obs
        assert np.abs(rew).max() <= clip_reward
        if done[0]:
            break

    path = str(tmpdir.join("vec_normalize"))
    norm_venv.save(path)
    restored = VecNormalize.load(path, venv=orig_venv)
    check_vec_norm_equal(norm_venv, restored)
def single_wrappers(
    single_venv,
    scheduler,
    our_idx,
    normalize,
    normalize_observations,
    rew_shape,
    rew_shape_params,
    embed_index,
    embed_paths,
    embed_types,
    debug,
    env_name,
    load_policy,
    lookback_params,
    transparent_params,
    log_callbacks,
    save_callbacks,
):
    """Stack the optional single-agent wrappers on top of ``single_venv``.

    Up to three layers are applied, in this order:

    1. Reward shaping (when ``rew_shape`` is truthy): wraps the env via
       ``apply_reward_wrapper``, appends a ``LoggerOnlyLogCallback`` to
       ``log_callbacks`` and, for each of the "noise" and "rew_shape"
       annealers the scheduler reports as conditional, hands it the
       wrapper's ``get_logs``.
    2. Lookback (when ``lookback_params["lb_num"] > 0``): wraps the env in
       ``LookbackRewardVecWrapper`` using the first embedded agent.
    3. Normalization (when ``normalize`` is truthy): wraps the env in
       ``VecNormalize``, optionally restores running averages from
       ``load_policy["path"]`` and appends a saver to ``save_callbacks``.

    ``log_callbacks`` and ``save_callbacks`` are mutated in place.

    Returns:
        The outermost wrapper, or ``single_venv`` itself when no layer applies.

    Raises:
        ValueError: if lookback is requested with more than one embedded
            agent, or if observation normalization is requested while a
            "zoo" policy is being loaded.
    """
    # --- Layer 1: reward shaping -------------------------------------------
    if rew_shape:
        shaped = apply_reward_wrapper(
            single_env=single_venv,
            scheduler=scheduler,
            shaping_params=rew_shape_params,
            agent_idx=our_idx,
        )
        log_callbacks.append(LoggerOnlyLogCallback(shaped))
        single_venv = shaped

        for kind in ("noise", "rew_shape"):
            if not scheduler.is_conditional(kind):
                continue
            scheduler.set_annealer_get_logs(kind, shaped.get_logs)

    # --- Layer 2: lookback ---------------------------------------------------
    if lookback_params["lb_num"] > 0:
        if len(embed_types) > 1:
            raise ValueError(
                "Lookback is not supported with multiple embedded agents")
        single_venv = LookbackRewardVecWrapper(
            single_venv,
            env_name,
            debug,
            embed_index,
            embed_paths[0],
            embed_types[0],
            transparent_params,
            **lookback_params,
        )

    # --- Layer 3: normalization ----------------------------------------------
    if not normalize:
        return single_venv

    if normalize_observations:
        if load_policy["path"] is not None and load_policy["type"] == "zoo":
            raise ValueError(
                "Trying to normalize twice. Bansal et al's Zoo agents normalize "
                "implicitly. Please set normalize=False to disable VecNormalize."
            )
        extra_kwargs = {}
    else:
        extra_kwargs = {"norm_obs": False}
    norm_wrapper = VecNormalize(single_venv, **extra_kwargs)

    restore_stats = (
        load_policy["path"] is not None and load_policy["type"] != "zoo"
    )
    if restore_stats:
        norm_wrapper.load_running_average(load_policy["path"])

    def _save_normalization(root_dir):
        # Mirrors the saver's return value so callers see the same result.
        return norm_wrapper.save(os.path.join(root_dir, "vec_normalize.pkl"))

    save_callbacks.append(_save_normalization)
    return norm_wrapper