def test_act_noise_simple(env):
    """
    Check that Gaussian action noise changes the observation obtained for one and the same commanded action.

    Three wrapper configurations are exercised: only `noise_std` given, only `noise_mean` given, and both given.

    :param env: environment to be wrapped; it is also stepped directly to obtain the nominal observation
    """
    # Typical case with zero mean and non-zero std
    wrapped_env = GaussianActNoiseWrapper(env, noise_std=0.2 * np.ones(env.act_space.shape))
    _assert_act_noise_changes_obs(env, wrapped_env)

    # Unusual case with non-zero mean and zero std
    wrapped_env = GaussianActNoiseWrapper(env, noise_mean=0.1 * np.ones(env.act_space.shape))
    _assert_act_noise_changes_obs(env, wrapped_env)

    # General case with non-zero mean and non-zero std
    wrapped_env = GaussianActNoiseWrapper(
        env,
        noise_mean=0.1 * np.ones(env.act_space.shape),
        noise_std=0.2 * np.ones(env.act_space.shape),
    )
    _assert_act_noise_changes_obs(env, wrapped_env)


def _assert_act_noise_changes_obs(env, wrapped_env, num_samples=3):
    """
    Step the nominal and the wrapped environment with the same random action and assert that the observations differ.

    :param env: nominal environment, i.e. the one that was passed to the noise wrapper
    :param wrapped_env: the noise-wrapped counterpart of `env`
    :param num_samples: number of random actions to try
    """
    for _ in range(num_samples):
        # Sample some values
        rand_act = env.act_space.sample_uniform()
        wrapped_env.reset()

        # NOTE(review): only the wrapped environment is reset, and both steps are executed back to back. If the
        # wrapper forwards to the very same `env` instance, the second step starts from the state reached by the
        # first one, so the observations could differ even without any action noise -- TODO confirm.
        obs_nom, _, _, _ = env.step(rand_act)
        obs_wrapped, _, _, _ = wrapped_env.step(rand_act)

        # Different actions can not lead to the same observation
        assert not np.all(obs_nom == obs_wrapped)
def test_order_act_noise_act_norm(env: SimEnv):
    """
    Check the effect of the stacking order of the action noise wrapper and the action normalization wrapper.

    Samples drawn directly from the two stacked action spaces (with identical seeds) must coincide, whereas the
    observations obtained by stepping both stacks with the same action (and identical seeds) must differ.

    :param env: environment to be wrapped
    """
    num_trials = 3

    def _noise_kwargs() -> dict:
        # Fresh arrays are created on every call, so the two noise wrappers never share their parameter arrays
        act_shape = env.act_space.shape
        return dict(noise_mean=0.2 * np.ones(act_shape), noise_std=0.1 * np.ones(act_shape))

    def _seeded_first_obs(stacked_env, seed: int):
        # Seed, reset, and do a single step; only the observation of the 4-element step result is of interest
        pyrado.set_seed(seed)
        stacked_env.reset()
        obs, _, _, _ = stacked_env.step(small_act)
        return obs

    # Stack 1: the noise wrapper is applied first, the normalization wrapper goes on top
    env_noise = GaussianActNoiseWrapper(env, **_noise_kwargs())
    env_noise_norm = ActNormWrapper(env_noise)

    # Stack 2: the normalization wrapper is applied first, the noise wrapper goes on top
    env_norm = ActNormWrapper(env)
    env_norm_noise = GaussianActNoiseWrapper(env_norm, **_noise_kwargs())

    # Draw samples directly from the action spaces of both stacks
    for seed in range(num_trials):
        pyrado.set_seed(seed)
        sample_noise_norm = env_noise_norm.act_space.sample_uniform()
        pyrado.set_seed(seed)
        sample_norm_noise = env_norm_noise.act_space.sample_uniform()

        # These samples must be the same since they were not passed to the _process_act function
        assert np.allclose(sample_noise_norm, sample_norm_noise)

    # Push a sampled action through both stacks
    for seed in range(num_trials):
        # Sample a small random action such that the de-normalization does not map it to the act_space limits
        small_act = 0.01 * env.act_space.sample_uniform()

        obs_noise_norm = _seeded_first_obs(env_noise_norm, seed)
        obs_norm_noise = _seeded_first_obs(env_norm_noise, seed)

        # The order of processing (first normalization or first randomization) must make a difference
        assert not np.allclose(obs_noise_norm, obs_norm_noise)