def eval_domain_params(
    pool: SamplerPool, env: SimEnv, policy: Policy, params: List[Dict], init_state: Optional[np.ndarray] = None
) -> List[StepSequence]:
    """
    Evaluate a policy on a multidimensional grid of domain parameters.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param policy: policy to evaluate
    :param params: multidimensional grid of domain parameters
    :param init_state: initial state of the environment which will be fixed if not set to `None`
    :return: list of rollouts
    """
    # Work on a clean simulation, i.e. one without any domain randomization wrappers
    env = remove_all_dr_wrappers(env, verbose=True)

    if init_state is not None:
        # Collapse the initial state space to a single fixed state
        env.init_space = SingularStateSpace(fixed_state=init_state)

    # Ship the serialized environment and policy to every worker in the pool
    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # One rollout per domain parameter set; track progress on stdout
    run_one = functools.partial(_ps_run_one_domain_param, eval=True)
    with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb:
        return pool.run_map(run_one, params, pb)
def test_setting_dp_vals(env: SimEnv):
    """Set every supported domain parameter to a perturbed value and check that the env reports it back."""
    # Substrings of parameters that are only available in Vortex but not in Bullet; skip those
    vortex_only_substrings = ("slip", "compliance", "linearvelocitydamping", "angularvelocitydamping")

    # Loop over all possible domain parameters and set them to a random value
    for _ in range(5):
        for dp_key in env.supported_domain_param:
            if any(s in dp_key for s in vortex_only_substrings):
                continue
            nominal_val = env.domain_param.get(dp_key)
            # Perturb the nominal value by up to +10%
            rand_val = nominal_val + nominal_val * np.random.rand() / 10
            env.reset(domain_param={dp_key: rand_val})
            assert env.domain_param[dp_key] == pytest.approx(rand_val, abs=5e-4)  # rolling friction is imprecise
def test_parallel_sampling_deterministic_wo_min_steps(
    env: SimEnv,
    policy: Policy,
    min_rollouts: Optional[int],
    init_states: Optional[int],
    domain_params: Optional[List[dict]],
):
    """Sampling with different worker counts but a fixed seed must yield identical rollouts."""
    env.max_steps = 20

    if init_states is not None:
        # Turn the requested count into a list of concrete initial states
        init_states = [env.spec.state_space.sample_uniform() for _ in range(init_states)]

    all_rollouts = []
    for num_workers in (1, 2, 4):
        # Wrap the policy in an exploration strategy to test that this works too (it should, as the policy
        # gets pickled and distributed anyway)
        sampler = ParallelRolloutSampler(
            env,
            NormalActNoiseExplStrat(policy, std_init=1.0),
            num_workers=num_workers,
            min_rollouts=min_rollouts,
            seed=0,
        )
        all_rollouts.append(sampler.sample(init_states=init_states, domain_params=domain_params))

    # Rollouts within one sampler run must differ, i.e. not all rollouts share the same seed
    for ros in all_rollouts:
        for ro_a, ro_b in [(a, b) for a in ros for b in ros if a is not b]:
            if isinstance(policy, IdlePolicy):
                # The idle policy is deterministic and always outputs the zero action, so skip the action check.
                # The Quanser Ball Balancer is a deterministic environment (conditioned on the initial state);
                # with a deterministic policy the rollouts are equivalent per initial state, so only check for
                # differences when the initial states were not fixed.
                if init_states is None:
                    assert ro_a.rewards != pytest.approx(ro_b.rewards)
                    assert ro_a.observations != pytest.approx(ro_b.observations)
            else:
                assert ro_a.rewards != pytest.approx(ro_b.rewards)
                assert ro_a.observations != pytest.approx(ro_b.observations)
                assert ro_a.actions != pytest.approx(ro_b.actions)

    # The rollouts must be identical regardless of the number of workers
    for ros_a, ros_b in [(a, b) for a in all_rollouts for b in all_rollouts]:
        assert len(ros_a) == len(ros_b)
        for ro_a, ro_b in zip(ros_a, ros_b):
            assert ro_a.rewards == pytest.approx(ro_b.rewards)
            assert ro_a.observations == pytest.approx(ro_b.observations)
            assert ro_a.actions == pytest.approx(ro_b.actions)
def test_act_noise_simple(env: SimEnv):
    """Check that Gaussian action noise (mean-only, std-only, and both) changes the resulting observations."""

    def _assert_noise_changes_obs(wrapped_env):
        # Step the nominal and the noise-wrapped env with the same action; the injected action noise
        # must lead to observations that differ from the nominal ones
        for _ in range(3):
            rand_act = env.act_space.sample_uniform()
            wrapped_env.reset()
            obs_nom, _, _, _ = env.step(rand_act)
            obs_wrapped, _, _, _ = wrapped_env.step(rand_act)
            assert not np.all(obs_nom == obs_wrapped)

    # Typical case with zero mean and non-zero std
    _assert_noise_changes_obs(GaussianActNoiseWrapper(env, noise_std=0.2 * np.ones(env.act_space.shape)))

    # Unusual case with non-zero mean and zero std
    _assert_noise_changes_obs(GaussianActNoiseWrapper(env, noise_mean=0.1 * np.ones(env.act_space.shape)))

    # General case with non-zero mean and non-zero std
    _assert_noise_changes_obs(
        GaussianActNoiseWrapper(
            env,
            noise_mean=0.1 * np.ones(env.act_space.shape),
            noise_std=0.2 * np.ones(env.act_space.shape),
        )
    )
def test_parallel_sampling_deterministic_w_min_steps(
    env: SimEnv,
    policy: Policy,
    min_rollouts: Optional[int],
    min_steps: int,
    domain_params: Optional[List[dict]],
):
    """Sampling with a minimum step count and different worker counts but a fixed seed must be deterministic."""
    env.max_steps = 20

    all_rollouts = []
    for num_workers in (1, 2, 4):
        # Wrap the policy in an exploration strategy to test that this works too (it should, as the policy
        # gets pickled and distributed anyway)
        all_rollouts.append(
            ParallelRolloutSampler(
                env,
                NormalActNoiseExplStrat(policy, std_init=1.0),
                num_workers=num_workers,
                min_rollouts=min_rollouts,
                min_steps=min_steps * env.max_steps,
                seed=0,
            ).sample(domain_params=domain_params)
        )

    # Test that the rollouts are actually different, i.e., that not the same seed is used for all rollouts
    for ros in all_rollouts:
        for ro_a, ro_b in [(a, b) for a in ros for b in ros if a is not b]:
            # The idle policy is deterministic and always outputs the zero action. Hence, do not check that the
            # rollouts are different when using the idle policy.
            if not isinstance(policy, IdlePolicy):
                assert ro_a.rewards != pytest.approx(ro_b.rewards)
                assert ro_a.observations != pytest.approx(ro_b.observations)
                assert ro_a.actions != pytest.approx(ro_b.actions)

    # Test that the rollouts for all number of workers are equal
    for ros_a, ros_b in [(a, b) for a in all_rollouts for b in all_rollouts]:
        # Compute the total step counts once instead of re-summing for every assertion
        steps_a = sum(len(ro) for ro in ros_a)
        steps_b = sum(len(ro) for ro in ros_b)
        assert steps_a == steps_b
        assert steps_a >= min_steps * env.max_steps
        assert steps_b >= min_steps * env.max_steps
        assert len(ros_a) == len(ros_b)
        if min_rollouts is not None:
            assert len(ros_a) >= min_rollouts
            assert len(ros_b) >= min_rollouts
        for ro_a, ro_b in zip(ros_a, ros_b):
            assert ro_a.rewards == pytest.approx(ro_b.rewards)
            assert ro_a.observations == pytest.approx(ro_b.observations)
            assert ro_a.actions == pytest.approx(ro_b.actions)
def test_domain_param_transforms(env: SimEnv, trafo_class: Type): pyrado.set_seed(0) # Create a mask for a random domain parameter offset = 1 idx = random.randint(0, len(env.supported_domain_param) - 1) sel_dp_change = list(env.supported_domain_param)[idx] sel_dp_fix = list( env.supported_domain_param)[(idx + offset) % len(env.supported_domain_param)] while (offset == 1 or any([ item in sel_dp_change for item in VORTEX_ONLY_DOMAIN_PARAM_LIST ]) or any([item in sel_dp_fix for item in VORTEX_ONLY_DOMAIN_PARAM_LIST])): idx = random.randint(0, len(env.supported_domain_param) - 1) sel_dp_change = list(env.supported_domain_param)[idx] sel_dp_fix = list( env.supported_domain_param)[(idx + offset) % len(env.supported_domain_param)] offset += 1 mask = (sel_dp_change, ) wenv = trafo_class(env, mask) assert isinstance(wenv, DomainParamTransform) # Check 5 random values for _ in range(5): # Change the selected domain parameter new_dp_val = random.random() * env.get_nominal_domain_param( )[sel_dp_change] new_dp_val = abs( new_dp_val) + 1e-6 # due to the domain of the new params transformed_new_dp_val = wenv.forward(new_dp_val) wenv.domain_param = { sel_dp_change: transformed_new_dp_val } # calls inverse transform if not isinstance(inner_env(wenv), SimPyEnv): wenv.reset( ) # the RcsPySim and MujocoSim classes need to be reset to apply the new domain param # Test the actual domain param and the the getters assert inner_env(wenv)._domain_param[sel_dp_change] == pytest.approx( new_dp_val, abs=1e-5) assert wenv.domain_param[sel_dp_change] == pytest.approx(new_dp_val, abs=1e-5) assert wenv.domain_param[sel_dp_fix] != pytest.approx(new_dp_val)
def test_parallel_sampling_deterministic_smoke_test_w_min_steps(
        tmpdir_factory, env: SimEnv, policy: Policy, algo, min_rollouts: int, min_steps: int):
    """
    Smoke test: train a full algorithm with different seeds and worker counts, then check that the sampled
    observations are identical across worker counts, different across seeds, and identical for equal seeds.
    """
    env.max_steps = 20
    seeds = (0, 1)
    nums_workers = (1, 2, 4)

    # logging_results: one (seed, [progress.csv contents per worker count]) tuple per seed
    logging_results = []
    # rollout_results: per seed, per worker count, the rollouts recorded by the saving wrapper
    rollout_results: List[List[List[List[StepSequence]]]] = []
    for seed in seeds:
        logging_results.append((seed, []))
        rollout_results.append([])
        for num_workers in nums_workers:
            # Re-seed and re-initialize the policy so every run starts from the same state
            pyrado.set_seed(seed)
            policy.init_param(None)
            ex_dir = str(tmpdir_factory.mktemp(f"seed={seed}-num_workers={num_workers}"))
            set_log_prefix_dir(ex_dir)
            vfcn = FNN(input_size=env.obs_space.flat_dim, output_size=1, hidden_sizes=[16, 16], hidden_nonlin=to.tanh)
            critic = GAE(vfcn, gamma=0.98, lamda=0.95, batch_size=32, lr=1e-3, standardize_adv=False)
            alg = algo(
                ex_dir,
                env,
                policy,
                critic,
                max_iter=3,
                min_rollouts=min_rollouts,
                min_steps=min_steps * env.max_steps,
                num_workers=num_workers,
            )
            # Record every sampled rollout so they can be compared after training
            alg.sampler = RolloutSavingWrapper(alg.sampler)
            alg.train()
            with open(f"{ex_dir}/progress.csv") as f:
                logging_results[-1][1].append(f.read())  # read() already returns str
            rollout_results[-1].append(alg.sampler.rollouts)

    # Test that the observations for all number of workers are equal.
    for rollouts in rollout_results:
        for ros_a, ros_b in [(a, b) for a in rollouts for b in rollouts]:
            assert len(ros_a) == len(ros_b)
            for ro_a, ro_b in zip(ros_a, ros_b):
                assert len(ro_a) == len(ro_b)
                for r_a, r_b in zip(ro_a, ro_b):
                    assert r_a.observations == pytest.approx(r_b.observations)

    # Test that different seeds actually produce different results.
    for results_a, results_b in [(a, b) for seed_a, a in logging_results for seed_b, b in logging_results
                                 if seed_a != seed_b]:
        for result_a, result_b in [(a, b) for a in results_a for b in results_b if a is not b]:
            assert result_a != result_b

    # Test that same seeds produce same results.
    for _, results in logging_results:
        for result_a, result_b in [(a, b) for a in results for b in results]:
            assert result_a == result_b
def test_npdr_and_bayessim(
    ex_dir,
    algo_name: str,
    env: SimEnv,
    num_segments: int,
    len_segments: int,
    num_real_rollouts: int,
    num_sbi_rounds: int,
    use_rec_act: bool,
):
    """
    Smoke test for the NPDR and BayesSim system-identification algorithms: run one training iteration
    against a synthetic 'real' environment and check that the iteration counter advances to `max_iter`.
    """
    pyrado.set_seed(0)

    # Create a fake ground truth target domain (nominal masses/lengths scaled by 1.2 / 0.8)
    env_real = deepcopy(env)
    dp_nom = env.get_nominal_domain_param()
    env_real.domain_param = dict(mass_pend_pole=dp_nom["mass_pend_pole"] * 1.2,
                                 length_pend_pole=dp_nom["length_pend_pole"] * 0.8)

    # Reduce the number of steps to make this test run faster
    env.max_steps = 40
    env_real.max_steps = 40

    # Policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Define a mapping: index - domain parameter
    dp_mapping = {1: "mass_pend_pole", 2: "length_pend_pole"}

    # Prior: box-uniform over +/-50% around the nominal values
    prior_hparam = dict(
        low=to.tensor([dp_nom["mass_pend_pole"] * 0.5, dp_nom["length_pend_pole"] * 0.5]),
        high=to.tensor([dp_nom["mass_pend_pole"] * 1.5, dp_nom["length_pend_pole"] * 1.5]),
    )
    prior = sbiutils.BoxUniform(**prior_hparam)

    # Time series embedding
    embedding = BayesSimEmbedding(
        env.spec,
        RolloutSamplerForSBI.get_dim_data(env.spec),
        downsampling_factor=3,
    )

    # Posterior (normalizing flow)
    posterior_hparam = dict(model="maf", embedding_net=nn.Identity(), hidden_features=20, num_transforms=3)

    # Policy optimization subroutine (kept tiny to make the test fast)
    subrtn_policy_hparam = dict(
        max_iter=1,
        pop_size=2,
        num_init_states_per_domain=1,
        num_domains=2,
        expl_std_init=0.1,
        expl_factor=1.1,
        num_workers=1,
    )
    subrtn_policy = HCNormal(ex_dir, env, policy, **subrtn_policy_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=1,
        num_sim_per_round=20,
        num_real_rollouts=num_real_rollouts,
        num_sbi_rounds=num_sbi_rounds,
        simulation_batch_size=1,
        normalize_posterior=False,
        num_eval_samples=2,
        num_segments=num_segments,
        len_segments=len_segments,
        use_rec_act=use_rec_act,
        stop_on_done=True,
        subrtn_sbi_training_hparam=dict(max_num_epochs=1),  # only train for 1 iteration
        # subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=True, mcmc_parameters=dict(warmup_steps=20)),
        num_workers=1,
    )
    skip = False
    if algo_name == NPDR.name:
        algo = NPDR(
            save_dir=ex_dir,
            env_sim=env,
            env_real=env_real,
            policy=policy,
            dp_mapping=dp_mapping,
            prior=prior,
            embedding=embedding,
            subrtn_sbi_class=SNPE_C,
            posterior_hparam=posterior_hparam,
            subrtn_policy=subrtn_policy,
            **algo_hparam,
        )
    elif algo_name == BayesSim.name:
        # We are not checking multi-round SNPE-A since it has known issues
        if algo_hparam["num_sbi_rounds"] > 1:
            skip = True
        algo = BayesSim(
            save_dir=ex_dir,
            env_sim=env,
            env_real=env_real,
            policy=policy,
            dp_mapping=dp_mapping,
            embedding=embedding,
            prior=prior,
            subrtn_policy=subrtn_policy,
            **algo_hparam,
        )
    else:
        raise NotImplementedError

    if not skip:
        algo.train()
        # Just checking the interface here
        assert algo.curr_iter == algo.max_iter
def test_sbi_embedding(
    ex_dir,
    env: SimEnv,
    embedding_name: str,
    num_segments: int,
    len_segments: int,
    stop_on_done: bool,
    state_mask_labels: Union[None, List[str]],
    act_mask_labels: Union[None, List[str]],
):
    """Construct each time-series embedding by name and check the sampler output shape for 7 parameter sets."""
    pyrado.set_seed(0)

    # Reduce the number of steps to make this test run faster
    env.max_steps = 80

    # Policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Define a mapping: index - domain parameter
    dp_mapping = {1: "mass_pend_pole", 2: "length_pend_pole"}

    # Arguments shared by every embedding constructor
    dim_data = RolloutSamplerForSBI.get_dim_data(env.spec)
    mask_kwargs = dict(state_mask_labels=state_mask_labels, act_mask_labels=act_mask_labels)

    # Time series embedding, selected by name
    if embedding_name == LastStepEmbedding.name:
        embedding = LastStepEmbedding(env.spec, dim_data, **mask_kwargs)
    elif embedding_name == AllStepsEmbedding.name:
        embedding = AllStepsEmbedding(env.spec, dim_data, env.max_steps, downsampling_factor=3, **mask_kwargs)
    elif embedding_name == DeltaStepsEmbedding.name:
        embedding = DeltaStepsEmbedding(env.spec, dim_data, env.max_steps, downsampling_factor=3, **mask_kwargs)
    elif embedding_name == BayesSimEmbedding.name:
        embedding = BayesSimEmbedding(env.spec, dim_data, downsampling_factor=3, **mask_kwargs)
    elif embedding_name == DynamicTimeWarpingEmbedding.name:
        embedding = DynamicTimeWarpingEmbedding(env.spec, dim_data, downsampling_factor=3, **mask_kwargs)
    elif embedding_name == RNNEmbedding.name:
        embedding = RNNEmbedding(
            env.spec,
            dim_data,
            hidden_size=10,
            num_recurrent_layers=1,
            output_size=1,
            len_rollouts=env.max_steps,
            downsampling_factor=1,
            **mask_kwargs,
        )
    else:
        raise NotImplementedError

    sampler = SimRolloutSamplerForSBI(
        env,
        policy,
        dp_mapping,
        embedding,
        num_segments,
        len_segments,
        stop_on_done,
        rollouts_real=None,
        use_rec_act=False,
    )

    # Test with 7 domain parameter sets
    data_sim = sampler(to.abs(to.randn(7, 2)))
    assert data_sim.shape == (7, embedding.dim_output)
def test_pair_plot_scatter(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    legend_labels: Optional[str],
    axis_limits: Optional[str],
    use_kde: bool,
    use_trafo: bool,
):
    """
    Train a small SNPE posterior on a toy simulator, sample domain parameters, and check that the
    pairwise scatter plot of the posterior samples can be drawn.
    """

    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(env, policy, eval=True, reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 15, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = LogDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior; the stiffness bounds live in log space when the transform is active
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    k_low = np.log(10) if use_trafo else 10
    k_up = np.log(20) if use_trafo else 20
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, k_low, 0.2]), high=to.tensor([1.5, k_up, 0.8]))

    # Learn a likelihood from the simulator (tiny model, 5 epochs — speed over accuracy)
    density_estimator = sbiutils.posterior_nn(model="maf", hidden_features=10, num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(simulator=simulator, proposal=prior, num_simulations=50, num_workers=1)
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter, perturbed by up to ~10%
    domain_param_gt = to.tensor([env_real.domain_param[dp_mapping[key]] for key in sorted(dp_mapping.keys())])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 10
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    # Sample domain parameters from the learned posterior
    domain_params, log_probs = SBIBase.eval_posterior(
        posterior,
        data_real,
        num_samples=6,
        normalize_posterior=False,
        subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=False),
    )
    dp_samples = [domain_params.reshape(1, -1, domain_params.shape[-1]).squeeze()]

    # The "inside" layout needs one row/column per parameter, other layouts one extra for the marginals
    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    _, axs = plt.subplots(num_rows, num_cols, figsize=(8, 8), tight_layout=True)
    fig = draw_posterior_pairwise_scatter(
        axs=axs,
        dp_samples=dp_samples,
        dp_mapping=dp_mapping,
        prior=prior if axis_limits == "use_prior" else None,
        env_sim=env,
        env_real=env_real,
        axis_limits=axis_limits,
        marginal_layout=layout,
        labels=labels,
        legend_labels=legend_labels,
        use_kde=use_kde,
    )
    assert fig is not None
def test_pair_plot(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    prob_labels: Optional[str],
    use_prior: bool,
    use_trafo: bool,
):
    """
    Train a small SNPE posterior on a toy simulator and check that the pairwise posterior heatmap
    can be drawn, with grid bounds taken either from the prior or from an explicit tensor.
    """

    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(env, policy, eval=True, reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 35, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = SqrtDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, 20, 0.2]), high=to.tensor([1.5, 40, 0.8]))

    # Learn a likelihood from the simulator (tiny model, 5 epochs — speed over accuracy)
    density_estimator = sbiutils.posterior_nn(model="maf", hidden_features=10, num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(simulator=simulator, proposal=prior, num_simulations=50, num_workers=1)
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter, perturbed by up to ~20%
    domain_param_gt = to.tensor([env_real.domain_param[key] for _, key in dp_mapping.items()])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 5
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    # Get a (random) condition
    condition = Embedding.pack(domain_param_gt.clone())

    # The "inside" layout needs one row/column per parameter, other layouts one extra for the marginals
    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    # Either derive the grid bounds from the prior, or drop the prior and pass explicit [0, 1] bounds
    if use_prior:
        grid_bounds = None
    else:
        prior = None
        grid_bounds = to.cat([to.zeros((len(dp_mapping), 1)), to.ones((len(dp_mapping), 1))], dim=1)

    _, axs = plt.subplots(num_rows, num_cols, figsize=(14, 14), tight_layout=True)
    fig = draw_posterior_pairwise_heatmap(
        axs,
        posterior,
        data_real,
        dp_mapping,
        condition,
        prior=prior,
        env_real=env_real,
        marginal_layout=layout,
        grid_bounds=grid_bounds,
        grid_res=100,
        normalize_posterior=False,
        rescale_posterior=True,
        labels=None if labels is None else [""] * len(dp_mapping),
        prob_labels=prob_labels,
    )
    assert fig is not None
def test_combination(env: SimEnv):
    """
    Stack several environment wrappers (randomization, action/observation normalization, partial
    observation, action noise, action delay) and check that each layer behaves as expected and that
    wrappers can be queried and removed again.
    """
    pyrado.set_seed(0)
    env.max_steps = 20

    # Randomization with a buffer of 3 pre-sampled domains; the buffer cycles, so domain 0 reappears at index 3
    randomizer = create_default_randomizer(env)
    env_r = DomainRandWrapperBuffer(env, randomizer)
    env_r.fill_buffer(num_domains=3)
    dp_before = []
    dp_after = []
    for i in range(4):
        dp_before.append(env_r.domain_param)
        rollout(env_r, DummyPolicy(env_r.spec), eval=True, seed=0, render_mode=RenderMode())
        dp_after.append(env_r.domain_param)
        assert dp_after[i] != dp_before[i]
    assert dp_after[0] == dp_after[3]

    # Action and observation normalization; explicit bounds for the unbounded velocity observations
    env_rn = ActNormWrapper(env)
    elb = {"x_dot": -213.0, "theta_dot": -42.0}
    eub = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(env_rn, explicit_lb=elb, explicit_ub=eub)
    alb, aub = env_rn.act_space.bounds
    assert all(alb == -1)
    assert all(aub == 1)
    olb, oub = env_rn.obs_space.bounds
    assert all(olb == -1)
    assert all(oub == 1)

    # Normalizing the observations of a recorded rollout must match the normalized env's observations
    ro_r = rollout(env_r, DummyPolicy(env_r.spec), eval=True, seed=0, render_mode=RenderMode())
    ro_rn = rollout(env_rn, DummyPolicy(env_rn.spec), eval=True, seed=0, render_mode=RenderMode())
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Hide two observation dimensions
    env_rnp = ObsPartialWrapper(env_rnp := env_rn, idcs=[env.obs_space.labels[2], env.obs_space.labels[3]]) if False else ObsPartialWrapper(env_rn, idcs=[env.obs_space.labels[2], env.obs_space.labels[3]])
    ro_rnp = rollout(env_rnp, DummyPolicy(env_rnp.spec), eval=True, seed=0, render_mode=RenderMode())

    # Add Gaussian action noise on top
    env_rnpa = GaussianActNoiseWrapper(env_rnp,
                                       noise_mean=0.5 * np.ones(env_rnp.act_space.shape),
                                       noise_std=0.1 * np.ones(env_rnp.act_space.shape))
    ro_rnpa = rollout(env_rnpa, DummyPolicy(env_rnpa.spec), eval=True, seed=0, render_mode=RenderMode())
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)  # the action noise changed the rollout

    # Delay the actions by 3 steps; the recorded (commanded) actions stay the same, the observations change
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = rollout(env_rnpd, DummyPolicy(env_rnpd.spec), eval=True, seed=0, render_mode=RenderMode())
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Query the wrapper chain and remove the delay wrapper again
    assert type(inner_env(env_rnpd)) == type(env)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)