def test_observation_wrapper_order():
    """Check that stacking observation wrappers in this order is rejected.

    A ``RandomizeObservationWrapper`` is applied first and an
    ``ObservationDelayWrapper`` is put on top of it; stepping the resulting
    env is expected to raise ``AssertionError``.
    """
    base_env = make_simple_env()
    base_env.reset()
    # Replace the real observation with a fixed one; a fresh dict (and array)
    # is produced on every call.
    base_env.observe = lambda: {"cube_pos": np.array([0.1, 0.2, 0.3])}

    noise_levels = {"cube_pos": {"uncorrelated": 0.2, "additive": 0.1}}
    env = RandomizeObservationWrapper(env=base_env, levels=noise_levels)
    env.reset()

    delay_levels = {
        "interpolators": {},
        "groups": {
            "vision": {
                "obs_names": ["cube_pos"],
                "mean": 1.5,
                "std": 0.0,
            },
        },
    }
    env = ObservationDelayWrapper(env, levels=delay_levels)

    zero_action = np.zeros(env.action_space.shape)
    with pytest.raises(AssertionError):
        env.step(zero_action)
def test_randomize_observation_wrapper():
    """Check which value ``RandomizeObservationWrapper`` adds its noise to.

    With the wrapper's ``random_state.randn`` patched to return all ones, the
    noisy observation must equal the source value plus 0.3 (0.2 + 0.1 from the
    configured levels). The source value is the clean observation, or the
    already-noisy one when it is present.
    """
    base_env = make_simple_env()
    base_env.reset()
    env = RandomizeObservationWrapper(
        env=base_env,
        levels={"cube_pos": {"uncorrelated": 0.2, "additive": 0.1}},
    )

    def unit_noise(key_length):
        # Deterministic stand-in for randn: every sample is exactly 1.
        return np.ones(key_length, dtype=np.float32)

    def set_observation(o):
        # Make the underlying env report `o` (the same object on every call).
        base_env.observe = lambda: o

    with patch.object(env, "random_state") as mock_rand:
        # Remove randomness in the noise.
        mock_rand.randn.side_effect = unit_noise

        # Only the clean observation is available: noise goes on top of it.
        set_observation({"cube_pos": np.array([0.1, 0.2, 0.3])})
        obs = env.reset()
        assert_almost_equal(obs["noisy_cube_pos"], [0.4, 0.5, 0.6])

        # A noisy observation already exists: noise goes on top of that one.
        set_observation(
            {
                "cube_pos": np.array([0.1, 0.2, 0.3]),
                "noisy_cube_pos": np.array([0.2, 0.3, 0.4]),
            }
        )
        obs = env.reset()
        assert_almost_equal(obs["noisy_cube_pos"], [0.5, 0.6, 0.7])
def test_observe():
    """Check that ``env.observe()`` agrees with values read from the simulation."""
    env = make_simple_env()
    env.reset()
    sim = env.mujoco_simulation

    # Take the observation first; the reference values are read afterwards.
    obs = env.observe()

    # Zero the target joints' entries in the reference qpos / qvel.
    # NOTE(review): presumably the observation reports zeros for the target
    # joints -- confirm against the env's observe implementation.
    qpos = sim.qpos
    qpos[sim.qpos_idxs["target_all_joints"]] = 0.0
    qvel = sim.qvel
    qvel[sim.qvel_idxs["target_all_joints"]] = 0.0

    cube_pos = sim.get_qpos("cube_position")
    cube_quat = rotation.quat_normalize(sim.get_qpos("cube_rotation"))
    hand_angle = sim.get_qpos("hand_angle")
    fingertip_pos = sim.shadow_hand.observe().fingertip_positions().flatten()

    true_obs = dict(
        cube_pos=cube_pos,
        cube_quat=cube_quat,
        hand_angle=hand_angle,
        fingertip_pos=fingertip_pos,
        qpos=qpos,
        qvel=qvel,
    )

    for obs_key, true_val in true_obs.items():
        matches = np.allclose(obs[obs_key], true_val)
        assert (
            matches
        ), f"Value for obs {obs_key} {obs[obs_key]} doesn't match true value {true_val}."
def test_make_simple_env():
    """Check that simulation parameters passed to ``make_simple_env`` take effect.

    With ``cube_appearance="vision"`` the model must contain exactly 54 geoms
    whose names start with ``"cube:sticker:"``.
    """
    simulation_params = dict(cube_appearance="vision", hide_target=True)
    env = make_simple_env(parameters={"simulation_params": simulation_params})
    env.reset()

    # There is no wrapper, so `sim` is read straight off the env.
    sim = env.sim

    sticker_count = sum(
        1 for geom_name in sim.model.geom_names
        if geom_name.startswith("cube:sticker:")
    )
    # presumably 9 stickers on each of the 6 cube faces
    expected_sticker_count = 9 * 6
    assert sticker_count == expected_sticker_count
def test_action_delay_wrapper_inactive():
    """Check that ``ActionDelayWrapper`` with zero delay and zero std is a no-op.

    A plain env and a wrapped env, both built with ``starting_seed=0``, are
    stepped with the same action; every observation entry must agree to
    within a mean absolute difference of 1e-6.
    """
    env = make_simple_env(starting_seed=0)
    env.reset()

    # Wrapper calls reset in its __init__ so no need to
    # call reset explicitly.
    wrapper_kwargs = dict(
        delay=0.0,
        per_episode_std=0.0,
        per_step_std=0.0,
        random_state=np.random.RandomState(),
    )
    delayed_env = ActionDelayWrapper(
        make_simple_env(starting_seed=0), **wrapper_kwargs
    )

    action = env.action_space.sample()

    for _ in range(20):
        ob_env, _, _, _ = env.step(action)
        ob_delayed_env, _, _, _ = delayed_env.step(action)

        for name in ob_env:
            mean_abs_diff = np.mean(np.abs(ob_env[name] - ob_delayed_env[name]))
            assert mean_abs_diff < 1e-6, "ActionDelayWrapper should be inactive."
def test_randomized_broken_actuator_wrapper():
    """Exercise ``RandomizedBrokenActuatorWrapper``.

    Verifies that at most ``max_broken_actuators`` actuators are broken, that
    the broken set changes after another reset, and that an all-ones action
    comes back as 0.0 on broken actuators and 1.0 everywhere else.
    """
    env = make_simple_env()
    env.reset()
    env = RandomizedBrokenActuatorWrapper(
        env=env,
        proba_broken=0.5,
        max_broken_actuators=4,
        uncorrelated=0.0,
    )
    env.reset()
    assert len(env._broken_aids) <= 4

    # The broken actuators are different after reset.
    # NOTE(review): this relies on two consecutive random draws differing --
    # confirm the env seeding makes that deterministic.
    previous_broken = env._broken_aids.copy()
    env.reset()
    assert sorted(env._broken_aids) != sorted(previous_broken)

    # The action is modified: broken actuators are zeroed, others untouched.
    all_ones = np.ones(env.action_space.shape)
    action = env.action(all_ones).copy()
    n_actuators = env.action_space.shape[0]
    for i in range(n_actuators):
        expected = 0.0 if i in env._broken_aids else 1.0
        assert action[i] == expected
def test_wrapper_divergence():
    """Check that every listed wrapper makes the env diverge from the vanilla one.

    Two unwrapped envs and one wrapped env per entry in ``wrappers_to_test``
    are all created with the same parameters and seed, then stepped 200 times
    with an all-ones action. The two unwrapped envs must end with identical
    non-target qpos, while each wrapped env must differ from the unwrapped one
    in every non-target qpos entry, with a maximum difference above 1e-4.
    """
    env_kwargs = {"n_random_initial_steps": 0}

    simple_env = make_simple_env(parameters=env_kwargs, starting_seed=0)
    # Should be the exact same as `simple_env`.
    dummy_env = make_simple_env(parameters=env_kwargs, starting_seed=0)

    # Add your wrappers here!
    wrappers_to_test = [
        (ActionNoiseWrapper, {}),
        (BacklashWrapper, {}),
        (FingersOccludedPhasespaceMarkers, {}),  # Need 'noisy_fingertip_pos'
        (FingersFreezingPhasespaceMarkers, {}),  # Need 'noisy_fingertip_pos'
        (
            RandomizedBrokenActuatorWrapper,
            {
                "proba_broken": 1.0,  # force one broken actuators
                "max_broken_actuators": 1,
            },
        ),
        (RandomizedRobotFrictionWrapper, {}),
        (RandomizedCubeFrictionWrapper, {}),
        (RandomizedGravityWrapper, {}),
        (RandomizedJointLimitWrapper, {}),
        (RandomizedTendonRangeWrapper, {}),
        (RandomizedPhasespaceFingersWrapper, {}),
        (RandomizedRobotDampingWrapper, {}),
        (RandomizedRobotKpWrapper, {}),
        (RandomizedTimestepWrapper, {}),
        (ActionDelayWrapper, {}),
        # With default args, the maximum qpos difference is too small.
        (RandomizedActionLatency, {"max_delay": 2}),  # default 1
        # (RandomizedBodyInertiaWrapper, {}),  # default mass_range=[0.5, 1.5]
    ]

    # These wrappers are given an env that is first wrapped with observation
    # noise on "fingertip_pos" (they need 'noisy_fingertip_pos').
    needs_noisy_fingertips = (
        FingersOccludedPhasespaceMarkers,
        FingersFreezingPhasespaceMarkers,
    )
    fingertip_noise_levels = {
        "fingertip_pos": {"uncorrelated": 0.002, "additive": 0.001}
    }

    wrapped_envs = []
    for wrapper_class, kwargs in wrappers_to_test:
        inner_env = make_simple_env(parameters=env_kwargs, starting_seed=0)
        if wrapper_class in needs_noisy_fingertips:
            inner_env = RandomizeObservationWrapper(
                env=inner_env, levels=fingertip_noise_levels
            )
        wrapped = wrapper_class(env=inner_env, **kwargs)
        wrapped.reset()
        wrapped_envs.append(wrapped)

    # The action shape is taken from the last wrapped env.
    action_shape = wrapped_envs[-1].action_space.shape
    for _ in range(200):
        # A fresh all-ones action per step, shared by every env in that step.
        action = np.ones(action_shape)
        for stepped_env in (simple_env, dummy_env, *wrapped_envs):
            stepped_env.step(action)

    # Indices of all qpos entries that do not belong to "target:" joints.
    target_qpos_idxs = set(
        joint_qpos_ids_from_prefix(simple_env.unwrapped.sim.model, "target:")
    )
    n_qpos = simple_env.unwrapped.sim.data.qpos.shape[0]
    kept_indices = [idx for idx in range(n_qpos) if idx not in target_qpos_idxs]

    def get_non_target_qpos(_env):
        qpos_snapshot = _env.unwrapped.sim.data.qpos.copy()
        return np.array(qpos_snapshot[kept_indices])

    baseline_qpos = get_non_target_qpos(simple_env)

    # Make sure the base env is deterministic
    assert np.array_equal(baseline_qpos, get_non_target_qpos(dummy_env))

    for wrapped in wrapped_envs:
        diffs = np.absolute(baseline_qpos - get_non_target_qpos(wrapped))
        failure_msg = "failed for {}".format(wrapped.__class__.__name__)
        assert np.max(diffs) > 1e-4, failure_msg
        assert np.min(diffs) > 0.0, failure_msg
def test_observation_delay_wrapper():
    """Check delayed / interpolated observations from ``ObservationDelayWrapper``.

    The underlying env's ``observe`` is mocked to return one observation at
    reset and a different one afterwards. The first step must yield the
    initial observation; the second step must yield the expected
    interpolation between the two for each observation group.
    """
    interpolators = {
        "cube_quat": "QuatInterpolator",
        "cube_face_angle": "RadianInterpolator",
    }
    groups = {
        "vision": {
            "obs_names": ["cube_pos", "cube_quat"],
            "mean": 1.5,
            "std": 0.0,
        },
        "giiker": {"obs_names": ["cube_face_angle"], "mean": 1.4, "std": 0.0},
        "phasespace": {"obs_names": ["fingertip_pos"], "mean": 1.2, "std": 0.0},
    }
    levels = {"interpolators": interpolators, "groups": groups}

    base_env = make_simple_env()
    base_env.reset()
    env = ObservationDelayWrapper(base_env, levels)

    def set_observation(o):
        # Make the underlying env report `o` (the same object on every call).
        base_env.observe = lambda: o

    def step_with_zero_action():
        # Step with a freshly built all-zero action; return the observation.
        return env.step(np.zeros(env.action_space.shape))[0]

    initial_obs = {
        "cube_pos": np.array([0.1, 0.2, 0.3]),
        "cube_quat": rotation.euler2quat(np.array([0.0, 0.0, 0.0])),
        "cube_face_angle": np.array(
            [np.pi - 0.01, np.pi / 2 - 0.01, 0.0, 0.0, 0.0, 0.0]
        ),
        "fingertip_pos": np.array([0.5, 0.6, 0.7]),
    }
    set_observation(initial_obs)
    env.reset()

    second_obs = {
        "cube_pos": np.array([0.2, 0.3, 0.4]),
        "cube_quat": rotation.euler2quat(np.array([0.8, 0.0, 0.0])),
        "cube_face_angle": np.array(
            [-np.pi + 0.01, np.pi / 2 + 0.01, 0.0, 0.0, 0.0, 0.0]
        ),
        "fingertip_pos": np.array([0.5, 0.6, 0.7]),
    }
    set_observation(second_obs)

    # Should take the first observation because there are only two
    # observations and nothing to interpolate.
    obs = step_with_zero_action()
    for key, expected in initial_obs.items():
        assert_almost_equal(obs[f"noisy_{key}"], expected)

    # Step env again so obs should be interpolation of initial and second obs.
    obs = step_with_zero_action()
    assert_almost_equal(obs["noisy_cube_pos"], [0.15, 0.25, 0.35])

    interpolated_euler = rotation.quat2euler(obs["noisy_cube_quat"])
    assert_almost_equal(interpolated_euler, [0.4, 0.0, 0.0])

    expected_face_angle = [-np.pi + 0.002, np.pi / 2 + 0.002, 0.0, 0.0, 0.0, 0.0]
    assert_almost_equal(obs["noisy_cube_face_angle"], expected_face_angle)

    assert_almost_equal(obs["noisy_fingertip_pos"], [0.5, 0.6, 0.7])