def _run_icp_rotation_goal_test(env, z_rots, matches, init_rot=np.zeros(3)): init_quat = rotation.euler2quat(init_rot) for z_rot, match in zip(z_rots, matches): def mock_randomize_goal_rot(*args, **kwargs): z_quat = rotation.quat_from_angle_and_axis(z_rot, np.array([0.0, 0.0, 1])) target_quat = rotation.quat_mul(z_quat, init_quat) env.mujoco_simulation.set_object_quat(np.array([init_quat])) env.mujoco_simulation.set_target_quat(np.array([target_quat])) env.mujoco_simulation.forward() with patch("robogym.envs.rearrange.goals.object_state.ObjectStateGoal." "_randomize_goal_orientation") as mock_obj: mock_obj.side_effect = mock_randomize_goal_rot safe_reset_env(env) # set the object position same as the target position. sim = env.unwrapped.mujoco_simulation sim.set_object_pos(sim.get_target_pos()) sim.forward() goal_dist = env.unwrapped.goal_generation.goal_distance( env.unwrapped._goal, env.unwrapped.goal_generation.current_state()) rot_dist = goal_dist["obj_rot"] if match: assert np.all(rot_dist < 0.2) else: assert np.all(rot_dist > 0.2)
def test_relative_distance(): pos = np.array([ [1.35620895, 0.5341387, 0.48623528], [1.37134474, 0.53952575, 0.526087], [1.3713598, 0.53945007, 0.5056565], ]) goal_state = { "obj_pos": pos, "obj_rot": np.zeros_like(pos), } current_pos = pos.copy() rnd_pos = np.array([np.random.random(), np.random.random(), 0]) current_pos += rnd_pos current_state = { "obj_pos": current_pos, "obj_rot": np.zeros_like(current_pos), } env = make_holdout_env() safe_reset_env(env) goal = HoldoutObjectStateGoal(env.unwrapped.mujoco_simulation) dist = goal.goal_distance(goal_state, current_state) assert np.allclose(dist["rel_dist"], np.zeros(3))
def test_goals_differ_from_obs(vision): make_env_args = { "constants": { "vision": vision, "goal_args": { "randomize_goal_rot": True } }, "parameters": { "simulation_params": { "num_objects": 3, "goal_rot_weight": 1.0, "goal_distance_ratio": 1.0, } }, } for make_env in [ make_blocks_env, make_train_env, make_ycb_env, make_composer_env ]: env = make_env(**make_env_args) obs = safe_reset_env(env) assert not np.allclose(obs["obj_pos"], obs["goal_obj_pos"]) assert not np.allclose(obs["obj_rot"], obs["goal_obj_rot"]) obs, _, _, _ = env.step(env.action_space.sample()) assert not np.allclose(obs["obj_pos"], obs["goal_obj_pos"]) assert not np.allclose(obs["obj_rot"], obs["goal_obj_rot"]) obs = safe_reset_env(env, only_reset_goal=True) assert not np.allclose(obs["obj_pos"], obs["goal_obj_pos"]) assert not np.allclose(obs["obj_rot"], obs["goal_obj_rot"])
def test_composer_env(): for num_max_geoms in [1, 3, 5, 8]: parameters = { "simulation_params": { "num_max_geoms": num_max_geoms, "num_objects": 3 } } env = make_composer_env(parameters=parameters) for _ in range(10): safe_reset_env(env) env.step(env.action_space.sample())
def test_relative_pos_for_duplicated_objects(): current_goal_state = { "obj_rot": np.array([ [0, 0, np.pi / 2], [np.pi / 2, 0, 0], [0, np.pi / 2, 0], [np.pi / 2, 0, 0], [0, np.pi / 2, 0], [np.pi / 2, 0, 0], ]), "obj_pos": np.array([[2, 2, 2], [3, 3, 4], [0, 1, 1], [1, 2, 3], [1, 1, 1], [5, 5, 6]]), } target_goal_state = { "obj_rot": np.array([ [0, np.pi / 2, 0], [0, 0, np.pi / 2], [np.pi / 2, 0, 0], [0, np.pi / 2, 0], [np.pi / 2, 0, 0], [np.pi / 2, 0, 0], ]), "obj_pos": np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3], [1, 1, 1], [1, 2, 3], [6, 5, 6]]), } env = make_composer_env( parameters={ "simulation_params": { "num_objects": 6, "max_num_objects": 6, "num_max_geoms": 1, } }) safe_reset_env(env) goal_generator = ObjectStateGoal(env.unwrapped.mujoco_simulation) relative_goal = goal_generator.relative_goal(target_goal_state, current_goal_state) assert np.allclose( relative_goal["obj_pos"], np.array([[0, 0, 0], [0, 0, -1], [1, 0, 0], [0, 0, 0], [0, 0, 0], [1, 0, 0]]), ) assert np.allclose(relative_goal["obj_rot"], np.zeros((6, 3)))
def test_randomize_goal_rotation(randomize_goal_rot): make_env_args = { "starting_seed": 0, "constants": { "goal_args": { "randomize_goal_rot": randomize_goal_rot } }, "parameters": { "n_random_initial_steps": 0, "simulation_params": { "num_objects": 1 }, }, } for make_env in [make_blocks_env, make_train_env]: env = make_env(**make_env_args) obs = safe_reset_env(env) if randomize_goal_rot: assert not np.allclose( obs["obj_rot"], obs["goal_obj_rot"], atol=1e-3) else: # If we do not randomize goal rotation, it is same as object rotation by default. assert np.allclose(obs["obj_rot"], obs["goal_obj_rot"], atol=1e-3) assert np.allclose( obs["rel_goal_obj_rot"], np.zeros(obs["rel_goal_obj_rot"].shape), atol=1e-3, )
def test_randomize_goal_orientation(rot_type): random_state = np.random.RandomState(seed=0) env = make_blocks_env( parameters={ "simulation_params": { "num_objects": 3, "max_num_objects": 8 } }, constants={ "goal_args": { "randomize_goal_rot": True, "rot_randomize_type": rot_type, "stabilize_goal": False, # stabilize_goal should be False to test 'full' rotation } }, ) safe_reset_env(env) goal_gen = env.goal_generation quats = [] for _ in range(100): goal_gen._randomize_goal_orientation(random_state) quats.append(goal_gen.mujoco_simulation.get_target_quat(pad=False)) quats = np.concatenate(quats, axis=0) # [num randomization, 4] # there should be some variance in the randomized results assert quats.std() > 0.0 if rot_type == "z_axis": # ensure objects are rotated along z axis only for i in range(quats.shape[0]): assert rotation.rot_z_aligned(quats[i], 0.02, include_flip=False) elif rot_type == "block": # ensure at least one face of the block is facing on top for i in range(quats.shape[0]): assert rotation.rot_xyz_aligned(quats[i], 0.02) elif rot_type == "full": # ensure that at least one randomize object has weird pose that any of the face does not # face the top direction rot_xyz_aligned = [] for i in range(quats.shape[0]): rot_xyz_aligned.append(rotation.rot_xyz_aligned(quats[i], 0.02)) assert all(rot_xyz_aligned) is False
def _run_rotation_test(make_env, parameters, constants, angles_to_dists, atol=0.005): env = make_env(parameters=parameters, constants=constants) env.unwrapped.goal_generation = FixedRotationGoal( env.mujoco_simulation, args=constants["goal_args"]) for angle, dist in angles_to_dists.items(): # Attempt 10 times, since sometimes the object is placed so that the gripper bumps # into it. for attempt in range(10): env.unwrapped.goal_generation.fixed_z = angle safe_reset_env(env) info = env.goal_info()[-1] assert "goal_dist" in info if np.allclose(info["goal_dist"]["obj_rot"], dist, atol=atol): break assert_allclose(info["goal_dist"]["obj_rot"], dist, atol=atol)
def test_randomize_initial_robot_position(control_mode, tcp_solver_mode): parameters = dict( n_random_initial_steps=10, robot_control_params=dict( control_mode=control_mode, tcp_solver_mode=tcp_solver_mode, max_position_change=RobotControlParameters. default_max_pos_change_for_solver( control_mode=control_mode, tcp_solver_mode=tcp_solver_mode, ), ), ) for env in _list_rearrange_envs(parameters=parameters, starting_seed=1): obs1 = safe_reset_env(env) obs2 = safe_reset_env(env) # robot TCP pos is randomized, gripper not in motion assert not np.allclose(obs2["gripper_pos"], obs1["gripper_pos"]) assert np.allclose(obs2["gripper_velp"], 0.0, atol=3e-3)
def test_stablize_goal_objects(): random_state = np.random.RandomState(seed=0) env = make_composer_env(parameters={ "simulation_params": { "num_objects": 3, "max_num_objects": 8 } }).unwrapped safe_reset_env(env) goal_gen = env.goal_generation # randomly sample goals goal_gen._randomize_goal_orientation(random_state) goal_positions, _ = goal_gen._sample_next_goal_positions(random_state) # intentionally put the target up in the air goal_positions[:, -1] += 0.1 goal_gen.mujoco_simulation.set_target_pos(goal_positions) goal_gen.mujoco_simulation.forward() old_obj_pos = goal_gen.mujoco_simulation.get_object_pos() old_obj_rot = goal_gen.mujoco_simulation.get_object_rot() old_target_pos = goal_gen.mujoco_simulation.get_target_pos() old_target_rot = goal_gen.mujoco_simulation.get_target_rot() goal_gen._stablize_goal_objects() new_obj_pos = goal_gen.mujoco_simulation.get_object_pos() new_obj_rot = goal_gen.mujoco_simulation.get_object_rot() assert np.allclose(new_obj_pos, old_obj_pos) assert np.allclose(new_obj_rot, old_obj_rot) new_target_pos = goal_gen.mujoco_simulation.get_target_pos() new_target_rot = goal_gen.mujoco_simulation.get_target_rot() assert all(old_target_pos[:3, -1] > new_target_pos[:3, -1]) assert not np.allclose(old_target_rot, new_target_rot)
def test_vision_observations(): for env in _list_rearrange_envs(constants={"vision": True}): env.seed(123) obs1 = safe_reset_env(env) assert "vision_obs" in obs1 assert "vision_goal" in obs1 for _ in range(10): obs2, _, _, info = env.step(np.ones_like( env.action_space.sample())) assert "vision_obs" in obs2 assert "vision_goal" in obs2 assert "successes_so_far" in info assert not np.allclose(obs1["vision_obs"], obs2["vision_obs"]) if info["successes_so_far"] == 0: # Goal should not have changed. assert np.allclose(obs1["vision_goal"], obs2["vision_goal"]) # check the goal is up-to-date in simulator. old_goal = obs1["goal_obj_pos"].copy() old_sim_goal = _read_goal_state_from_sim(env.unwrapped.sim) assert np.array_equal(old_goal, old_sim_goal) else: # Unlikely but might happen: We achieved the goal on accident and we might # thus have a new goal. old_goal = obs2["goal_obj_pos"].copy() assert not np.allclose(obs1["vision_goal"], obs2["vision_goal"]) # Reset the goal and ensure that the goal vision observation has indeed changed. env.reset_goal() obs3, _, _, _ = env.step(np.zeros_like(env.action_space.sample())) assert "vision_obs" in obs3 assert "vision_goal" in obs3 assert is_fixed_goal_env(env) or not np.allclose( obs2["vision_goal"], obs3["vision_goal"]) # check the new goal should take effect in simulator. new_goal = obs3["goal_obj_pos"].copy() new_sim_goal = _read_goal_state_from_sim(env.unwrapped.sim) assert is_fixed_goal_env(env) or not np.array_equal(new_goal, old_goal) assert np.array_equal(new_goal, new_sim_goal)
def test_observations_change_over_time(n_resets=3, n_trials=3): static_keys = frozenset(( "obj_pos", "obj_rel_pos", "obj_vel_pos", "obj_rot", "obj_vel_rot", "obj_bbox_size", "goal_obj_pos", "goal_obj_rot", "qpos_goal", "is_goal_achieved", "gripper_controls", "gripper_qpos", "gripper_vel", "rel_goal_obj_pos", "rel_goal_obj_rot", "obj_gripper_contact", "obj_colors", "safety_stop", )) for env in _list_rearrange_envs(): for reset_i in range(n_resets): obs = safe_reset_env(env) for i in range(n_trials): last_obs = obs # Apply 10 random steps. for _ in range(10): obs, _, _, _ = env.step(env.action_space.sample()) for key in obs: if key not in static_keys: assert not np.allclose(obs[key], last_obs[key]), ( f"Observations unchanged between steps for {key} at reset {reset_i} " f"and trial {i}: {obs[key]}")
def test_observation_space(control_mode, action_dim, gripper_dim, robot_dim, robot_joint_pos): def _get_expected_obs_space_shape(num_objects): return { "goal_obj_pos": ( num_objects, 3, ), "gripper_pos": (3, ), "gripper_qpos": (gripper_dim, ), "gripper_vel": (gripper_dim, ), "gripper_velp": (3, ), "obj_pos": ( num_objects, 3, ), "obj_rel_pos": ( num_objects, 3, ), "obj_rot": ( num_objects, 3, ), "obj_vel_pos": ( num_objects, 3, ), "obj_vel_rot": ( num_objects, 3, ), "qpos": (robot_dim + 7 * num_objects, ), "qpos_goal": (robot_dim + 7 * num_objects, ), "robot_joint_pos": (robot_joint_pos, ), "safety_stop": (1, ), "tcp_force": (3, ), "tcp_torque": (3, ), } env_args = dict(parameters=dict(robot_control_params=dict( control_mode=control_mode)), ) for env in _list_rearrange_envs(**env_args): num_objects = env.unwrapped.mujoco_simulation.num_objects ob_shapes = _get_expected_obs_space_shape(num_objects) for key, ref_shape in ob_shapes.items(): assert (key in env.observation_space.spaces ), f"{key} not in observation_space" curr_shape = env.observation_space.spaces[key].shape assert ( curr_shape == ref_shape ), f"{key} has wrong shape: is {curr_shape}, expected {ref_shape}" obs = safe_reset_env(env) for key, ref_shape in ob_shapes.items(): assert key in obs, f"{key} not in observation_space" curr_shape = obs[key].shape assert ( curr_shape == ref_shape ), f"{key} has wrong shape: is {curr_shape}, expected {ref_shape}"
def test_observations_change_between_resets(n_random_initial_steps, randomize_goal_rot): # Ignore keys which are mostly static, and may or may not differ between resets. skip_keys = { "is_goal_achieved", "rel_goal_obj_rot", "obj_bbox_size", # gripper initial control (should be zero) "gripper_controls", # robogym.envs.rearrange.common.utils.stabilize_objects is not perfect to ensure zero # initial velocity of every object "obj_vel_pos", "obj_vel_rot", # gripper data "gripper_qpos", "gripper_vel", "gripper_pos", "gripper_velp", "obj_gripper_contact", # robot_joint_pos has small perturbation even when n_random_initial_steps=0 "robot_joint_pos", # safety stop obs will be False most of the time, and tcp_force/torque may not change # when the robot is not interacting with objects "safety_stop", "tcp_force", "tcp_torque", } static_keys = {"action_ema"} static_keys = frozenset(static_keys) skip_keys = frozenset(skip_keys) make_env_args = dict( constants={"goal_args": { "randomize_goal_rot": randomize_goal_rot }}, parameters={"n_random_initial_steps": n_random_initial_steps}, starting_seed=1, # seed 0 failed for ReachEnv ) for env in _list_rearrange_envs(**make_env_args): # Fixed goal envs are taken care of in the test below. if is_fixed_goal_env(env): continue extra_skip_keys = set() if is_env_without_goal_rot_randomize(env): extra_skip_keys.add("goal_obj_rot") extra_static_keys = set() if is_fixed_initial_state_env(env): # Unreliable gripper make obj_rel_pos also unreliable. extra_skip_keys.add("obj_rel_pos") extra_static_keys.update(["obj_pos", "obj_rot"]) if not randomize_goal_rot: extra_static_keys.add("goal_obj_rot") if n_random_initial_steps == 0: extra_static_keys.add("qpos") if "block" in env.unwrapped.__class__.__name__.lower(): extra_skip_keys.add("obj_colors") obs1 = safe_reset_env(env) obs2 = safe_reset_env(env) assert set(obs1.keys()) == set(obs2.keys()) for key in obs1: if key in skip_keys or key in extra_skip_keys: continue elif key in static_keys or key in extra_static_keys: assert np.allclose(obs1[key], obs2[key], atol=5e-3), ( f"Observations in {env.unwrapped.__class__.__name__} changed " f"between resets for {key}: {obs1[key]}") else: assert not np.allclose(obs1[key], obs2[key], atol=1e-3), ( f"Observations in {env.unwrapped.__class__.__name__} unchanged " f"between resets for {key}: {obs1[key]}")
def test_env_basic_action(): for env in _list_rearrange_envs(): safe_reset_env(env) for _ in range(10): env.step(env.action_space.sample())