def rodent_run_gaps(random_state=None):
  """Builds a task in which a rat has to run along a corridor with gaps.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  # The rat walker, with its egocentric camera observable switched on.
  camera_options = {'egocentric_camera': dict(enabled=True)}
  walker = rodent.Rat(observable_options=camera_options)

  # Corridor with gaps; platform and gap lengths are both drawn from uniform
  # distributions.
  platform_lengths = distributions.Uniform(.4, .8)
  gap_lengths = distributions.Uniform(.05, .2)
  arena = corr_arenas.GapsCorridor(
      platform_length=platform_lengths,
      gap_length=gap_lengths,
      corridor_width=2,
      corridor_length=40,
      aesthetic='outdoor_natural')

  # Task rewarding the agent for moving down the corridor at a target
  # velocity.
  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(5, 0, 0),
      walker_spawn_rotation=0,
      target_velocity=1.0,
      contact_termination=False,
      terminate_at_height=-0.3,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  environment = composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
  return environment
def test_termination_and_discount(self):
  """Checks termination and discount before and after the walker flips over.

  While the walker is left in its starting pose the task must not terminate
  and the discount must be 1; once the walker has been turned over and is in
  contact, the task must terminate with a discount of 0.
  """
  humanoid = cmu_humanoid.CMUHumanoid()
  corridor = corridor_arenas.EmptyCorridor()
  task = corridor_tasks.RunThroughCorridor(humanoid, corridor)

  seeded_rng = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=seeded_rng)
  env.reset()

  null_action = np.zeros_like(env.physics.data.ctrl)

  # The walker begins upright, so the first few steps must not be treated as
  # a failure.
  for _ in range(5):
    env.step(null_action)
    self.assertFalse(task.should_terminate_episode(env.physics))
    self.assertEqual(task.get_discount(env.physics), 1)

  # Flip the walker over, then advance the raw physics until a contact
  # appears. The simulation clock is restored afterwards.
  saved_time = env.physics.data.time
  humanoid.shift_pose(
      env.physics, position=(0, 0, 10), quaternion=(0, 1, 0, 0))
  env.physics.forward()
  while env.physics.data.ncon == 0:
    env.physics.step()
  env.physics.data.time = saved_time

  # The next environment step has to report a failure termination.
  env.step(null_action)
  self.assertTrue(task.should_terminate_episode(env.physics))
  self.assertEqual(task.get_discount(env.physics), 0)
def test_walker_is_correctly_reinitialized(
    self, position_offset, rotate_180_degrees, use_variations):
  """Checks that `initialize_episode` resets the walker's joints and pose.

  Args:
    position_offset: Spawn position given to the task; also the offset
      expected between the walker's upright position and its final position.
    rotate_180_degrees: If true, the task is given a spawn rotation of
      `np.pi`; otherwise the spawn rotation is `None`.
    use_variations: If true, the spawn position and rotation are wrapped in
      `deterministic.Constant` before being passed to the task.
  """
  spawn_rotation = np.pi if rotate_180_degrees else None
  spawn_position = position_offset
  if use_variations:
    spawn_position = deterministic.Constant(position_offset)
    spawn_rotation = deterministic.Constant(spawn_rotation)

  walker = cmu_humanoid.CMUHumanoid()
  arena = corridor_arenas.EmptyCorridor()
  task = corridor_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=spawn_position,
      walker_spawn_rotation=spawn_rotation)

  # Scramble the joint positions and root pose first, so the checks below can
  # only pass if `initialize_episode` really overwrites them.
  rng = np.random.RandomState(12345)
  task.initialize_episode_mjcf(rng)
  physics = mjcf.Physics.from_mjcf_model(task.root_entity.mjcf_model)

  joints = walker.mjcf_model.find_all('joint')
  physics.bind(joints).qpos = rng.uniform(size=len(joints))
  # Draw the position before the quaternion to keep the random stream in the
  # same order.
  scrambled_position = rng.uniform(size=3)
  scrambled_quaternion = rotations.UniformQuaternion()(rng)
  walker.set_pose(
      physics, position=scrambled_position, quaternion=scrambled_quaternion)

  task.initialize_episode(physics, rng)
  physics.forward()

  with self.subTest('Correct joint positions'):
    actual_qpos = physics.bind(joints).qpos
    upright_qpos = walker.upright_pose.qpos
    if upright_qpos is None:
      # No upright joint configuration is specified: compare against `qpos0`.
      np.testing.assert_array_equal(actual_qpos, physics.bind(joints).qpos0)
    else:
      np.testing.assert_array_equal(actual_qpos, walker.upright_pose.qpos)

  actual_xpos, actual_xquat = walker.get_pose(physics)

  with self.subTest('Correct position'):
    np.testing.assert_array_equal(
        actual_xpos, walker.upright_pose.xpos + np.array(position_offset))

  with self.subTest('Correct orientation'):
    # Normalise a copy of the upright quaternion in place.
    unit_xquat = walker.upright_pose.xquat.copy()
    unit_xquat /= np.linalg.norm(walker.upright_pose.xquat)
    expected_xquat = unit_xquat
    if rotate_180_degrees:
      expected_xquat = (-unit_xquat[3], -unit_xquat[2],
                        unit_xquat[1], unit_xquat[0])
    np.testing.assert_allclose(actual_xquat, expected_xquat)
def cmu_humanoid_run_gaps(random_state=None):
  """Builds a task in which a CMU humanoid runs along a corridor with gaps.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  # Position-controlled CMU humanoid with its egocentric camera enabled.
  camera_options = {'egocentric_camera': dict(enabled=True)}
  walker = cmu_humanoid.CMUHumanoidPositionControlled(
      observable_options=camera_options)

  # Corridor with gaps; platform and gap lengths are both drawn from uniform
  # distributions.
  platform_lengths = distributions.Uniform(.3, 2.5)
  gap_lengths = distributions.Uniform(.5, 1.25)
  arena = corr_arenas.GapsCorridor(
      platform_length=platform_lengths,
      gap_length=gap_lengths,
      corridor_width=10,
      corridor_length=100)

  # Task rewarding the agent for moving down the corridor at a target
  # velocity.
  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(0.5, 0, 0),
      target_velocity=3.0,
      physics_timestep=0.005,
      control_timestep=0.03)

  environment = composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
  return environment
def walker_run_gaps(random_state=None):
  """Builds a task in which a planar walker runs along a corridor with gaps.

  The task's `get_reward` is replaced on the instance by
  `_walker_get_reward`.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  walker = planar_walker.PlanarWalker()

  # Build a corridor-shaped arena with gaps, where the sizes of the gaps and
  # platforms are uniformly randomized.
  arena = corr_arenas.GapsCorridor(
      platform_length=distributions.Uniform(1.25, 2.5),  # (0.3, 2.5)
      gap_length=distributions.Uniform(0.3, 0.7),  # (0.5, 1.25)
      corridor_width=10,
      corridor_length=250)

  # Build a task that rewards the agent for running down the corridor at a
  # specific velocity.
  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(1.0, 0, 0),
      stand_height=1.2,
      contact_termination=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  # (Chongyi Zheng): redefine reward function
  # Bind the replacement to this task instance. The second argument of the
  # descriptor protocol's `__get__` is the owner class, so pass `type(task)`
  # rather than the bound method being replaced.
  task.get_reward = _walker_get_reward.__get__(task, type(task))

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def cmu_humanoid_run_walls(random_state=None):
  """Builds a task in which a CMU humanoid runs along a walled corridor.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  # Position-controlled CMU humanoid with its egocentric camera enabled.
  camera_options = {'egocentric_camera': dict(enabled=True)}
  walker = cmu_humanoid.CMUHumanoidPositionControlled(
      observable_options=camera_options)

  # Corridor obstructed by walls; the wall width is drawn from a uniform
  # distribution.
  wall_widths = distributions.Uniform(1, 7)
  arena = corr_arenas.WallsCorridor(
      wall_gap=4.,
      wall_width=wall_widths,
      wall_height=3.0,
      corridor_width=10,
      corridor_length=100,
      include_initial_padding=False)

  # Task rewarding the agent for moving down the corridor at a target
  # velocity.
  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(0.5, 0, 0),
      target_velocity=3.0,
      physics_timestep=0.005,
      control_timestep=0.03)

  environment = composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
  return environment
def ant_run_walls(random_state=None):
  """Builds a task in which an ant runs along a corridor obstructed by walls.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`. Defaults to
      `None`, which matches the behaviour of calling this function without
      arguments.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  walker = ant.Ant()

  # Corridor obstructed by walls; the wall width is drawn from a uniform
  # distribution.
  arena = corr_arenas.WallsCorridor(
      wall_gap=4.,
      wall_width=distributions.Uniform(1, 7),
      wall_height=3.0,
      corridor_width=10,
      corridor_length=250,
      include_initial_padding=False)

  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(0.5, 0, 0),
      walker_spawn_rotation=0,
      stand_height=0.2,
      contact_termination=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  # (Chongyi Zheng): the `_ant_get_reward` override is currently disabled, so
  # the task keeps its own `get_reward`.

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def jumping_ball_run_gaps(random_state=None):
  """Builds a task in which a jumping ball runs along a corridor with gaps.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  walker = jumping_ball.JumpingBallWithHead()

  # Corridor with gaps; platform and gap lengths are both drawn from uniform
  # distributions.
  platform_lengths = distributions.Uniform(1.0, 2.5)  # (0.3, 2.5)
  gap_lengths = distributions.Uniform(0.3, 0.7)  # (0.5, 1.25)
  arena = corr_arenas.GapsCorridor(
      platform_length=platform_lengths,
      gap_length=gap_lengths,
      corridor_width=10,
      corridor_length=250)

  # Task rewarding the agent for moving down the corridor at a target
  # velocity.
  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(1.0, 0, 0),
      target_velocity=3.0,
      contact_termination=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  environment = composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
  return environment
def ant_run(random_state=None):
  """Builds a task in which an ant runs along an empty corridor.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  ant_walker = ant.Ant()
  corridor = corr_arenas.EmptyCorridor()

  run_task = corr_tasks.RunThroughCorridor(
      walker=ant_walker,
      arena=corridor,
      walker_spawn_position=(5, 0, 0),
      walker_spawn_rotation=0,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  environment = composer.Environment(
      time_limit=30,
      task=run_task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
  return environment
def _get_jumping_ball_corridor_physics():
  """Builds a jumping-ball walker together with an empty-corridor environment.

  Returns:
    A `(walker, env)` tuple: the `JumpingBallWithHead` walker and the
    `composer.Environment` whose task contains it.
  """
  ball = jumping_ball.JumpingBallWithHead()
  corridor = corr_arenas.EmptyCorridor()

  run_task = corr_tasks.RunThroughCorridor(
      walker=ball,
      arena=corridor,
      walker_spawn_position=(5, 0, 0),
      walker_spawn_rotation=0,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  environment = composer.Environment(
      time_limit=30,
      task=run_task,
      strip_singleton_obs_buffer_dim=True)
  return ball, environment
def jumping_ball_run_long(random_state=None):
  """Builds a task in which a jumping ball runs along a long empty corridor.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`. Defaults to
      `None`, which matches the behaviour of calling this function without
      arguments.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  walker = jumping_ball.JumpingBallWithHead()

  # Empty corridor of length 250 with the side planes hidden.
  arena = corr_arenas.EmptyCorridor(
      corridor_length=250,
      visible_side_planes=False)

  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(1, 0, 0),
      walker_spawn_rotation=0,
      contact_termination=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def walker_run_long(random_state=None):
  """Builds a task in which a planar walker runs along a long empty corridor.

  The task's `get_reward` is replaced on the instance by
  `_walker_get_reward`.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`. Defaults to
      `None`, which matches the behaviour of calling this function without
      arguments.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  walker = planar_walker.PlanarWalker()

  # Empty corridor of length 250 with the side planes hidden.
  arena = corr_arenas.EmptyCorridor(
      corridor_length=250,
      visible_side_planes=False)

  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(1, 0, 0),
      walker_spawn_rotation=0,
      stand_height=1.2,
      contact_termination=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  # (Chongyi Zheng): redefine reward function
  # Bind the replacement to this task instance. The second argument of the
  # descriptor protocol's `__get__` is the owner class, so pass `type(task)`
  # rather than the bound method being replaced.
  task.get_reward = _walker_get_reward.__get__(task, type(task))

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def jumping_ball_run_walls(random_state=None):
  """Builds a task in which a jumping ball runs along a walled corridor.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`. Defaults to
      `None`, which matches the behaviour of calling this function without
      arguments.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  walker = jumping_ball.JumpingBallWithHead()

  # Corridor obstructed by walls; the wall width is drawn from a uniform
  # distribution.
  arena = corr_arenas.WallsCorridor(
      wall_gap=4.,
      wall_width=distributions.Uniform(1, 7),
      wall_height=3.0,
      corridor_width=10,
      corridor_length=250,
      include_initial_padding=False)

  task = corr_tasks.RunThroughCorridor(
      walker=walker,
      arena=arena,
      walker_spawn_position=(0.5, 0, 0),
      walker_spawn_rotation=0,
      contact_termination=False,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  return composer.Environment(
      time_limit=30,
      task=task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
def ant_run_walls(random_state=None):
  """Builds a task in which an ant runs along a corridor obstructed by walls.

  Args:
    random_state: Forwarded unchanged to `composer.Environment`.

  Returns:
    The `composer.Environment` wrapping the corridor-running task, created
    with `time_limit=30`.
  """
  ant_walker = ant.Ant()

  # Corridor obstructed by walls; the wall width is drawn from a uniform
  # distribution.
  wall_widths = distributions.Uniform(1, 7)
  walled_corridor = corr_arenas.WallsCorridor(
      wall_gap=4.,
      wall_width=wall_widths,
      wall_height=3.0,
      corridor_width=10,
      corridor_length=100,
      include_initial_padding=False)

  run_task = corr_tasks.RunThroughCorridor(
      walker=ant_walker,
      arena=walled_corridor,
      walker_spawn_position=(0.5, 0, 0),
      walker_spawn_rotation=0,
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP)

  environment = composer.Environment(
      time_limit=30,
      task=run_task,
      random_state=random_state,
      strip_singleton_obs_buffer_dim=True)
  return environment