def test_termination_and_discount(self):
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(walker=walker, arena=arena)

  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  env.reset()

  zero_action = np.zeros_like(env.physics.data.ctrl)

  # Walker starts in upright position.
  # Should not trigger failure termination in the first few steps.
  for _ in range(5):
    env.step(zero_action)
    self.assertFalse(task.should_terminate_episode(env.physics))
    np.testing.assert_array_equal(task.get_discount(env.physics), 1)

  # Rotate the walker upside down and run the physics until it makes contact.
  current_time = env.physics.data.time
  walker.shift_pose(env.physics, position=(0, 0, 10), quaternion=(0, 1, 0, 0))
  env.physics.forward()
  while env.physics.data.ncon == 0:
    env.physics.step()
  env.physics.data.time = current_time

  # Should now trigger a failure termination.
  env.step(zero_action)
  self.assertTrue(task.should_terminate_episode(env.physics))
  np.testing.assert_array_equal(task.get_discount(env.physics), 0)
def test_reward_fixed_target(self):
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(
      walker=walker, arena=arena, moving_target=False)

  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  env.reset()

  target_position = task.target_position(env.physics)
  zero_action = np.zeros_like(env.physics.data.ctrl)
  for _ in range(2):
    timestep = env.step(zero_action)
    self.assertEqual(timestep.reward, 0)

  walker_pos = env.physics.bind(walker.root_body).xpos
  walker.set_pose(
      env.physics,
      position=[target_position[0], target_position[1], walker_pos[2]])
  env.physics.forward()

  # Receive reward while the agent remains at that location.
  timestep = env.step(zero_action)
  self.assertEqual(timestep.reward, 1)

  # Target position should not change.
  np.testing.assert_array_equal(target_position,
                                task.target_position(env.physics))
def build_env(reward_type,
              ghost_offset=0,
              clip_name='CMU_016_22',
              start_step=0,
              force_magnitude=0,
              disable_observables=True,
              termination_error_threshold=1e10):
  walker = cmu_humanoid.CMUHumanoidPositionControlledV2020
  arena = floors.Floor()
  task = tracking.MultiClipMocapTracking(
      walker=walker,
      arena=arena,
      ref_path=cmu_mocap_data.get_path_for_cmu(version='2020'),
      dataset=types.ClipCollection(ids=[clip_name]),
      ref_steps=(1, 2, 3, 4, 5),
      start_step=start_step,
      max_steps=256,
      reward_type=reward_type,
      always_init_at_clip_start=True,
      termination_error_threshold=termination_error_threshold,
      ghost_offset=ghost_offset,
      force_magnitude=force_magnitude,
      disable_observables=disable_observables,
  )
  env = composer.Environment(
      task=task,
      random_state=np.random.RandomState(seed=FLAGS.seed))
  return env
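# The helper below is a usage sketch, not part of the original module: it
# assumes an absl context where FLAGS.seed has been parsed and the CMU 2020
# mocap data referenced by cmu_mocap_data is available locally. 'comic' is
# assumed here as one of the reward types accepted by MultiClipMocapTracking.
def _example_smoke_run_build_env():
  env = build_env(reward_type='comic')
  action_spec = env.action_spec()
  zero_action = np.zeros(action_spec.shape, dtype=action_spec.dtype)
  # Step with zero actions until the episode terminates or times out.
  timestep = env.reset()
  while not timestep.last():
    timestep = env.step(zero_action)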
def testActivation(self):
  target_radius = 0.6
  prop_radius = 0.1
  target_height = 1

  arena = floors.Floor()
  target = target_sphere.TargetSphere(
      radius=target_radius, height_above_ground=target_height)
  prop = primitive.Primitive(geom_type='sphere', size=[prop_radius])
  arena.attach(target)
  arena.add_free_entity(prop)

  task = composer.NullTask(arena)
  task.initialize_episode = (
      lambda physics, random_state: prop.set_pose(physics, [0, 0, 2]))

  env = composer.Environment(task)
  env.reset()

  max_activated_height = target_height + target_radius + prop_radius

  # While the falling prop is still above the target, the target must not
  # activate and its material stays opaque.
  while env.physics.bind(prop.geom).xpos[2] > max_activated_height:
    self.assertFalse(target.activated)
    self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
    env.step([])

  # Once the prop has passed through the target, it stays activated and its
  # material is made transparent.
  while env.physics.bind(prop.geom).xpos[2] > 0.2:
    self.assertTrue(target.activated)
    self.assertEqual(env.physics.bind(target.material).rgba[-1], 0)
    env.step([])

  # Target should be reset when the environment is reset.
  env.reset()
  self.assertFalse(target.activated)
  self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
def test_observables(self):
  walker = rodent.Rat()
  arena = floors.Floor(size=(10., 10.), aesthetic='outdoor_natural')
  task = reach.TwoTouch(
      walker=walker,
      arena=arena,
      target_builders=[
          functools.partial(target_sphere.TargetSphereTwoTouch, radius=0.025),
      ],
      randomize_spawn_rotation=True,
      target_type_rewards=[25.],
      shuffle_target_builders=False,
      target_area=(1.5, 1.5),
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP,
  )
  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  timestep = env.reset()

  self.assertIn('walker/joints_pos', timestep.observation)
def rodent_two_touch(random_state=None):
  """Requires a rodent to tap an orb, wait an interval, and tap it again."""
  # Build a position-controlled rodent walker.
  walker = rodent.Rat(
      observable_options={'egocentric_camera': dict(enabled=True)})

  arena = floors.Floor(size=(10., 10.), aesthetic='outdoor_natural')

  task = reach.TwoTouch(
      walker=walker,
      arena=arena,
      target_builders=[
          functools.partial(target_sphere.TargetSphereTwoTouch, radius=0.025),
      ],
      randomize_spawn_rotation=True,
      target_type_rewards=[25.],
      shuffle_target_builders=False,
      target_area=(1.5, 1.5),
      physics_timestep=_PHYSICS_TIMESTEP,
      control_timestep=_CONTROL_TIMESTEP,
  )

  return composer.Environment(time_limit=30,
                              task=task,
                              random_state=random_state,
                              strip_singleton_obs_buffer_dim=True)
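# Usage sketch (not in the original file): drive the two-touch environment
# with uniformly sampled actions until the 30-second time limit. Assumes only
# numpy and the builder above.
def _example_run_rodent_two_touch():
  env = rodent_two_touch(random_state=np.random.RandomState(12345))
  spec = env.action_spec()
  timestep = env.reset()
  while not timestep.last():
    # Sample actions uniformly within the bounded action spec.
    action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
    timestep = env.step(action)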
def test_top_camera(self):
  floor_width, floor_height = 12.9, 27.1
  arena = floors.Floor(size=[floor_width, floor_height])

  self.assertGreater(floors._TOP_CAMERA_Y_PADDING_FACTOR, 1)
  np.testing.assert_array_equal(arena._top_camera.quat, (1, 0, 0, 0))

  expected_camera_y = floor_height * floors._TOP_CAMERA_Y_PADDING_FACTOR
  np.testing.assert_allclose(
      np.tan(np.deg2rad(arena._top_camera.fovy / 2)),
      expected_camera_y / arena._top_camera.pos[2])
def test_observables(self):
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(
      walker=walker, arena=arena, moving_target=False)

  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  timestep = env.reset()

  self.assertIn('walker/target', timestep.observation)
def test_target_position_randomized_on_reset(self):
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(
      walker=walker, arena=arena, moving_target=False)

  random_state = np.random.RandomState(12345)
  env = composer.Environment(task, random_state=random_state)
  env.reset()

  first_target_position = task.target_position(env.physics)
  env.reset()
  second_target_position = task.target_position(env.physics)
  self.assertFalse(
      np.all(first_target_position == second_target_position),
      'Target positions are unexpectedly identical.')
def jumping_ball_go_to_target(random_state=None):
  walker = jumping_ball.JumpingBallWithHead()

  # Build a standard floor arena.
  arena = floors.Floor()

  # Build a task that rewards the agent for going to a target.
  task = go_to_target.GoToTarget(walker=walker,
                                 arena=arena,
                                 sparse_reward=False,
                                 physics_timestep=_PHYSICS_TIMESTEP,
                                 control_timestep=_CONTROL_TIMESTEP)

  return composer.Environment(time_limit=30,
                              task=task,
                              random_state=random_state,
                              strip_singleton_obs_buffer_dim=True)
def cmu_humanoid_go_to_target(random_state=None):
  """Requires a CMU humanoid to go to a target."""
  # Build a position-controlled CMU humanoid walker.
  walker = cmu_humanoid.CMUHumanoidPositionControlled()

  # Build a standard floor arena.
  arena = floors.Floor()

  # Build a task that rewards the agent for going to a target.
  task = go_to_target.GoToTarget(walker=walker,
                                 arena=arena,
                                 physics_timestep=0.005,
                                 control_timestep=0.03)

  return composer.Environment(time_limit=30,
                              task=task,
                              random_state=random_state,
                              strip_singleton_obs_buffer_dim=True)
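# Interactive sketch (assumes dm_control's viewer and a display are
# available): builders such as cmu_humanoid_go_to_target can be passed to the
# viewer as an environment_loader, which calls them to build the environment.
def _example_view_go_to_target():
  from dm_control import viewer  # Local import; only needed for visualization.
  viewer.launch(environment_loader=cmu_humanoid_go_to_target)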
def _make(self, clip_name, reward_type, start_step, always_init_at_clip_start,
          ghost_offset):
  """Creates the underlying MocapTracking environment."""
  walker = cmu_humanoid.CMUHumanoidPositionControlledV2020
  arena = floors.Floor()
  task = tracking.MultiClipMocapTracking(
      walker=walker,
      arena=arena,
      ref_path=cmu_mocap_data.get_path_for_cmu(version='2020'),
      dataset=types.ClipCollection(ids=[clip_name]),
      ref_steps=(1, 2, 3, 4, 5),
      start_step=start_step,
      max_steps=256,
      reward_type=reward_type,
      always_init_at_clip_start=always_init_at_clip_start,
      termination_error_threshold=0.3,
      ghost_offset=ghost_offset,
      disable_observables=False,
  )
  env = composer.Environment(
      task=task, random_state=np.random.RandomState(seed=42))
  return env
def __init__(self,
             walker,
             proto_modifier=None,
             negative_reward_on_failure_termination=True,
             priority_friction=False,
             bucket_offset=1.,
             y_range=0.5,
             toss_delay=0.5,
             randomize_init=False):
  """Initialize ball tossing task.

  Args:
    walker: the walker to be used in this task.
    proto_modifier: function to modify the trajectory proto.
    negative_reward_on_failure_termination: flag to provide negative reward
      when the task fails.
    priority_friction: sets friction priority, thereby giving prop objects
      higher friction.
    bucket_offset: distance in meters to push the bucket away from the
      walker.
    y_range: range (uniformly sampled) of distance in meters the ball is
      thrown left/right of the walker.
    toss_delay: time in seconds to delay after catching before changing the
      reward to encourage throwing the ball.
    randomize_init: flag to randomize the initial pose.
  """
  self._proto_modifier = proto_modifier
  self._negative_reward_on_failure_termination = (
      negative_reward_on_failure_termination)
  self._priority_friction = priority_friction
  self._bucket_rewarded = False
  self._bucket_offset = bucket_offset
  self._y_range = y_range
  self._toss_delay = toss_delay
  self._randomize_init = randomize_init

  # Load a clip to grab a ball prop and initializations.
  loader = mocap_loader.HDF5TrajectoryLoader(
      mocap_data.H5_PATH, trajectories.WarehouseTrajectory)
  clip_number = 54
  self._trajectory = loader.get_trajectory(
      mocap_data.IDENTIFIER_TEMPLATE.format(clip_number))

  # Create the floor arena.
  self._arena = floors.Floor()

  self._walker = walker
  self._walker_geoms = tuple(self._walker.mjcf_model.find_all('geom'))
  self._feet_geoms = (
      walker.mjcf_model.find('body', 'lfoot').find_all('geom') +
      walker.mjcf_model.find('body', 'rfoot').find_all('geom'))
  self._lhand_geoms = (
      walker.mjcf_model.find('body', 'lhand').find_all('geom'))
  self._rhand_geoms = (
      walker.mjcf_model.find('body', 'rhand').find_all('geom'))

  # Resize the humanoid based on the motion capture data subject.
  self._trajectory.configure_walkers([self._walker])
  walker.create_root_joints(self._arena.attach(walker))

  control_timestep = self._trajectory.dt
  self.set_timesteps(control_timestep, _PHYSICS_TIMESTEP)

  # Build and attach the bucket to the arena.
  self._bucket = props.Bucket(_BUCKET_SIZE)
  self._arena.attach(self._bucket)

  self._prop = self._trajectory.create_props(
      priority_friction=self._priority_friction)[0]
  self._arena.add_free_entity(self._prop)

  self._task_observables = collections.OrderedDict()

  # Define feature-based observations (the agent may or may not use these).
  def ego_prop_xpos(physics):
    prop_xpos, _ = self._prop.get_pose(physics)
    walker_xpos = physics.bind(self._walker.root_body).xpos
    return self._walker.transform_vec_to_egocentric_frame(
        physics, prop_xpos - walker_xpos)
  self._task_observables['prop_{}/xpos'.format(0)] = (
      observable.Generic(ego_prop_xpos))

  def prop_zaxis(physics):
    prop_xmat = physics.bind(
        mjcf.get_attachment_frame(self._prop.mjcf_model)).xmat
    return prop_xmat[[2, 5, 8]]
  self._task_observables['prop_{}/zaxis'.format(0)] = (
      observable.Generic(prop_zaxis))

  def ego_bucket_xpos(physics):
    bucket_xpos, _ = self._bucket.get_pose(physics)
    walker_xpos = physics.bind(self._walker.root_body).xpos
    return self._walker.transform_vec_to_egocentric_frame(
        physics, bucket_xpos - walker_xpos)
  self._task_observables['bucket_{}/xpos'.format(0)] = (
      observable.Generic(ego_bucket_xpos))

  for obs in (self._walker.observables.proprioception +
              self._walker.observables.kinematic_sensors +
              self._walker.observables.dynamic_sensors +
              list(self._task_observables.values())):
    obs.enabled = True
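# Construction sketch for the constructor above. `BallTossTask` is a
# placeholder for the enclosing class, which is not shown in this snippet;
# the keyword arguments follow the signature as written.
def _example_build_ball_toss_env():
  walker = cmu_humanoid.CMUHumanoid()
  task = BallTossTask(  # Hypothetical name for the enclosing task class.
      walker,
      bucket_offset=1.,
      randomize_init=True)
  return composer.Environment(
      task=task, random_state=np.random.RandomState(12345))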
def test_can_compile_mjcf(self):
  arena = floors.Floor()
  mjcf.Physics.from_mjcf_model(arena.mjcf_model)
def test_size(self):
  floor_size = (12.9, 27.1)
  arena = floors.Floor(size=floor_size)
  self.assertEqual(tuple(arena.ground_geoms[0].size[:2]), floor_size)
def _build_task(**task_kwargs):
  walker = cmu_humanoid.CMUHumanoid()
  arena = floors.Floor()
  task = go_to_target.GoToTarget(walker=walker, arena=arena, **task_kwargs)
  return task
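# Usage sketch (not in the original module): _build_task forwards arbitrary
# GoToTarget keyword arguments, so parameterized tests can vary the task
# configuration without repeating the walker/arena setup.
def _example_build_moving_target_env():
  task = _build_task(moving_target=True)
  env = composer.Environment(task, random_state=np.random.RandomState(0))
  env.reset()
  return env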