Example No. 1
    def test_termination_and_discount(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker, arena=arena)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        zero_action = np.zeros_like(env.physics.data.ctrl)

        # Walker starts in upright position.
        # Should not trigger failure termination in the first few steps.
        for _ in range(5):
            env.step(zero_action)
            self.assertFalse(task.should_terminate_episode(env.physics))
            np.testing.assert_array_equal(task.get_discount(env.physics), 1)

        # Rotate the walker upside down and run the physics until it makes contact.
        current_time = env.physics.data.time
        walker.shift_pose(env.physics,
                          position=(0, 0, 10),
                          quaternion=(0, 1, 0, 0))
        env.physics.forward()
        while env.physics.data.ncon == 0:
            env.physics.step()
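        # Restore the simulation clock to its pre-manipulation value.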
        env.physics.data.time = current_time

        # Should now trigger a failure termination.
        env.step(zero_action)
        self.assertTrue(task.should_terminate_episode(env.physics))
        np.testing.assert_array_equal(task.get_discount(env.physics), 0)
Example No. 2
    def test_reward_fixed_target(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        target_position = task.target_position(env.physics)
        zero_action = np.zeros_like(env.physics.data.ctrl)
        for _ in range(2):
            timestep = env.step(zero_action)
            self.assertEqual(timestep.reward, 0)
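        # Teleport the walker's root onto the target, keeping its current height.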
        walker_pos = env.physics.bind(walker.root_body).xpos
        walker.set_pose(
            env.physics,
            position=[target_position[0], target_position[1], walker_pos[2]])
        env.physics.forward()

        # Receive reward while the agent remains at that location.
        timestep = env.step(zero_action)
        self.assertEqual(timestep.reward, 1)

        # Target position should not change.
        np.testing.assert_array_equal(target_position,
                                      task.target_position(env.physics))
Example No. 3
def build_env(reward_type,
              ghost_offset=0,
              clip_name='CMU_016_22',
              start_step=0,
              force_magnitude=0,
              disable_observables=True,
              termination_error_threshold=1e10):
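    # Note: the walker class itself is passed through, since the tracking task
    # expects a walker constructor and instantiates the walker internally.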
    walker = cmu_humanoid.CMUHumanoidPositionControlledV2020
    arena = floors.Floor()
    task = tracking.MultiClipMocapTracking(
        walker=walker,
        arena=arena,
        ref_path=cmu_mocap_data.get_path_for_cmu(version='2020'),
        dataset=types.ClipCollection(ids=[clip_name]),
        ref_steps=(1, 2, 3, 4, 5),
        start_step=start_step,
        max_steps=256,
        reward_type=reward_type,
        always_init_at_clip_start=True,
        termination_error_threshold=termination_error_threshold,
        ghost_offset=ghost_offset,
        force_magnitude=force_magnitude,
        disable_observables=disable_observables,
    )
    env = composer.Environment(
        task=task, random_state=np.random.RandomState(seed=FLAGS.seed))
    return env
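
A minimal driver for build_env, assuming the dm_control imports used above and an absl FLAGS.seed flag are defined; 'comic' is used here as a plausible reward_type accepted by MultiClipMocapTracking:

env = build_env(reward_type='comic')
timestep = env.reset()
zero_action = np.zeros_like(env.physics.data.ctrl)
for _ in range(10):
    timestep = env.step(zero_action)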
Example No. 4
  def testActivation(self):
    target_radius = 0.6
    prop_radius = 0.1
    target_height = 1

    arena = floors.Floor()
    target = target_sphere.TargetSphere(radius=target_radius,
                                        height_above_ground=target_height)
    prop = primitive.Primitive(geom_type='sphere', size=[prop_radius])
    arena.attach(target)
    arena.add_free_entity(prop)

    task = composer.NullTask(arena)
    task.initialize_episode = (
        lambda physics, random_state: prop.set_pose(physics, [0, 0, 2]))

    env = composer.Environment(task)
    env.reset()

    max_activated_height = target_height + target_radius + prop_radius

    while env.physics.bind(prop.geom).xpos[2] > max_activated_height:
      self.assertFalse(target.activated)
      self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
      env.step([])

    while env.physics.bind(prop.geom).xpos[2] > 0.2:
      self.assertTrue(target.activated)
      self.assertEqual(env.physics.bind(target.material).rgba[-1], 0)
      env.step([])

    # Target should be reset when the environment is reset.
    env.reset()
    self.assertFalse(target.activated)
    self.assertEqual(env.physics.bind(target.material).rgba[-1], 1)
Example No. 5
  def test_observables(self):
    walker = rodent.Rat()

    arena = floors.Floor(
        size=(10., 10.),
        aesthetic='outdoor_natural')

    task = reach.TwoTouch(
        walker=walker,
        arena=arena,
        target_builders=[
            functools.partial(target_sphere.TargetSphereTwoTouch, radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )
    random_state = np.random.RandomState(12345)
    env = composer.Environment(task, random_state=random_state)
    timestep = env.reset()

    self.assertIn('walker/joints_pos', timestep.observation)
Example No. 6
def rodent_two_touch(random_state=None):
    """Requires a rodent to tap an orb, wait an interval, and tap it again."""

    # Build a position-controlled rodent walker.
    walker = rodent.Rat(
        observable_options={'egocentric_camera': dict(enabled=True)})

    arena = floors.Floor(size=(10., 10.), aesthetic='outdoor_natural')

    task = reach.TwoTouch(
        walker=walker,
        arena=arena,
        target_builders=[
            functools.partial(target_sphere.TargetSphereTwoTouch,
                              radius=0.025),
        ],
        randomize_spawn_rotation=True,
        target_type_rewards=[25.],
        shuffle_target_builders=False,
        target_area=(1.5, 1.5),
        physics_timestep=_PHYSICS_TIMESTEP,
        control_timestep=_CONTROL_TIMESTEP,
    )

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
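
A quick smoke test for the factory above (a sketch; the exact key under which the enabled egocentric camera appears is assumed to follow the usual 'walker/egocentric_camera' naming):

env = rodent_two_touch(random_state=np.random.RandomState(12345))
timestep = env.reset()
print(sorted(timestep.observation.keys()))  # expect the egocentric camera obs among them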
Example No. 7
    def test_top_camera(self):
        floor_width, floor_height = 12.9, 27.1
        arena = floors.Floor(size=[floor_width, floor_height])

        self.assertGreater(floors._TOP_CAMERA_Y_PADDING_FACTOR, 1)
        np.testing.assert_array_equal(arena._top_camera.zaxis, (0, 0, 1))

        expected_camera_y = floor_height * floors._TOP_CAMERA_Y_PADDING_FACTOR
        np.testing.assert_allclose(
            np.tan(np.deg2rad(arena._top_camera.fovy / 2)),
            expected_camera_y / arena._top_camera.pos[2])
Example No. 8
    def test_top_camera(self):
        floor_width, floor_height = 12.9, 27.1
        arena = floors.Floor(size=[floor_width, floor_height])

        self.assertGreater(arena._top_camera_y_padding_factor, 1)
        np.testing.assert_array_equal(arena._top_camera.quat, (1, 0, 0, 0))

        expected_camera_y = floor_height * arena._top_camera_y_padding_factor
        np.testing.assert_allclose(
            np.tan(np.deg2rad(arena._top_camera.fovy / 2)),
            expected_camera_y / arena._top_camera.pos[2])
Example No. 9
    def test_observables(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        timestep = env.reset()

        self.assertIn('walker/target', timestep.observation)
Example No. 10
    def test_target_position_randomized_on_reset(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)
        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()
        first_target_position = task.target_position(env.physics)
        env.reset()
        second_target_position = task.target_position(env.physics)
        self.assertFalse(
            np.all(first_target_position == second_target_position),
            'Target positions are unexpectedly identical.')
Example No. 11
def jumping_ball_go_to_target(random_state=None):
    walker = jumping_ball.JumpingBallWithHead()

    # Build a standard floor arena.
    arena = floors.Floor()

    # Build a task that rewards the agent for going to a target.
    task = go_to_target.GoToTarget(walker=walker,
                                   arena=arena,
                                   sparse_reward=False,
                                   physics_timestep=_PHYSICS_TIMESTEP,
                                   control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example No. 12
def cmu_humanoid_go_to_target(random_state=None):
    """Requires a CMU humanoid to go to a target."""

    # Build a position-controlled CMU humanoid walker.
    walker = cmu_humanoid.CMUHumanoidPositionControlled()

    # Build a standard floor arena.
    arena = floors.Floor()

    # Build a task that rewards the agent for going to a target.
    task = go_to_target.GoToTarget(walker=walker,
                                   arena=arena,
                                   physics_timestep=0.005,
                                   control_timestep=0.03)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
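
The returned composer.Environment implements the dm_env interface, so a random-action rollout is a natural smoke test (a sketch; uniform sampling over the bounded action spec is one simple choice of policy):

env = cmu_humanoid_go_to_target(random_state=np.random.RandomState(0))
spec = env.action_spec()
timestep = env.reset()
while not timestep.last():
    action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
    timestep = env.step(action)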
Example No. 13
    def _make(self, clip_name, reward_type, start_step,
              always_init_at_clip_start, ghost_offset):
        """Creates the underlying MocapTracking environment."""
        walker = cmu_humanoid.CMUHumanoidPositionControlledV2020
        arena = floors.Floor()
        task = tracking.MultiClipMocapTracking(
            walker=walker,
            arena=arena,
            ref_path=cmu_mocap_data.get_path_for_cmu(version='2020'),
            dataset=types.ClipCollection(ids=[clip_name]),
            ref_steps=(1, 2, 3, 4, 5),
            start_step=start_step,
            max_steps=256,
            reward_type=reward_type,
            always_init_at_clip_start=always_init_at_clip_start,
            termination_error_threshold=0.3,
            ghost_offset=ghost_offset,
            disable_observables=False,
        )
        env = composer.Environment(task=task,
                                   random_state=np.random.RandomState(seed=42))
        return env
Example No. 14
  def __init__(self, walker,
               proto_modifier=None,
               negative_reward_on_failure_termination=True,
               priority_friction=False,
               bucket_offset=1.,
               y_range=0.5,
               toss_delay=0.5,
               randomize_init=False,
              ):
    """Initialize ball tossing task.

    Args:
      walker: the walker to be used in this task.
      proto_modifier: function to modify the trajectory proto.
      negative_reward_on_failure_termination: whether to provide a negative
        reward when the task terminates in failure.
      priority_friction: sets friction priority, giving the prop objects
        higher friction.
      bucket_offset: distance in meters to push the bucket away from the
        walker.
      y_range: range (uniformly sampled) of distance in meters the ball is
        thrown left/right of the walker.
      toss_delay: time in seconds to wait after a catch before switching the
        reward, to encourage throwing the ball.
      randomize_init: whether to randomize the initial pose.
    """
    self._proto_modifier = proto_modifier
    self._negative_reward_on_failure_termination = (
        negative_reward_on_failure_termination)
    self._priority_friction = priority_friction
    self._bucket_rewarded = False
    self._bucket_offset = bucket_offset
    self._y_range = y_range
    self._toss_delay = toss_delay
    self._randomize_init = randomize_init

    # load a clip to source the ball prop and initialization poses
    loader = mocap_loader.HDF5TrajectoryLoader(
        mocap_data.H5_PATH, trajectories.WarehouseTrajectory)
    clip_number = 54
    self._trajectory = loader.get_trajectory(
        mocap_data.IDENTIFIER_TEMPLATE.format(clip_number))

    # create the floor arena
    self._arena = floors.Floor()

    self._walker = walker
    self._walker_geoms = tuple(self._walker.mjcf_model.find_all('geom'))
    self._feet_geoms = (
        walker.mjcf_model.find('body', 'lfoot').find_all('geom') +
        walker.mjcf_model.find('body', 'rfoot').find_all('geom'))
    self._lhand_geoms = (
        walker.mjcf_model.find('body', 'lhand').find_all('geom'))
    self._rhand_geoms = (
        walker.mjcf_model.find('body', 'rhand').find_all('geom'))

    # resize the humanoid based on the motion capture data subject
    self._trajectory.configure_walkers([self._walker])
    walker.create_root_joints(self._arena.attach(walker))

    control_timestep = self._trajectory.dt
    self.set_timesteps(control_timestep, _PHYSICS_TIMESTEP)

    # build and attach the bucket to the arena
    self._bucket = props.Bucket(_BUCKET_SIZE)
    self._arena.attach(self._bucket)

    self._prop = self._trajectory.create_props(
        priority_friction=self._priority_friction)[0]
    self._arena.add_free_entity(self._prop)

    self._task_observables = collections.OrderedDict()

    # define feature-based observations (the agent may or may not use these)
    def ego_prop_xpos(physics):
      prop_xpos, _ = self._prop.get_pose(physics)
      walker_xpos = physics.bind(self._walker.root_body).xpos
      return self._walker.transform_vec_to_egocentric_frame(
          physics, prop_xpos - walker_xpos)
    self._task_observables['prop_{}/xpos'.format(0)] = (
        observable.Generic(ego_prop_xpos))

    def prop_zaxis(physics):
      prop_xmat = physics.bind(
          mjcf.get_attachment_frame(self._prop.mjcf_model)).xmat
      return prop_xmat[[2, 5, 8]]
    self._task_observables['prop_{}/zaxis'.format(0)] = (
        observable.Generic(prop_zaxis))

    def ego_bucket_xpos(physics):
      bucket_xpos, _ = self._bucket.get_pose(physics)
      walker_xpos = physics.bind(self._walker.root_body).xpos
      return self._walker.transform_vec_to_egocentric_frame(
          physics, bucket_xpos - walker_xpos)
    self._task_observables['bucket_{}/xpos'.format(0)] = (
        observable.Generic(ego_bucket_xpos))

    for obs in (self._walker.observables.proprioception +
                self._walker.observables.kinematic_sensors +
                self._walker.observables.dynamic_sensors +
                list(self._task_observables.values())):
      obs.enabled = True
Example No. 15
    def test_can_compile_mjcf(self):
        arena = floors.Floor()
        mjcf.Physics.from_mjcf_model(arena.mjcf_model)
Example No. 16
    def test_size(self):
        floor_size = (12.9, 27.1)
        arena = floors.Floor(size=floor_size)
        self.assertEqual(tuple(arena.ground_geoms[0].size[:2]), floor_size)
Example No. 17
def _build_task(**task_kwargs):
    walker = cmu_humanoid.CMUHumanoid()
    arena = floors.Floor()
    task = go_to_target.GoToTarget(walker=walker, arena=arena, **task_kwargs)
    return task
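
As in the test examples above, the task returned by _build_task still needs to be wrapped in a composer.Environment before it can be stepped (a minimal sketch reusing the same seeding convention):

task = _build_task(moving_target=False)
env = composer.Environment(task, random_state=np.random.RandomState(12345))
timestep = env.reset()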