Example #1
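Note: every snippet on this page omits its imports. All of them appear to rely on the block below; the module paths are an assumption based on dm_control's usual layout and the identifiers used in the code.

# Assumed imports for the snippets on this page (adjust to your dm_control version if the paths differ).
import numpy as np

from dm_control import composer
from dm_control.locomotion.arenas import floors
from dm_control.locomotion.tasks import go_to_target
from dm_control.locomotion.walkers import cmu_humanoid
from dm_control.locomotion.walkers import jumping_ball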
    def test_termination_and_discount(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker, arena=arena)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        zero_action = np.zeros_like(env.physics.data.ctrl)

        # Walker starts in upright position.
        # Should not trigger failure termination in the first few steps.
        for _ in range(5):
            env.step(zero_action)
            self.assertFalse(task.should_terminate_episode(env.physics))
            np.testing.assert_array_equal(task.get_discount(env.physics), 1)

        # Rotate the walker upside down and run the physics until it makes contact.
        current_time = env.physics.data.time
        walker.shift_pose(env.physics,
                          position=(0, 0, 10),
                          quaternion=(0, 1, 0, 0))
        env.physics.forward()
        while env.physics.data.ncon == 0:
            env.physics.step()
        env.physics.data.time = current_time

        # Should now trigger a failure termination.
        env.step(zero_action)
        self.assertTrue(task.should_terminate_episode(env.physics))
        np.testing.assert_array_equal(task.get_discount(env.physics), 0)
Example #2
    def test_reward_fixed_target(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()

        target_position = task.target_position(env.physics)
        zero_action = np.zeros_like(env.physics.data.ctrl)
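        # No reward yet: the walker starts away from the target.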
        for _ in range(2):
            timestep = env.step(zero_action)
            self.assertEqual(timestep.reward, 0)
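        # Teleport the walker onto the target, keeping its current height.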
        walker_pos = env.physics.bind(walker.root_body).xpos
        walker.set_pose(
            env.physics,
            position=[target_position[0], target_position[1], walker_pos[2]])
        env.physics.forward()

        # Receive reward while the agent remains at that location.
        timestep = env.step(zero_action)
        self.assertEqual(timestep.reward, 1)

        # Target position should not change.
        np.testing.assert_array_equal(target_position,
                                      task.target_position(env.physics))
Example #3
    def test_observables(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)

        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        timestep = env.reset()

        self.assertIn('walker/target', timestep.observation)
Example #4
    def test_target_position_randomized_on_reset(self):
        walker = cmu_humanoid.CMUHumanoid()
        arena = floors.Floor()
        task = go_to_target.GoToTarget(walker=walker,
                                       arena=arena,
                                       moving_target=False)
        random_state = np.random.RandomState(12345)
        env = composer.Environment(task, random_state=random_state)
        env.reset()
        first_target_position = task.target_position(env.physics)
        env.reset()
        second_target_position = task.target_position(env.physics)
        self.assertFalse(
            np.all(first_target_position == second_target_position),
            'Target positions are unexpectedly identical.')
Example #5
def jumping_ball_go_to_target(random_state=None):
    walker = jumping_ball.JumpingBallWithHead()

    # Build a standard floor arena.
    arena = floors.Floor()

    # Build a task that rewards the agent for going to a target.
    task = go_to_target.GoToTarget(walker=walker,
                                   arena=arena,
                                   sparse_reward=False,
                                   physics_timestep=_PHYSICS_TIMESTEP,
                                   control_timestep=_CONTROL_TIMESTEP)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
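This snippet references the module-level constants _PHYSICS_TIMESTEP and _CONTROL_TIMESTEP, which are defined elsewhere and not shown here. Example #6 passes 0.005 and 0.03 directly, so a plausible definition is sketched below (an assumption, not taken from the original module).

# Assumed timestep constants; the values mirror those hard-coded in Example #6.
_PHYSICS_TIMESTEP = 0.005
_CONTROL_TIMESTEP = 0.03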
Example #6
def cmu_humanoid_go_to_target(random_state=None):
    """Requires a CMU humanoid to go to a target."""

    # Build a position-controlled CMU humanoid walker.
    walker = cmu_humanoid.CMUHumanoidPositionControlled()

    # Build a standard floor arena.
    arena = floors.Floor()

    # Build a task that rewards the agent for going to a target.
    task = go_to_target.GoToTarget(walker=walker,
                                   arena=arena,
                                   physics_timestep=0.005,
                                   control_timestep=0.03)

    return composer.Environment(time_limit=30,
                                task=task,
                                random_state=random_state,
                                strip_singleton_obs_buffer_dim=True)
Example #7
def _build_task(**task_kwargs):
    walker = cmu_humanoid.CMUHumanoid()
    arena = floors.Floor()
    task = go_to_target.GoToTarget(walker=walker, arena=arena, **task_kwargs)
    return task
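A hypothetical usage sketch for the helper above (the driver loop below is not part of the original examples; it simply mirrors how the earlier snippets build and step a Composer environment):

# Hypothetical driver: wrap the task returned by _build_task in a Composer
# environment and step it with a zero-valued action for a bounded number of
# steps, stopping early if the episode terminates.
task = _build_task(moving_target=False)
env = composer.Environment(task, random_state=np.random.RandomState(0))
timestep = env.reset()
zero_action = np.zeros_like(env.physics.data.ctrl)
for _ in range(100):
    timestep = env.step(zero_action)
    if timestep.last():
        break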