示例#1
0
def test_relative_action():
    """Check joint velocities after stepping with and without relative actions.

    For each value of the ``relative_action`` constant the simulation state is
    overwritten, one constant action is applied for ten steps, and the summed
    squared velocity of the trailing ``qvel`` entries is compared against a
    fixed threshold. Only an all-zero action combined with
    ``relative_action=True`` is expected to end up below the threshold; every
    other combination is expected to end up above it.
    """
    velocity_threshold = 0.09
    n_steps = 10

    for relative_action in (True, False):
        # NOTE: the starting seed is deliberately pinned -- starting states
        # affect this test. Making the test robust to the seed is future work.
        env = make_env(
            starting_seed=586895,
            constants={"relative_action": relative_action},
        )
        env.reset()
        base_env = env.unwrapped

        n_actuators = base_env.sim.model.nu
        zeros = np.zeros(n_actuators)
        not_zeros = np.ones(n_actuators) * 0.5

        qpos_shape = base_env.sim.data.qpos.shape
        robot_joint_names = [
            name for name in base_env.sim.model.joint_names if "robot0" in name
        ]
        # Everything in qpos that is not accounted for by a "robot0" joint.
        num_cube_joints = qpos_shape[0] - len(robot_joint_names)

        for action in (zeros, not_zeros):
            sim_data = base_env.sim.data
            sim_data.qvel[:] = 0
            # NOTE(review): this draw uses the global numpy RNG; whether
            # make_env seeds it is not visible here -- confirm.
            sim_data.qpos[:] = np.random.randn(*qpos_shape) * 0.1
            # Overwrite the leading (non-robot) entries with a fixed value.
            sim_data.qpos[:num_cube_joints] = -10.0

            for _ in range(n_steps):
                base_env.step(action)

            # NOTE(review): the offset was derived from the qpos length but is
            # applied to qvel here; confirm both layouts line up as intended.
            trailing_qvel = base_env.sim.data.qvel[num_cube_joints:]
            qvel = np.square(trailing_qvel).sum()

            expect_small = relative_action and (action == zeros).all()
            if expect_small:
                assert qvel < velocity_threshold
            else:
                assert qvel > velocity_threshold
示例#2
0
def test_locked_cube():
    """Check joint layout, cube mass and cube retention over 20 episodes.

    After every reset the test verifies the exact joint name ordering of the
    model, steps the environment 20 times with the middle bin of every action
    dimension (``nvec // 2``), records whether the cube is still on the palm,
    and checks the subtree mass of the ``cube:middle`` body. At the end, the
    cube must have stayed on the palm in at least 80% of the episodes.
    """
    # Expected joint order: cube joints, target joints, then the robot joints.
    expected_joints = (
        "cube:cube_tx",
        "cube:cube_ty",
        "cube:cube_tz",
        "cube:cube_rot",
        "target:cube_tx",
        "target:cube_ty",
        "target:cube_tz",
        "target:cube_rot",
        "robot0:WRJ1",
        "robot0:WRJ0",
        "robot0:FFJ3",
        "robot0:FFJ2",
        "robot0:FFJ1",
        "robot0:FFJ0",
        "robot0:MFJ3",
        "robot0:MFJ2",
        "robot0:MFJ1",
        "robot0:MFJ0",
        "robot0:RFJ3",
        "robot0:RFJ2",
        "robot0:RFJ1",
        "robot0:RFJ0",
        "robot0:LFJ4",
        "robot0:LFJ3",
        "robot0:LFJ2",
        "robot0:LFJ1",
        "robot0:LFJ0",
        "robot0:THJ4",
        "robot0:THJ3",
        "robot0:THJ2",
        "robot0:THJ1",
        "robot0:THJ0",
    )
    n_episodes = 20
    n_steps = 20

    env = make_env(starting_seed=0)

    is_on_palm = []
    for _episode in range(n_episodes):
        env.reset()

        assert env.unwrapped.sim.model.joint_names == expected_joints

        with ignore_mujoco_warnings():
            for _step in range(n_steps):
                middle_action = env.action_space.nvec // 2
                _, _, _, _ = env.step(middle_action)

        cube_on_palm = on_palm(env.unwrapped.sim)
        is_on_palm.append(cube_on_palm)

        # Make sure the mass is right.
        model = env.unwrapped.sim.model
        cube_body_idx = model.body_name2id("cube:middle")
        cube_mass = model.body_subtreemass[cube_body_idx]
        assert_allclose(cube_mass, 0.078, atol=1e-3)

    on_palm_rate = np.mean(is_on_palm)
    assert (
        on_palm_rate >= 0.8
    ), "Cube should stay in hand (most of the time) when zero action is sent."
示例#3
0
def test_informative_obs():
    """Check that no observation key is constant over a full episode.

    One episode is rolled out with sampled actions, bracketed by a reset at
    the start and another at the end. Every observation must expose the same
    set of keys, and for each key that is not explicitly exempted, at least
    one recorded value must differ from the first recorded value.
    """
    # Keys that are allowed to be constant for the whole rollout.
    WHITELIST = (
        # The position of the goal is zeroed
        "relative_goal_pos",
        "noisy_relative_goal_pos",
        "goal_pos",
        # Not all episodes end with a fall, i.e. it might be all zeros
        "fell_down",
        "is_goal_achieved",
    )

    env = make_env(constants=dict(randomize=False, max_timesteps_per_goal=50))

    # Roll out a single episode, recording every observation.
    all_obs = [env.reset()]
    while True:
        step_obs, _reward, done, _info = env.step(env.action_space.sample())
        all_obs.append(step_obs)
        if done:
            break
    all_obs.append(env.reset())  # one more reset at the end

    # Every observation must carry exactly the same (non-empty) key set.
    keys = set(all_obs[0].keys())
    assert len(keys) > 0
    for single_obs in all_obs:
        assert set(single_obs.keys()) == keys

    # Re-index the recorded observations by key.
    combined_obs_by_keys = {
        key: [single_obs[key] for single_obs in all_obs] for key in keys
    }

    # Make sure that none of the keys has all-constant obs.
    for key, series in combined_obs_by_keys.items():
        assert len(series) == len(all_obs)
        if key in WHITELIST:
            continue

        obs0 = series[0]
        # If every value equals the first one, the observation carries no
        # information at all. This is usually bad (e.g. there was an issue in
        # the past where qpos was always set to all-zeros).
        assert any(
            not np.array_equal(obs0, obs_i) for obs_i in series
        ), "observations for {} are all equal to {}".format(key, obs0)
示例#4
0
def test_det_locked_consistent():
    """Two non-randomized envs built from the same seed must behave alike.

    Both environments are created with ``randomize=False`` and an identical
    starting seed, then handed to ``helper_test_two_deterministic_envs``.
    """
    shared_seed = 12345
    first_env = make_env(
        constants={"randomize": False}, starting_seed=shared_seed
    )
    second_env = make_env(
        constants={"randomize": False}, starting_seed=shared_seed
    )
    helper_test_two_deterministic_envs(first_env, second_env)
示例#5
0
def test_rand_locked_consistent():
    """Two default-configured envs built from the same seed must behave alike.

    Both environments are created with only an identical starting seed (no
    constants overridden), then handed to
    ``helper_test_two_deterministic_envs``.
    """
    shared_seed = 12345
    first_env = make_env(starting_seed=shared_seed)
    second_env = make_env(starting_seed=shared_seed)
    helper_test_two_deterministic_envs(first_env, second_env)