Example #1
def rollout_dummy_rbf_policy_4dof():
    # Environment
    env = WAMBallInCupSim(
        num_dof=4,
        max_steps=3000,
        # Note: when tuning the task args, the `R` matrices are 4x4 for the 4-DoF WAM
        task_args=dict(R=np.zeros((4, 4)),
                       R_dev=np.diag([0.2, 0.2, 1e-2, 1e-2])),
    )

    # Stabilize ball and print out the stable state
    env.reset()
    act = np.zeros(env.spec.act_space.flat_dim)
    for _ in range(1500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Print the actual positions for the 4-DoF setup (only needed to set up the hard-coded values in the class)
    print('Ball pos:', env.sim.data.get_body_xpos('ball'))
    print('Cup goal:', env.sim.data.get_site_xpos('cup_goal'))
    print('Joint pos (incl. first rope angle):', env.sim.data.qpos[:5])

    # Apply DualRBFLinearPolicy and plot the joint states over the desired ones
    rbf_hparam = dict(num_feat_per_dim=7,
                      bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=2)
    done, param = False, None
    while not done:
        ro = rollout(env,
                     policy,
                     render_mode=RenderMode(video=True),
                     eval=True,
                     reset_kwargs=dict(domain_param=param))
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, param = after_rollout_query(env, policy, ro)
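These snippets omit their imports. As a rough guide, here is a hedged sketch of what they rely on; the Pyrado module paths are assumptions and may differ between Pyrado versions:

import os.path as osp

import matplotlib.pyplot as plt
import numpy as np
import torch as to

import pyrado
# Module paths below are assumptions; check your Pyrado version.
from pyrado.sampling.rollout import after_rollout_query, rollout
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt
# WAMBallInCupSim, BallOnBeamSim, DualRBFLinearPolicy, and the wrapper/loading
# helpers live under pyrado.environments, pyrado.policies, and the domain
# randomization subpackage; their exact submodules vary across versions.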
Example #2
def rollout_dummy_rbf_policy_7dof():
    # Environment
    env = WAMBallInCupSim(num_dof=7, max_steps=1750, task_args=dict(sparse_rew_fcn=True))

    # Stabilize around initial position
    env.reset(domain_param=dict(cup_scale=1.0, rope_length=0.3103, ball_mass=0.021))
    act = np.zeros((6,))  # desired deltas from the initial pose
    for _ in range(500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Apply DualRBFLinearPolicy
    policy_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.0]), np.array([1.0])))
    policy = DualRBFLinearPolicy(env.spec, policy_hparam, dim_mask=1)
    done, param = False, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param))
        print_cbt(f"Return: {ro.undiscounted_return()}", "g", bright=True)
        done, _, param = after_rollout_query(env, policy, ro)

    # Retrieve infos from rollout
    t = ro.time
    des_pos_traj = ro.env_infos["qpos_des"]
    pos_traj = ro.env_infos["qpos"]
    des_vel_traj = ro.env_infos["qvel_des"]
    vel_traj = ro.env_infos["qvel"]
    ball_pos = ro.env_infos["ball_pos"]
    cup_pos = ro.env_infos["cup_pos"]

    # Plot trajectories of the directly controlled joints and their corresponding desired trajectories
    fig, ax = plt.subplots(3, sharex="all")
    for i, idx in enumerate([1, 3, 5]):
        ax[i].plot(t, des_pos_traj[:, idx], label=f"qpos_des {idx}")
        ax[i].plot(t, pos_traj[:, idx], label=f"qpos {idx}")
        ax[i].legend()

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color="blue", label="Ball")
    ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color="blue", label="Ball final")
    ax.plot(xs=cup_pos[:, 0], ys=cup_pos[:, 1], zs=cup_pos[:, 2], color="red", label="Cup")
    ax.scatter(xs=cup_pos[-1, 0], ys=cup_pos[-1, 1], zs=cup_pos[-1, 2], color="red", label="Cup final")
    ax.legend()
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    ax.view_init(elev=16.0, azim=-7.0)
    plt.show()
Example #3
def rollout_dummy_rbf_policy():
    # Environment
    env = WAMBallInCupSim(max_steps=1750, task_args=dict(sparse_rew_fcn=True))

    # Stabilize around initial position
    env.reset(domain_param=dict(cup_scale=1., rope_length=0.3103, ball_mass=0.021))
    act = np.zeros((6,))  # desired deltas from the initial pose
    for _ in range(500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Apply DualRBFLinearPolicy
    rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=1)
    done, param = False, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param))
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, param = after_rollout_query(env, policy, ro)

    # Retrieve infos from rollout
    t = ro.env_infos['t']
    des_pos_traj = ro.env_infos['qpos_des']  # (max_steps,7) ndarray
    pos_traj = ro.env_infos['qpos']
    des_vel_traj = ro.env_infos['qvel_des']  # (max_steps,7) ndarray
    vel_traj = ro.env_infos['qvel']
    ball_pos = ro.env_infos['ball_pos']
    state_des = ro.env_infos['state_des']

    # Plot trajectories of the directly controlled joints and their corresponding desired trajectories
    fig, ax = plt.subplots(3, sharex='all')
    for i, idx in enumerate([1, 3, 5]):
        ax[i].plot(t, des_pos_traj[:, idx], label=f'qpos_des {idx}')
        ax[i].plot(t, pos_traj[:, idx], label=f'qpos {idx}')
        ax[i].legend()

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color='blue', label='Ball')
    ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color='blue', label='Ball final')
    ax.plot(xs=state_des[:, 0], ys=state_des[:, 1], zs=state_des[:, 2], color='red', label='Cup')
    ax.scatter(xs=state_des[-1, 0], ys=state_des[-1, 1], zs=state_des[-1, 2], color='red', label='Cup final')
    ax.legend()
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.view_init(elev=16., azim=-7.)
    plt.show()
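In Examples #2 and #3, `DualRBFLinearPolicy` evaluates Gaussian RBF features over the normalized time within `bounds=(0, 1)` and maps them linearly to desired joint positions and velocities. A minimal sketch of such features, purely for illustration (the names `rbf_features` and the width heuristic are ours, not Pyrado's):

import numpy as np

def rbf_features(t_norm: float, num_feat: int = 7) -> np.ndarray:
    # Evenly spaced Gaussian RBF centers on the normalized time axis [0, 1]
    centers = np.linspace(0.0, 1.0, num_feat)
    # Width heuristic: half the squared center spacing (an assumption)
    var = 0.5 * (centers[1] - centers[0]) ** 2
    feats = np.exp(-(t_norm - centers) ** 2 / (2.0 * var))
    return feats / feats.sum()  # normalize so the activations sum to 1

# A linear policy maps the features to actions, e.g. desired deltas for
# 3 joints' positions and velocities (hence 6 action dimensions):
weights = np.zeros((6, 7))  # learned parameters; zeros here as a placeholder
action = weights @ rbf_features(t_norm=0.5)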
Example #4
 def __init__(self):
     ShowBase.__init__(self)
     self.done = False
     self.state = None
     self.param = None

     # Roll out the policy once; `env`, `policy`, `args`, and `max_steps` are
     # assumed to be defined at module level
     self.ro = rollout(
         env,
         policy,
         render_mode=RenderMode(text=args.verbose,
                                video=args.animation),
         eval=True,
         max_steps=max_steps,
         stop_on_done=not args.relentless,
         reset_kwargs=dict(domain_param=self.param,
                           init_state=self.state),
     )
     print_domain_params(env.domain_param)
     print_cbt(f"Return: {self.ro.undiscounted_return()}",
               "g",
               bright=True)
     self.done, self.state, self.param = after_rollout_query(
         env, policy, self.ro)

     # Query the geometry from the simulator's animation setup
     self.bob = BallOnBeamSim(2)
     self.pos, self.r_ball, self.a, self.l_beam, self.d_beam = self.bob._init_anim(
     )

     # Load the models, attach them to the scene graph, and place the camera
     self.ball = self.loader.loadModel("my_models/ball")
     self.ball.reparentTo(self.render)
     self.ball.setPos(self.pos)
     self.box = self.loader.loadModel("my_models/box")
     self.box.reparentTo(self.render)
     self.box.setPos(0, 0, 0)
     self.box.setScale(self.l_beam, self.d_beam, 2 * self.d_beam)
     self.camera.setPos(0, -10, 0)
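This `__init__` belongs to a subclass of Panda3D's `ShowBase` whose name is not shown in the snippet. A launcher for such a viewer might look as follows (the class name `BallOnBeamViewer` is hypothetical, chosen only for illustration):

app = BallOnBeamViewer()  # hypothetical name for the ShowBase subclass above
app.run()  # hand control to Panda3D's main loop (ShowBase.run)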
Example #5
def sim_policy_fixed_env(env: SimEnv, policy: Policy,
                         domain_param: Union[dict, list]):  # Union from the typing module
    """
    Simulate (with animation) a rollout in an environment with fixed domain parameters.

    :param env: environment stack as it was used during training
    :param policy: policy to simulate
    :param domain_param: domain parameter set or a list of sets that specify the environment
    """
    # Remove wrappers that make the rollouts stochastic
    env = remove_env(env, GaussianObsNoiseWrapper)
    env = remove_env(env, DomainRandWrapperBuffer)
    env = remove_env(env, DomainRandWrapperLive)

    # Initialize
    done, state, i = False, None, 0
    if isinstance(domain_param, dict):
        param = domain_param
    elif isinstance(domain_param, list):
        param = domain_param[i]
    else:
        raise pyrado.TypeErr(given=domain_param, expected_type=[dict, list])

    while not done:
        ro = rollout(env,
                     policy,
                     reset_kwargs=dict(domain_param=param, init_state=state),
                     render_mode=RenderMode(video=True),
                     eval=True)
        print_domain_params(env.domain_param)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, state, _ = after_rollout_query(env, policy, ro)

        if isinstance(domain_param, list):
            # Iterate over the list of domain parameter sets
            i = (i + 1) % len(domain_param)
            param = domain_param[i]
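A hedged usage sketch of the function above; `ball_mass` is one of the domain parameters already used in Example #2, but the valid keys depend on the environment:

# Single fixed domain parameter set
sim_policy_fixed_env(env, policy, dict(ball_mass=0.021))

# Cycle through a list of parameter sets, one per rollout
sim_policy_fixed_env(env, policy, [dict(ball_mass=0.019), dict(ball_mass=0.023)])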
Example #6
        cand = to.load(osp.join(ex_dir, found_cands[i])).numpy()
        ax.scatter(np.arange(cand.size), cand, label=r'$\phi_{' + str(i) + '}$', c=f'C{i % 10}', s=16)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.set_ylabel('parameter value')
    ax.set_xlabel('parameter index')
    plt.legend()
    plt.show()

    # Simulate
    for i in range(len(found_policies)):
        # Load current
        policy = to.load(osp.join(ex_dir, found_policies[i]))
        cand = to.load(osp.join(ex_dir, found_cands[i]))

        # Set the domain randomizer given the hyper-parameters
        if isinstance(env_sim, MetaDomainRandWrapper):
            env_sim.adapt_randomizer(cand)
            print_cbt(f'Set the domain randomizer to\n{env_sim.randomizer}', 'c')
        else:
            raise pyrado.TypeErr(given=env_sim, expected_type=MetaDomainRandWrapper)

        done, state, param = False, None, None
        while not done:
            print_cbt(f'Simulating {found_policies[i]} with associated domain parameter distribution.', 'g')
            ro = rollout(env_sim, policy, render_mode=RenderMode(video=True), eval=True,
                         reset_kwargs=dict(domain_param=param, init_state=state))  # calls env.reset()
            print_domain_params(env_sim.domain_param)
            print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
            done, state, param = after_rollout_query(env_sim, policy, ro)
    pyrado.close_vpython()
Example #7
from pyrado.utils.argparser import get_argparser

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), WAMBallInCupSim):
        # If `max_steps` (or `dt`) is not explicitly set via `args`, use the same value as in the simulation
        max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
        dt = args.dt if args.dt is not None else env_sim.dt
        env_real = WAMBallInCupReal(dt=dt, max_steps=max_steps)
    else:
        raise pyrado.TypeErr(given=env_sim, expected_type=WAMBallInCupSim)

    # Finally, wrap the real environment in the same wrappers as the simulation during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on device
    done = False
    while not done:
        ro = rollout(env_real, policy, eval=True)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, _ = after_rollout_query(env_real, policy, ro)
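Note that in all of these examples the while-loop is driven by `after_rollout_query`, which interactively asks whether to stop and can return a new initial state and domain parameter set for the next rollout.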