Example #1
def experiment_wo_distruber(env_real: RealEnv, env_sim: SimEnv):
    # Wrap the environment in the same way as during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    return rollout(env_real,
                   policy,
                   eval=True,
                   max_steps=args.max_steps,
                   render_mode=RenderMode(text=True),
                   no_reset=True,
                   no_close=True)
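
The policy above was trained on a wrapped simulation environment, so the real environment has to be wrapped identically before the policy can act on it, which is what `wrap_like_other_env` takes care of; otherwise the policy would receive differently scaled or structured observations on the real system than it saw in simulation. Below is a simplified, hypothetical sketch of that idea; the wrapper classes and the helper name are placeholders for illustration, not Pyrado's implementation.

# Hypothetical sketch: re-apply the wrapper chain of a reference environment
# (e.g. the training simulation) to a target environment (e.g. the real robot).
class EnvWrapper:
    def __init__(self, wrapped_env):
        self.wrapped_env = wrapped_env

class ObsNormWrapper(EnvWrapper):
    """Placeholder for an observation-normalization wrapper."""

class ActNormWrapper(EnvWrapper):
    """Placeholder for an action-normalization wrapper."""

def wrap_like_other_env_sketch(env_target, env_reference):
    # Collect the wrapper types of the reference env from the outside in
    wrapper_types = []
    env = env_reference
    while isinstance(env, EnvWrapper):
        wrapper_types.append(type(env))
        env = env.wrapped_env
    # Re-apply them to the target env, innermost wrapper first
    for wrapper_cls in reversed(wrapper_types):
        env_target = wrapper_cls(env_target)
    return env_target

# e.g. a simulation wrapped as ActNormWrapper(ObsNormWrapper(...)) yields a
# target env wrapped in the same order
env_sim_sketch = ActNormWrapper(ObsNormWrapper('sim-core'))
env_real_sketch = wrap_like_other_env_sketch('real-core', env_sim_sketch)
print(type(env_real_sketch).__name__)  # -> ActNormWrapper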
Example #2
from pyrado.utils.argparser import get_argparser

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), WAMBallInCupSim):
        # If `max_steps` (or `dt`) is not explicitly set via `args`, use the same value as in the simulation
        max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
        dt = args.dt if args.dt is not None else env_sim.dt
        env_real = WAMBallInCupReal(dt=dt, max_steps=max_steps)
    else:
        raise pyrado.TypeErr(given=env_sim, expected_type=WAMBallInCupSim)

    # Finally, wrap the env in the same way as during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on device
    done = False
    while not done:
        ro = rollout(env_real, policy, eval=True)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, _ = after_rollout_query(env_real, policy, ro)
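
Note how `max_steps` and `dt` fall back to the simulation's values when they are not passed on the command line; the checks against `pyrado.inf` and `None` suggest those are the parser's defaults. A minimal, self-contained sketch of this fallback pattern, where the defaults and the numeric stand-ins are assumptions for illustration rather than Pyrado's actual values:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--max_steps', type=int, default=float('inf'))  # assumed default
parser.add_argument('--dt', type=float, default=None)  # assumed default
args = parser.parse_args([])  # simulate an empty command line for this sketch

sim_max_steps, sim_dt = 1500, 0.002  # stand-ins for env_sim.max_steps and env_sim.dt
max_steps = args.max_steps if args.max_steps < float('inf') else sim_max_steps
dt = args.dt if args.dt is not None else sim_dt
print(max_steps, dt)  # -> 1500 0.002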
Example #3
def experiment_w_distruber(env_real: RealEnv, env_sim: SimEnv):
    # Wrap the environment in the same way as during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    ro1 = rollout(env_real,
                  policy,
                  eval=True,
                  max_steps=args.max_steps // 3,
                  render_mode=RenderMode(),
                  no_reset=True,
                  no_close=True)

    # Run disturber
    env_real = inner_env(env_real)  # since we are reusing it
    print_cbt('Running the 1st disturber ...', 'c', bright=True)
    rollout(env_real,
            disturber_pos,
            eval=True,
            max_steps=steps_disturb,
            render_mode=RenderMode(),
            no_reset=True,
            no_close=True)

    # Wrap the environment in the same way as during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    ro2 = rollout(env_real,
                  policy,
                  eval=True,
                  max_steps=args.max_steps // 3,
                  render_mode=RenderMode(),
                  no_reset=True,
                  no_close=True)

    # Run disturber
    env_real = inner_env(env_real)  # since we are reusing it
    print_cbt('Running the 2nd disturber ...', 'c', bright=True)
    rollout(env_real,
            disturber_neg,
            eval=True,
            max_steps=steps_disturb,
            render_mode=RenderMode(),
            no_reset=True,
            no_close=True)

    # Wrap the environment in the same way as during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run learned policy on the device
    print_cbt('Running the evaluation policy ...', 'c', bright=True)
    ro3 = rollout(env_real,
                  policy,
                  eval=True,
                  max_steps=args.max_steps // 3,
                  render_mode=RenderMode(),
                  no_reset=True,
                  no_close=True)

    return StepSequence.concat([ro1, ro2, ro3])
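
`StepSequence.concat` stitches the three policy rollouts back together into one evaluation trajectory; the two disturber rollouts themselves are not part of the returned sequence. A toy sketch of the concatenation idea, using plain reward lists in place of Pyrado's `StepSequence`:

# Toy illustration only: reward lists standing in for recorded rollout segments
segment1 = [1.0, 0.9, 0.8]   # policy rollout before the 1st disturbance
segment2 = [0.5, 0.6]        # policy rollout between the two disturbances
segment3 = [0.7, 0.8, 0.9]   # policy rollout after the 2nd disturbance

full_episode = segment1 + segment2 + segment3
print(len(full_episode), round(sum(full_episode), 2))  # -> 8 6.2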
Example #4
    df = pd.DataFrame(columns=['policy', 'ret', 'len'])

    # Evaluate all policies
    for i, (env_sim, policy) in enumerate(zip(env_sim_list, policy_list)):
        # Create a new sampler pool for every policy to synchronize the random seeds, and thus the initial states, across policies
        pool = SamplerPool(args.num_workers)

        # Seed the sampler
        if args.seed is not None:
            pool.set_seed(args.seed)
            print_cbt(f"Set the random number generators' seed to {args.seed}.", 'w')
        else:
            print_cbt('No seed was set', 'y')

        # Add the same wrappers as during training; `env` is the evaluation
        # environment created before this loop (not shown in this excerpt)
        env = wrap_like_other_env(env, env_sim)

        # Sample rollouts
        ros = eval_randomized_domain(pool, env, pert, policy, init_state_list)  # internally calls DomainRandWrapperLive

        # Compute the result metrics
        rets = [ro.undiscounted_return() for ro in ros]
        lengths = [float(ro.length) for ro in ros]  # int values are not numeric in pandas
        df = pd.concat([df, pd.DataFrame(dict(policy=ex_labels[i], ret=rets, len=lengths))], ignore_index=True)

    metrics = dict(
        avg_len=df.groupby('policy').mean()['len'].to_dict(),
        avg_ret=df.groupby('policy').mean()['ret'].to_dict(),
        median_ret=df.groupby('policy').median()['ret'].to_dict(),
        min_ret=df.groupby('policy').min()['ret'].to_dict(),
        max_ret=df.groupby('policy').max()['ret'].to_dict(),