def eval_damping():
    """Plot joint trajectories for different joint damping parameters."""
    # Load experiment and remove possible randomization wrappers
    ex_dir = ask_for_experiment()
    env, policy, _ = load_experiment(ex_dir)
    env = inner_env(env)
    env.domain_param = WAMBallInCupSim.get_nominal_domain_param()

    data = []
    t = []
    dampings = [0., 1e-2, 1e-1, 1e0]
    print_cbt(f'Run policy for damping coefficients: {dampings}')
    for d in dampings:
        env.reset(domain_param=dict(joint_damping=d))
        ro = rollout(env, policy, render_mode=RenderMode(video=False), eval=True)
        t.append(ro.env_infos['t'])
        data.append(ro.env_infos['qpos'])

    fig, ax = plt.subplots(3, sharex='all')
    ls = ['k-', 'b--', 'g-.', 'r:']  # line style setting for better visibility
    for i, idx in enumerate([1, 3, 5]):
        for j in range(len(dampings)):
            ax[i].plot(t[j], data[j][:, idx], ls[j], label=f'damping: {dampings[j]}')
            if i == 0:
                ax[i].legend()
        ax[i].set_ylabel(f'joint {idx} pos [rad]')
    ax[2].set_xlabel('time [s]')
    plt.suptitle('Evaluation of joint damping coefficient')
    plt.show()
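# Usage sketch (assumption, not part of the original script): `eval_damping` relies on the
# surrounding script's imports (matplotlib.pyplot as plt, rollout, RenderMode, ...), so a
# minimal entry point could look like this.
if __name__ == '__main__':
    eval_damping()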
def load_teacher_experiment(self, exp: Experiment):
    """
    Load the teachers from a PDDR teachers' experiment.

    :param exp: the teachers' experiment object
    """
    _, _, extra = load_experiment(exp)
    self.unpack_teachers(extra)
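# Usage sketch (assumption): `load_teacher_experiment` is a method, so it would be called on an
# already constructed PDDR instance (`algo` is a hypothetical name); `ask_for_experiment` is
# assumed to return a handle that `load_experiment` accepts.
teacher_exp = ask_for_experiment()
algo.load_teacher_experiment(teacher_exp)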
def test_pddr(ex_dir, env: SimEnv, policy, algo_hparam):
    pyrado.set_seed(0)

    # Create algorithm and train
    teacher_policy = deepcopy(policy)
    critic = GAE(
        vfcn=FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), hidden_sizes=[16, 16], hidden_nonlin=to.tanh)
    )
    teacher_algo_hparam = dict(critic=critic, min_steps=1500, max_iter=2)
    teacher_algo = PPO

    # Wrapper
    randomizer = create_default_randomizer(env)
    env = DomainRandWrapperLive(env, randomizer)

    # Subroutine
    algo_hparam = dict(
        max_iter=2,
        min_steps=env.max_steps,
        std_init=0.15,
        num_epochs=10,
        num_teachers=2,
        teacher_policy=teacher_policy,
        teacher_algo=teacher_algo,
        teacher_algo_hparam=teacher_algo_hparam,
        num_workers=1,
    )
    algo = PDDR(ex_dir, env, policy, **algo_hparam)

    algo.train()
    assert algo.curr_iter == algo.max_iter

    # Save and load
    algo.save_snapshot(meta_info=None)
    algo_loaded = Algorithm.load_snapshot(load_dir=ex_dir)
    assert isinstance(algo_loaded, Algorithm)
    policy_loaded = algo_loaded.policy

    # Check
    assert all(algo.policy.param_values == policy_loaded.param_values)

    # Load the experiment. Since we did not save any hyper-parameters, we ignore the errors when loading.
    env, policy, extra = load_experiment(ex_dir)
    assert isinstance(env, Env)
    assert isinstance(policy, Policy)
    assert isinstance(extra, dict)
def conditional_actnorm_wrapper(env: Env, ex_dirs: list, idx: int):
    """
    Wrap the environment with an action normalization wrapper if the simulated environment had one.

    :param env: environment to sample from
    :param ex_dirs: list of experiment directories that will be loaded
    :param idx: index of the current directory
    :return: modified environment
    """
    # Get the simulation environment
    env_sim, _, _ = load_experiment(ex_dirs[idx])

    if typed_env(env_sim, ActNormWrapper) is not None:
        env = ActNormWrapper(env)
        print_cbt(f'Added an action normalization wrapper to the {idx + 1}-th evaluation policy.', 'y')
    else:
        env = remove_env(env, ActNormWrapper)
        print_cbt(f'Removed the action normalization wrapper from the {idx + 1}-th evaluation policy.', 'y')
    return env
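# Usage sketch (assumption): re-create the matching action normalization for each loaded policy
# before evaluating it on a common environment `env_common` (hypothetical name).
for idx, ex_dir in enumerate(ex_dirs):
    _, policy, _ = load_experiment(ex_dir)
    env_eval = conditional_actnorm_wrapper(env_common, ex_dirs, idx)
    # ... roll out `policy` in `env_eval` here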
real_data_exists = True
try:
    mode = input("Pass ep for episodic and sb for step-based control mode: ").lower()
    qpos_real = np.load(osp.join(ex_dir, f"qpos_real_{mode}.npy"))
    qvel_real = np.load(osp.join(ex_dir, f"qvel_real_{mode}.npy"))
except FileNotFoundError:
    real_data_exists = False
    print_cbt(
        f"Did not find a recorded real trajectory (qpos_real_{mode} and qvel_real_{mode}) for this policy. "
        f"Run deployment/run_policy_wam.py to get real-world trajectories.",
        "y",
        bright=True,
    )

# Load the policy and the environment
env, policy, _ = load_experiment(ex_dir, args)

# Get the nominal environment
env = remove_all_dr_wrappers(env)
env.domain_param = env.get_nominal_domain_param()
env.stop_on_collision = False

# Fix the seed for reproducibility
pyrado.set_seed(args.seed)

# Use the recorded initial state from the real system
init_state = env.init_space.sample_uniform()
if real_data_exists:
    if input("Use the recorded initial state from the real system? [y] / n ").lower() in ("", "y"):
import pyrado
from pyrado.environment_wrappers.utils import inner_env
from pyrado.environments.mujoco.wam import WAMBallInCupSim
from pyrado.logger.experiment import ask_for_experiment
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.experiments import wrap_like_other_env, load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.argparser import get_argparser

# Import of the real-robot counterpart used below; the module path is an assumption and may
# differ between Pyrado versions
from pyrado.environments.barrett_wam import WAMBallInCupReal


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), WAMBallInCupSim):
        # If `max_steps` (or `dt`) is not explicitly set via `args`, use the same value as in the simulation
        max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps
        dt = args.dt if args.dt is not None else env_sim.dt
        env_real = WAMBallInCupReal(dt=dt, max_steps=max_steps)
    else:
        raise pyrado.TypeErr(given=env_sim, expected_type=WAMBallInCupSim)

    # Finally, wrap the real env in the same way as done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on device
    done = False
from pprint import pprint

import pyrado
from pyrado.environment_wrappers.utils import remove_all_dr_wrappers
from pyrado.logger.experiment import ask_for_experiment
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.argparser import get_argparser
from pyrado.utils.experiments import load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.data_types import RenderMode


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir

    # Load the environment and the policy
    env, policy, kwout = load_experiment(ex_dir, args)

    # Override the time step size if specified
    if args.dt is not None:
        env.dt = args.dt

    if args.verbose:
        print('Hyper-parameters of the experiment')
        pprint(kwout.get('hparams', 'No hyper-parameters found!'))

    if args.remove_dr_wrappers:
        env = remove_all_dr_wrappers(env, verbose=True)

    # Use the environment's number of steps in case of the default argument (inf)
    max_steps = env.max_steps if args.max_steps == pyrado.inf else args.max_steps
    env = ActDelayWrapper(env)
    # param_spec['act_delay'] = np.linspace(0, 60, num=21, endpoint=True, dtype=int)

if not len(param_spec.keys()) == 1:
    raise pyrado.ValueErr(msg='Do not vary more than one domain parameter for this script! (Check action delay.)')
varied_param_key = ''.join(param_spec.keys())  # to get a str

if not (len(prefixes) == len(exp_names) and len(prefixes) == len(exp_labels)):
    raise pyrado.ShapeErr(msg=f'The lengths of prefixes, exp_names, and exp_labels must be equal, '
                              f'but they are {len(prefixes)}, {len(exp_names)}, and {len(exp_labels)}!')

# Load the policies
ex_dirs = [osp.join(p, e) for p, e in zip(prefixes, exp_names)]
policies = []
for ex_dir in ex_dirs:
    _, policy, _ = load_experiment(ex_dir)
    policies.append(policy)

# Create a one-dim results grid and ensure the right number of rollouts
param_list = param_grid(param_spec)
param_list *= args.num_ro_per_config

# Fix the initial state (set to None if it should not be fixed)
init_state = None

# Create an empty data frame
df = pd.DataFrame(columns=['policy', 'ret', 'len', varied_param_key])

# Evaluate all policies
for i, policy in enumerate(policies):
    # Create a new sampler pool for every policy to synchronize the random seeds, i.e. the init states
def test_snapshots_notmeta(ex_dir, env: SimEnv, policy, algo_class, algo_hparam):
    # Collect hyper-parameters, create the algorithm, and train
    common_hparam = dict(max_iter=1, num_workers=1)
    common_hparam.update(algo_hparam)

    if issubclass(algo_class, ActorCritic):
        common_hparam.update(
            min_rollouts=3,
            critic=GAE(
                vfcn=FNNPolicy(
                    spec=EnvSpec(env.obs_space, ValueFunctionSpace), hidden_sizes=[16, 16], hidden_nonlin=to.tanh
                )
            ),
        )
    elif issubclass(algo_class, ParameterExploring):
        common_hparam.update(num_init_states_per_domain=1)
    elif issubclass(algo_class, (DQL, SAC)):
        common_hparam.update(memory_size=1000, num_updates_per_step=2, gamma=0.99, min_rollouts=1)
        fnn_hparam = dict(hidden_sizes=[8, 8], hidden_nonlin=to.tanh)
        if issubclass(algo_class, DQL):
            # Override the setting
            env = BallOnBeamDiscSim(env.dt, env.max_steps)
            net = FNN(
                input_size=DiscreteActQValPolicy.get_qfcn_input_size(env.spec),
                output_size=DiscreteActQValPolicy.get_qfcn_output_size(),
                **fnn_hparam,
            )
            policy = DiscreteActQValPolicy(spec=env.spec, net=net)
        else:
            # Override the setting
            env = ActNormWrapper(env)
            policy = TwoHeadedGRUPolicy(env.spec, shared_hidden_size=8, shared_num_recurrent_layers=1)
            obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
            common_hparam.update(qfcn_1=FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
            common_hparam.update(qfcn_2=FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
    else:
        raise NotImplementedError

    # Simulate training
    algo = algo_class(ex_dir, env, policy, **common_hparam)
    algo.policy.param_values += to.tensor([42.0])
    if isinstance(algo, ActorCritic):
        algo.critic.vfcn.param_values += to.tensor([42.0])

    # Save and load
    algo.save_snapshot(meta_info=None)
    algo_loaded = Algorithm.load_snapshot(load_dir=ex_dir)
    assert isinstance(algo_loaded, Algorithm)
    policy_loaded = algo_loaded.policy
    if isinstance(algo, ActorCritic):
        critic_loaded = algo_loaded.critic

    # Check
    assert all(algo.policy.param_values == policy_loaded.param_values)
    if isinstance(algo, ActorCritic):
        assert all(algo.critic.vfcn.param_values == critic_loaded.vfcn.param_values)

    # Load the experiment. Since we did not save any hyper-parameters, we ignore the errors when loading.
    env, policy, extra = load_experiment(ex_dir)
    assert isinstance(env, Env)
    assert isinstance(policy, Policy)
    assert isinstance(extra, dict)
# Check arguments
src_domain_param_args = ["ml", "nominal", "posterior", "prior", None]
if args.src_domain_param not in src_domain_param_args:
    raise pyrado.ValueErr(given_name="src_domain_param", eq_constraint=src_domain_param_args)

# Get the experiment's directory to load from
ex_dir = ask_for_experiment(hparam_list=args.show_hparams) if args.dir is None else args.dir

# Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
if args.iter != -1:
    args.policy_name = f"iter_{args.iter}_policy"
if args.init:
    args.policy_name = "init_policy"
env_sim, policy, extra = load_experiment(ex_dir, args)

# Create the domain parameter mapping
dp_mapping = dict()
if extra is not None:
    dp_counter = 0
    for key in sorted(extra["hparams"]["dp_mapping"].keys()):
        dp = extra["hparams"]["dp_mapping"][key]
        if dp in extra["hparams"]["dp_selection"]:
            dp_mapping[dp_counter] = dp
            dp_counter += 1

pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=policy)

# Reset the policy's domain parameter if desired
prior, posterior = None, None
        state = state.repeat(varying.shape[0], varying.shape[1], 1)

        # Insert the values of the evaluation mesh grid into the selected state dimensions
        state[:, :, self._idcs] = varying

        return self._fcn(state)


if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()
    plt.rc("text", usetex=args.use_tex)

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment(hparam_list=args.show_hparams) if args.dir is None else args.dir

    # Load the environment and the value function
    env, _, kwout = load_experiment(ex_dir, args)
    vfcn = kwout["vfcn"]

    if not len(args.idcs) == 2:
        raise pyrado.ShapeErr(msg="Please provide exactly two indices to slice the value function input space (obs_space)!")

    # Use the environment's lower and upper bounds to parametrize the mesh grid
    lb, ub = env.obs_space.bounds
    lb_inf_check = np.isinf(lb)
    ub_inf_check = np.isinf(ub)
    if lb_inf_check.any():
        warn("Detected at least one inf entry in the mesh grid's lower bound, replacing all with -1.")
        lb[lb_inf_check] = -1.0
    if ub_inf_check.any():
        warn("Detected at least one inf entry in the mesh grid's upper bound, replacing all with 1.")
        ub[ub_inf_check] = 1.0
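    # Sketch (assumption, not the original script's continuation): the clipped bounds would
    # typically be turned into a 2D evaluation mesh over the two selected indices, e.g.
    x = np.linspace(lb[args.idcs[0]], ub[args.idcs[0]], num=100)
    y = np.linspace(lb[args.idcs[1]], ub[args.idcs[1]], num=100)
    xx, yy = np.meshgrid(x, y)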
def evaluate_policy(args, ex_dir):
    """Helper function to evaluate the policy from an experiment in the associated environment."""
    env, policy, _ = load_experiment(ex_dir, args)

    # Create a multi-dim evaluation grid
    param_spec = dict()
    param_spec_dim = None

    if isinstance(inner_env(env), BallOnPlateSim):
        param_spec["ball_radius"] = np.linspace(0.02, 0.08, num=2, endpoint=True)
        param_spec["ball_rolling_friction_coefficient"] = np.linspace(0.0295, 0.9, num=2, endpoint=True)

    elif isinstance(inner_env(env), QQubeSwingUpSim):
        eval_num = 200
        # Use nominal values for all other parameters
        for param, nominal_value in env.get_nominal_domain_param().items():
            param_spec[param] = nominal_value
        # param_spec["gravity_const"] = np.linspace(5.0, 15.0, num=eval_num, endpoint=True)
        param_spec["damping_pend_pole"] = np.linspace(0.0, 0.0001, num=eval_num, endpoint=True)
        param_spec["damping_rot_pole"] = np.linspace(0.0, 0.0006, num=eval_num, endpoint=True)
        param_spec_dim = 2

    elif isinstance(inner_env(env), QBallBalancerSim):
        # param_spec["gravity_const"] = np.linspace(7.91, 11.91, num=11, endpoint=True)
        # param_spec["ball_mass"] = np.linspace(0.003, 0.3, num=11, endpoint=True)
        # param_spec["ball_radius"] = np.linspace(0.01, 0.1, num=11, endpoint=True)
        param_spec["plate_length"] = np.linspace(0.275, 0.275, num=11, endpoint=True)
        param_spec["arm_radius"] = np.linspace(0.0254, 0.0254, num=11, endpoint=True)
        # param_spec["load_inertia"] = np.linspace(5.2822e-5 * 0.5, 5.2822e-5 * 1.5, num=11, endpoint=True)
        # param_spec["motor_inertia"] = np.linspace(4.6063e-7 * 0.5, 4.6063e-7 * 1.5, num=11, endpoint=True)
        # param_spec["gear_ratio"] = np.linspace(60, 80, num=11, endpoint=True)
        # param_spec["gear_efficiency"] = np.linspace(0.6, 1.0, num=11, endpoint=True)
        # param_spec["motor_efficiency"] = np.linspace(0.49, 0.89, num=11, endpoint=True)
        # param_spec["motor_back_emf"] = np.linspace(0.006, 0.066, num=11, endpoint=True)
        # param_spec["motor_resistance"] = np.linspace(2.6 * 0.5, 2.6 * 1.5, num=11, endpoint=True)
        # param_spec["combined_damping"] = np.linspace(0.0, 0.05, num=11, endpoint=True)
        # param_spec["friction_coeff"] = np.linspace(0, 0.015, num=11, endpoint=True)
        # param_spec["voltage_thold_x_pos"] = np.linspace(0.0, 1.0, num=11, endpoint=True)
        # param_spec["voltage_thold_x_neg"] = np.linspace(-1.0, 0.0, num=11, endpoint=True)
        # param_spec["voltage_thold_y_pos"] = np.linspace(0.0, 1.0, num=11, endpoint=True)
        # param_spec["voltage_thold_y_neg"] = np.linspace(-1.0, 0.0, num=11, endpoint=True)
        # param_spec["offset_th_x"] = np.linspace(-5 / 180 * np.pi, 5 / 180 * np.pi, num=11, endpoint=True)
        # param_spec["offset_th_y"] = np.linspace(-5 / 180 * np.pi, 5 / 180 * np.pi, num=11, endpoint=True)

    else:
        raise NotImplementedError

    # Always add an action delay wrapper (with 0 delay by default)
    if typed_env(env, ActDelayWrapper) is None:
        env = ActDelayWrapper(env)
        # param_spec['act_delay'] = np.linspace(0, 30, num=11, endpoint=True, dtype=int)

    add_info = "-".join(param_spec.keys())

    # Create a multi-dimensional results grid and ensure the right number of rollouts
    param_list = param_grid(param_spec)
    param_list *= args.num_rollouts_per_config

    # Fix the initial state (set to None if it should not be fixed)
    init_state = np.array([0.0, 0.0, 0.0, 0.0])

    # Create the sampler
    pool = SamplerPool(args.num_workers)
    if args.seed is not None:
        pool.set_seed(args.seed)
        print_cbt(f"Set the random number generators' seed to {args.seed}.", "w")
    else:
        print_cbt("No seed was set", "y")

    # Sample rollouts
    ros = eval_domain_params(pool, env, policy, param_list, init_state)

    # Compute metrics
    lod = []
    for ro in ros:
        d = dict(**ro.rollout_info["domain_param"], ret=ro.undiscounted_return(), len=ro.length)
        # Simply remove the observation noise from the domain parameters
        try:
            d.pop("obs_noise_mean")
            d.pop("obs_noise_std")
        except KeyError:
            pass
        lod.append(d)

    df = pd.DataFrame(lod)
    metrics = dict(
        avg_len=df["len"].mean(),
        avg_ret=df["ret"].mean(),
        median_ret=df["ret"].median(),
        min_ret=df["ret"].min(),
        max_ret=df["ret"].max(),
        std_ret=df["ret"].std(),
    )
    pprint(metrics, indent=4)

    # Create a subfolder and save
    timestamp = datetime.datetime.now()
    add_info = timestamp.strftime(pyrado.timestamp_format) + "--" + add_info
    save_dir = osp.join(ex_dir, "eval_domain_grid", add_info)
    os.makedirs(save_dir, exist_ok=True)

    save_dicts_to_yaml(
        {"ex_dir": str(ex_dir)},
        {"varied_params": list(param_spec.keys())},
        {"num_rpp": args.num_rollouts_per_config, "seed": args.seed},
        {"metrics": dict_arraylike_to_float(metrics)},
        save_dir=save_dir,
        file_name="summary",
    )
    pyrado.save(df, f"df_sp_grid_{len(param_spec) if param_spec_dim is None else param_spec_dim}d.pkl", save_dir)
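# Usage sketch (assumption): evaluate an experiment picked interactively; the `--dir` argument
# is assumed to be provided by the argparser, mirroring the other scripts in this collection.
if __name__ == "__main__":
    args = get_argparser().parse_args()
    ex_dir = ask_for_experiment() if args.dir is None else args.dir
    evaluate_policy(args, ex_dir)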