""" Script to plot the GP's posterior after a Bayesian Domain Randomization experiment """ import os.path as osp import torch as to from matplotlib import pyplot as plt import pyrado from pyrado.logger.experiment import ask_for_experiment from pyrado.plotting.gaussian_process import render_singletask_gp from pyrado.utils.argparser import get_argparser if __name__ == '__main__': # Parse command line arguments args = get_argparser().parse_args() plt.rc('text', usetex=args.use_tex) # Get the experiment's directory to load from ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir cands = to.load(osp.join(ex_dir, 'candidates.pt')) cands_values = to.load(osp.join(ex_dir, 'candidates_values.pt')).unsqueeze(1) dim_cand = cands.shape[1] # number of domain distribution parameters if dim_cand%2 != 0: raise pyrado.ShapeErr(msg='The dimension of domain distribution parameters must be a multiple of 2!') # Select dimensions to plot (ignored for 1D mode) if len(args.idcs) != 2: raise pyrado.ShapeErr(msg='Select exactly 2 indices!')
from pyrado.algorithms.step_based.gae import GAE
from pyrado.algorithms.step_based.ppo import PPO
from pyrado.domain_randomization.domain_parameter import SelfPacedDomainParam
from pyrado.domain_randomization.domain_randomizer import DomainRandomizer
from pyrado.environment_wrappers.action_normalization import ActNormWrapper
from pyrado.environment_wrappers.domain_randomization import DomainRandWrapperLive
from pyrado.environments.pysim.quanser_qube import QQubeSwingUpSim
from pyrado.logger.experiment import save_dicts_to_yaml, setup_experiment
from pyrado.policies.feed_back.fnn import FNNPolicy
from pyrado.spaces import ValueFunctionSpace
from pyrado.utils.argparser import get_argparser
from pyrado.utils.data_types import EnvSpec


if __name__ == "__main__":
    # Parse command line arguments
    parser = get_argparser()
    parser.add_argument("--frequency", default=250, type=int)
    parser.set_defaults(max_steps=600)
    parser.add_argument("--ppo_iterations", default=150, type=int)
    parser.add_argument("--sprl_iterations", default=50, type=int)
    parser.add_argument("--cov_only", action="store_true")
    args = parser.parse_args()

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(
        QQubeSwingUpSim.name,
        f"{PPO.name}_{FNNPolicy.name}",
        f"{args.frequency}Hz_{args.max_steps}ROLen_{args.ppo_iterations}PPOIter_{args.sprl_iterations}SPRLIter_cov_only{args.cov_only}_seed_{args.seed}",
    )

    # Set seed if desired
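# ----------------------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original script): the --frequency argument is typically
# interpreted as the control frequency in Hz, so the environment's timestep is dt = 1 / frequency, and
# max_steps * dt gives the rollout length in seconds. The variable names below are hypothetical.
# ----------------------------------------------------------------------------------------------------
from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument("--frequency", default=250, type=int)  # control frequency in Hz
parser.add_argument("--max_steps", default=600, type=int)  # number of steps per rollout
args = parser.parse_args([])  # use the defaults for this example

dt = 1 / args.frequency  # simulation timestep in seconds, here 1/250 = 0.004 s
rollout_duration = args.max_steps * dt  # here 600 * 0.004 s = 2.4 s per rollout
print(f"dt = {dt} s, rollout duration = {rollout_duration} s")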
def load_experiment(ex_dir: str, args: Any = None) -> (Union[SimEnv, EnvWrapper], Policy, dict):
    """
    Load the (training) environment and the policy.
    This helper function first tries to read the hyper-parameters yaml-file in the experiment's directory to infer
    which entities should be loaded. If no file was found, we fall back to some heuristic and hope for the best.

    :param ex_dir: experiment's parent directory
    :param args: arguments from the argument parser, pass `None` to fall back to the values from the default argparser
    :return: environment, policy, and optional output (e.g. value function)
    """
    env, policy, extra = None, None, dict()

    if args is None:
        # Fall back to default arguments. By passing [], we ignore the command line arguments
        args = get_argparser().parse_args([])

    # Hyper-parameters
    hparams_file_name = 'hyperparams.yaml'
    try:
        hparams = load_dict_from_yaml(osp.join(ex_dir, hparams_file_name))
        extra['hparams'] = hparams
    except (pyrado.PathErr, FileNotFoundError, KeyError):
        print_cbt(f'Did not find {hparams_file_name} in {ex_dir} or could not crawl the loaded hyper-parameters.',
                  'y', bright=True)

    # Algorithm specific
    algo = Algorithm.load_snapshot(load_dir=ex_dir, load_name='algo')

    if isinstance(algo, BayRn):
        # Environment
        env = pyrado.load(None, 'env_sim', 'pkl', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, 'env_sim.pkl')}.", 'g')
        if hasattr(env, 'randomizer'):
            last_cand = to.load(osp.join(ex_dir, 'candidates.pt'))[-1, :]
            env.adapt_randomizer(last_cand.numpy())
            print_cbt(f'Loaded the domain randomizer\n{env.randomizer}', 'w')
        else:
            print_cbt('Loaded environment has no randomizer.', 'r')

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Extra (value function)
        if isinstance(algo.subroutine, ActorCritic):
            extra['vfcn'] = pyrado.load(algo.subroutine.critic.vfcn, f'{args.vfcn_name}', 'pt', ex_dir, None)
            print_cbt(f"Loaded {osp.join(ex_dir, f'{args.vfcn_name}.pt')}", 'g')

    elif isinstance(algo, SPOTA):
        # Environment
        env = pyrado.load(None, 'env', 'pkl', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, 'env.pkl')}.", 'g')
        if hasattr(env, 'randomizer'):
            if not isinstance(env.randomizer, DomainRandWrapperBuffer):
                raise pyrado.TypeErr(given=env.randomizer, expected_type=DomainRandWrapperBuffer)
            typed_env(env, DomainRandWrapperBuffer).fill_buffer(100)
            print_cbt(f"Loaded {osp.join(ex_dir, 'env.pkl')} and filled it with 100 random instances.", 'g')
        else:
            print_cbt('Loaded environment has no randomizer.', 'r')

        # Policy
        policy = pyrado.load(algo.subroutine_cand.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Extra (value function)
        if isinstance(algo.subroutine_cand, ActorCritic):
            extra['vfcn'] = pyrado.load(algo.subroutine_cand.critic.vfcn, f'{args.vfcn_name}', 'pt', ex_dir, None)
            print_cbt(f"Loaded {osp.join(ex_dir, f'{args.vfcn_name}.pt')}", 'g')

    elif isinstance(algo, SimOpt):
        # Environment
        env = pyrado.load(None, 'env_sim', 'pkl', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, 'env_sim.pkl')}.", 'g')
        if hasattr(env, 'randomizer'):
            last_cand = to.load(osp.join(ex_dir, 'candidates.pt'))[-1, :]
            env.adapt_randomizer(last_cand.numpy())
            print_cbt(f'Loaded the domain randomizer\n{env.randomizer}', 'w')
        else:
            print_cbt('Loaded environment has no randomizer.', 'r')

        # Policy
        policy = pyrado.load(algo.subroutine_policy.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Extra (domain parameter distribution policy)
        extra['ddp_policy'] = pyrado.load(algo.subroutine_distr.policy, 'ddp_policy', 'pt', ex_dir, None)

    elif isinstance(algo, (EPOpt, UDR)):
        # Environment
        env = pyrado.load(None, 'env_sim', 'pkl', ex_dir, None)
        if hasattr(env, 'randomizer'):
            if not isinstance(env.randomizer, DomainRandWrapperLive):
                raise pyrado.TypeErr(given=env.randomizer, expected_type=DomainRandWrapperLive)
            print_cbt(f"Loaded {osp.join(ex_dir, 'env_sim.pkl')} with DomainRandWrapperLive randomizer.", 'g')
        else:
            print_cbt('Loaded environment has no randomizer.', 'y')

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Extra (value function)
        if isinstance(algo.subroutine, ActorCritic):
            extra['vfcn'] = pyrado.load(algo.subroutine.critic.vfcn, f'{args.vfcn_name}', 'pt', ex_dir, None)
            print_cbt(f"Loaded {osp.join(ex_dir, f'{args.vfcn_name}.pt')}", 'g')

    elif isinstance(algo, ActorCritic):
        # Environment
        env = pyrado.load(None, 'env', 'pkl', ex_dir, None)

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Extra (value function)
        extra['vfcn'] = pyrado.load(algo.critic.vfcn, f'{args.vfcn_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.vfcn_name}.pt')}", 'g')

    elif isinstance(algo, ParameterExploring):
        # Environment
        env = pyrado.load(None, 'env', 'pkl', ex_dir, None)

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

    elif isinstance(algo, ValueBased):
        # Environment
        env = pyrado.load(None, 'env', 'pkl', ex_dir, None)

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Target value functions
        if isinstance(algo, DQL):
            extra['qfcn_target'] = pyrado.load(algo.qfcn_targ, 'qfcn_target', 'pt', ex_dir, None)
            print_cbt(f"Loaded {osp.join(ex_dir, 'qfcn_target.pt')}", 'g')
        elif isinstance(algo, SAC):
            extra['qfcn_target1'] = pyrado.load(algo.qfcn_targ_1, 'qfcn_target1', 'pt', ex_dir, None)
            extra['qfcn_target2'] = pyrado.load(algo.qfcn_targ_2, 'qfcn_target2', 'pt', ex_dir, None)
            print_cbt(f"Loaded {osp.join(ex_dir, 'qfcn_target1.pt')} and {osp.join(ex_dir, 'qfcn_target2.pt')}", 'g')
        else:
            raise NotImplementedError

    elif isinstance(algo, SVPG):
        # Environment
        env = pyrado.load(None, 'env', 'pkl', ex_dir, None)

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", 'g')

        # Extra (particles)
        for idx, p in enumerate(algo.particles):
            extra[f'particle{idx}'] = pyrado.load(algo.particles[idx], f'particle_{idx}', 'pt', ex_dir, None)

    elif isinstance(algo, TSPred):
        # Dataset
        extra['dataset'] = to.load(osp.join(ex_dir, 'dataset.pt'))

        # Policy
        policy = pyrado.load(algo.policy, f'{args.policy_name}', 'pt', ex_dir, None)

    else:
        raise pyrado.TypeErr(msg='No matching algorithm name found while loading the experiment!')

    # Check if the return types are correct. They can be None, too.
    if env is not None and not isinstance(env, (SimEnv, EnvWrapper)):
        raise pyrado.TypeErr(given=env, expected_type=[SimEnv, EnvWrapper])
    if policy is not None and not isinstance(policy, Policy):
        raise pyrado.TypeErr(given=policy, expected_type=Policy)
    if extra is not None and not isinstance(extra, dict):
        raise pyrado.TypeErr(given=extra, expected_type=dict)

    return env, policy, extra
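# ----------------------------------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): loading a finished experiment and
# inspecting the returned objects. The import location of load_experiment is an assumption.
# ----------------------------------------------------------------------------------------------------
from pyrado.logger.experiment import ask_for_experiment
from pyrado.utils.argparser import get_argparser
# from pyrado.utils.experiments import load_experiment  # assumed location of the function above

ex_dir = ask_for_experiment()  # interactively select an experiment directory
args = get_argparser().parse_args([])  # default arguments, ignoring the command line
env, policy, extra = load_experiment(ex_dir, args)

print(f'Environment: {type(env).__name__}')
print(f'Policy: {type(policy).__name__}')
print(f'Extra objects: {list(extra.keys())}')  # e.g. ['hparams', 'vfcn'] depending on the algorithm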
def load_experiment(
    ex_dir: str, args: Any = None
) -> Tuple[Optional[Union[SimEnv, EnvWrapper]], Optional[Policy], Optional[dict]]:
    """
    Load the (training) environment and the policy.
    This helper function first tries to read the hyper-parameters yaml-file in the experiment's directory to infer
    which entities should be loaded. If no file was found, we fall back to some heuristic and hope for the best.

    :param ex_dir: experiment's parent directory
    :param args: arguments from the argument parser, pass `None` to fall back to the values from the default argparser
    :return: environment, policy, and optional output (e.g. value function)
    """
    env, policy, extra = None, None, dict()

    if args is None:
        # Fall back to default arguments. By passing [], we ignore the command line arguments
        args = get_argparser().parse_args([])

    # Hyper-parameters
    extra["hparams"] = load_hyperparameters(ex_dir)

    # Algorithm specific
    algo = Algorithm.load_snapshot(load_dir=ex_dir, load_name="algo")

    if algo.name == "spota":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            if not isinstance(env, DomainRandWrapperBuffer):
                raise pyrado.TypeErr(given=env, expected_type=DomainRandWrapperBuffer)
            typed_env(env, DomainRandWrapperBuffer).fill_buffer(10)
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}\nand filled it with 10 random instances.", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.", "r")

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.subroutine_cand.policy, verbose=True)

        # Extra (value function)
        if isinstance(algo.subroutine_cand, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt", ex_dir, obj=algo.subroutine_cand.critic.vfcn, verbose=True)

    elif algo.name == "bayrn":
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            last_cand = to.load(osp.join(ex_dir, "candidates.pt"))[-1, :]
            env.adapt_randomizer(last_cand.numpy())
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.", "r")

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Extra (value function)
        if isinstance(algo.subroutine, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt", ex_dir, obj=algo.subroutine.critic.vfcn, verbose=True)

    elif algo.name == "simopt":
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            last_cand = to.load(osp.join(ex_dir, "candidates.pt"))[-1, :]
            env.adapt_randomizer(last_cand.numpy())
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.", "r")

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.subroutine_policy.policy, verbose=True)

        # Extra (domain parameter distribution policy)
        extra["ddp_policy"] = pyrado.load("ddp_policy.pt", ex_dir, obj=algo.subroutine_distr.policy, verbose=True)

    elif algo.name in ["epopt", "udr"]:
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            if not isinstance(env, DomainRandWrapperLive):
                raise pyrado.TypeErr(given=env, expected_type=DomainRandWrapperLive)
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.", "y")

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Extra (value function)
        if isinstance(algo.subroutine, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt", ex_dir, obj=algo.subroutine.critic.vfcn, verbose=True)

    elif algo.name in ["bayessim", "npdr"]:
        # Environment
        env = pyrado.load("env_sim.pkl", ex_dir)
        if getattr(env, "randomizer", None) is not None:
            if not isinstance(env, DomainRandWrapperBuffer):
                raise pyrado.TypeErr(given=env, expected_type=DomainRandWrapperBuffer)
            typed_env(env, DomainRandWrapperBuffer).fill_buffer(10)
            print_cbt(f"Loaded the domain randomizer\n{env.randomizer}\nand filled it with 10 random instances.", "w")
        else:
            print_cbt("Loaded environment has no randomizer, or it is None.", "y")
        env = remove_all_dr_wrappers(env, verbose=True)

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Extra (prior, posterior, data)
        extra["prior"] = pyrado.load("prior.pt", ex_dir, verbose=True)
        # By default, load the latest posterior (latest iteration and the last round)
        try:
            extra["posterior"] = algo.load_posterior(ex_dir, args.iter, args.round, obj=None, verbose=True)
            # Load the complete data or the data of the given iteration
            prefix = "" if args.iter == -1 else f"iter_{args.iter}"
            extra["data_real"] = pyrado.load("data_real.pt", ex_dir, prefix=prefix, verbose=True)
        except FileNotFoundError:
            pass

    elif algo.name in ["a2c", "ppo", "ppo2"]:
        # Environment
        env = pyrado.load("env.pkl", ex_dir)

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Extra (value function)
        extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt", ex_dir, obj=algo.critic.vfcn, verbose=True)

    elif algo.name in ["hc", "pepg", "power", "cem", "reps", "nes"]:
        # Environment
        env = pyrado.load("env.pkl", ex_dir)

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

    elif algo.name in ["dql", "sac"]:
        # Environment
        env = pyrado.load("env.pkl", ex_dir)

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Target value functions
        if algo.name == "dql":
            extra["qfcn_target"] = pyrado.load("qfcn_target.pt", ex_dir, obj=algo.qfcn_targ, verbose=True)
        elif algo.name == "sac":
            extra["qfcn_target1"] = pyrado.load("qfcn_target1.pt", ex_dir, obj=algo.qfcn_targ_1, verbose=True)
            extra["qfcn_target2"] = pyrado.load("qfcn_target2.pt", ex_dir, obj=algo.qfcn_targ_2, verbose=True)
        else:
            raise NotImplementedError

    elif algo.name == "svpg":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Extra (particles)
        for idx, p in enumerate(algo.particles):
            extra[f"particle{idx}"] = pyrado.load(f"particle_{idx}.pt", ex_dir, obj=algo.particles[idx], verbose=True)

    elif algo.name == "tspred":
        # Dataset
        extra["dataset"] = to.load(osp.join(ex_dir, "dataset.pt"))

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

    elif algo.name == "sprl":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)
        print_cbt(f"Loaded {osp.join(ex_dir, 'env.pkl')}.", "g")

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy)
        print_cbt(f"Loaded {osp.join(ex_dir, f'{args.policy_name}.pt')}", "g")

        # Extra (value function)
        if isinstance(algo._subroutine, ActorCritic):
            extra["vfcn"] = pyrado.load(f"{args.vfcn_name}.pt", ex_dir, obj=algo._subroutine.critic.vfcn, verbose=True)

    elif algo.name == "pddr":
        # Environment
        env = pyrado.load("env.pkl", ex_dir)

        # Policy
        policy = pyrado.load(f"{args.policy_name}.pt", ex_dir, obj=algo.policy, verbose=True)

        # Teachers
        extra["teacher_policies"] = algo.teacher_policies
        extra["teacher_envs"] = algo.teacher_envs
        extra["teacher_expl_strats"] = algo.teacher_expl_strats
        extra["teacher_critics"] = algo.teacher_critics
        extra["teacher_ex_dirs"] = algo.teacher_ex_dirs

    else:
        raise pyrado.TypeErr(msg="No matching algorithm name found while loading the experiment!")

    # Check if the return types are correct. They can be None, too.
    if env is not None and not isinstance(env, (SimEnv, EnvWrapper)):
        raise pyrado.TypeErr(given=env, expected_type=[SimEnv, EnvWrapper])
    if policy is not None and not isinstance(policy, Policy):
        raise pyrado.TypeErr(given=policy, expected_type=Policy)
    if extra is not None and not isinstance(extra, dict):
        raise pyrado.TypeErr(given=extra, expected_type=dict)

    return env, policy, extra
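# ----------------------------------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): evaluating a loaded policy with one
# rollout. The rollout helper, its eval flag, and the StepSequence attributes used below are assumptions
# about pyrado's sampling API and may differ between versions.
# ----------------------------------------------------------------------------------------------------
from pyrado.sampling.rollout import rollout

env, policy, extra = load_experiment(ex_dir)  # ex_dir points to a finished experiment

ro = rollout(env, policy, eval=True)  # perform one evaluation rollout with the loaded policy
print(f"Return of the loaded policy: {sum(ro.rewards)}")

if "vfcn" in extra:
    print(f"The experiment also stored a value function of type {type(extra['vfcn']).__name__}.")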
def _main():
    # Parse command line arguments
    argparser = get_argparser()
    argparser.add_argument(
        "--average",
        action="store_true",
        help="average over all loaded policies (default: False); create only a single heatmap",
    )
    argparser.add_argument("--save_dir", help="if --average is set, the directory to save the plot to")
    args = argparser.parse_args()

    # Get the experiment's directory to load from
    if args.dir is None:
        ex_dirs = []
        while True:
            ex_dirs.append(ask_for_experiment(show_hyper_parameters=args.show_hyperparameters, max_display=50))
            if input("Ask for more (Y/n)? ") == "n":
                break
    else:
        ex_dirs = [d.strip() for d in args.dir.split(",")]

    eval_parent_dirs = []
    for ex_dir in ex_dirs:
        eval_parent_dir = osp.join(ex_dir, "eval_domain_grid")
        if not osp.isdir(eval_parent_dir):
            raise pyrado.PathErr(given=eval_parent_dir)
        eval_parent_dirs.append(eval_parent_dir)

    if args.load_all:
        list_eval_dirs = []
        for eval_parent_dir in eval_parent_dirs:
            list_eval_dirs += [tmp[0] for tmp in os.walk(eval_parent_dir)][1:]
    else:
        list_eval_dirs = [osp.join(eval_parent_dir, "ENV_NAME", "ALGO_NAME") for eval_parent_dir in eval_parent_dirs]

    dataframes, eval_dirs = [], []
    for eval_dir in list_eval_dirs:
        assert osp.isdir(eval_dir)

        # Load the data
        pickle_file = osp.join(eval_dir, "df_sp_grid_2d.pkl")
        if not osp.isfile(pickle_file):
            print(f"{pickle_file} is not a file! Skipping...")
            continue
        df = pd.read_pickle(pickle_file)
        dataframes.append(df)
        eval_dirs.append(eval_dir)

    if args.average:
        _plot([sum(dataframes) / len(dataframes)], [args.save_dir], True)
    else:
        _plot(dataframes, eval_dirs, args.save)
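# ----------------------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original script) of a _plot helper compatible with the calls
# above. It assumes that each pickled dataframe already is a 2D grid of mean returns, with one domain
# parameter along the index and one along the columns; the actual layout of df_sp_grid_2d.pkl may differ.
# ----------------------------------------------------------------------------------------------------
import os.path as osp
from typing import Optional, Sequence

import pandas as pd
from matplotlib import pyplot as plt


def _plot(dataframes: Sequence[pd.DataFrame], save_dirs: Sequence[Optional[str]], save: bool):
    for df, save_dir in zip(dataframes, save_dirs):
        fig, ax = plt.subplots(1, 1)
        im = ax.imshow(df.to_numpy(), origin="lower", aspect="auto")  # render the 2D grid as a heatmap
        fig.colorbar(im, ax=ax, label="return")
        ax.set_xlabel(df.columns.name or "domain parameter 1")
        ax.set_ylabel(df.index.name or "domain parameter 2")
        if save and save_dir is not None:
            fig.savefig(osp.join(save_dir, "heatmap.pdf"))
    plt.show()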