def load_rollouts_from_dir(
    ex_dir: str, key: str = "rollout", file_exts: Tuple[str, ...] = ("pt", "pkl")
) -> Tuple[List[StepSequence], List[str]]:
    """
    Crawl through the given directory, sort the files, and load all rollouts, i.e. all files whose names contain
    the key.

    :param ex_dir: directory, e.g. an experiment folder
    :param key: word or part of a word that needs to be in the name of a file for it to be loaded
    :param file_exts: file extensions to be considered for loading
    :return: list of loaded rollouts, and list of file names without extension
    """
    if not osp.isdir(ex_dir):
        raise pyrado.PathErr(given=ex_dir)
    if not isinstance(key, str):
        raise pyrado.TypeErr(given=key, expected_type=str)
    if not is_iterable(file_exts):
        raise pyrado.TypeErr(given=file_exts, expected_type=Iterable)

    rollouts = []
    names = []
    for root, dirs, files in os.walk(ex_dir):
        dirs.clear()  # prevents walk() from going into subdirectories
        files = natural_sort(files)  # natural_sort() returns the sorted list
        for f in files:
            f_ext = f[f.rfind(".") + 1 :]
            if key in f and f_ext in file_exts:
                name = f[: f.rfind(".")]
                names.append(name)
                rollouts.append(pyrado.load(f"{name}.{f_ext}", load_dir=root))

    if not rollouts:
        raise pyrado.ValueErr(msg="No rollouts have been found!")

    if isinstance(rollouts[0], list):
        if not check_all_types_equal(rollouts):
            raise pyrado.TypeErr(msg="Some rollout savings contain lists of rollouts, others don't!")
        # The rollout files contain lists of rollouts, flatten them
        rollouts = list(itertools.chain(*rollouts))

    return rollouts, names
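# Usage sketch for load_rollouts_from_dir() -- a minimal, hypothetical example. The
# directory "experiments/my_ex" and the helper name are placeholders; StepSequence
# exposes the number of recorded steps via its `length` attribute.
def _demo_load_rollouts(ex_dir: str = "experiments/my_ex"):
    rollouts, names = load_rollouts_from_dir(ex_dir, key="rollout")
    for name, ro in zip(names, rollouts):
        print(f"{name}: {ro.length} steps")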
def eval_init_policies(self):
    """
    Execute the trained initial policies on the target device, and store the estimated return per candidate.
    The number of initial policies to evaluate is the number of found policies.
    """
    # Crawl through the experiment's directory
    for root, dirs, files in os.walk(self.save_dir):
        dirs.clear()  # prevents walk() from going into subdirectories
        found_policies = [p for p in files if p.startswith("init_") and p.endswith("_policy.pt")]
        found_cands = [c for c in files if c.startswith("init_") and c.endswith("_candidate.pt")]

    if len(found_policies) != len(found_cands):
        raise pyrado.ValueErr(msg="Found a different number of initial policies than candidates!")
    elif len(found_policies) == 0:
        raise pyrado.ValueErr(msg="No policies or candidates found!")

    num_init_cand = len(found_cands)
    cands_values = to.empty(num_init_cand)

    # Load all found candidates and stack them into a single tensor
    found_cands = natural_sort(found_cands)  # the order is important since it determines the rows of the tensor
    cands = to.stack([to.load(osp.join(self.save_dir, c)) for c in found_cands])

    # Evaluate the policies learned from the random initial candidates on the target (real-world) system
    for i in range(num_init_cand):
        policy = pyrado.load("policy.pt", self.save_dir, prefix=f"init_{i}", obj=self.policy)
        cands_values[i] = self.eval_policy(
            self.save_dir,
            self._env_real,
            policy,
            self.mc_estimator,
            prefix=f"init_{i}",
            num_rollouts=self.num_eval_rollouts_real,
            num_workers=self.num_workers,
        )

    # Save the candidates' returns into a tensor (the policies are saved during training or exist already)
    pyrado.save(cands_values, "candidates_values.pt", self.save_dir)
    self.cands, self.cands_values = cands, cands_values
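# Usage sketch for eval_init_policies() -- hedged, since the enclosing algorithm class
# is not shown in this excerpt. `algo` stands for an instance whose save_dir already
# contains the init_<i>_policy.pt and init_<i>_candidate.pt files written during the
# initial training phase; the helper name is a placeholder.
def _demo_eval_init_policies(algo):
    algo.eval_init_policies()
    print(
        f"Evaluated {algo.cands_values.numel()} initial candidates, "
        f"best estimated return: {algo.cands_values.max().item():.3f}"
    )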
def load_hyperparameters(ex_dir: pyrado.PathLike, verbose: bool = True) -> Optional[dict]:
    """
    Load the hyper-parameter dict from the given experiment directory. The hyper-parameter file is assumed to be
    named `hyperparams.yaml`.

    :param ex_dir: experiment's directory to load from
    :param verbose: if `True`, print a message if no hyper-parameter file was found
    :return: the loaded hyper-parameter dict, or `None` if no file was found
    """
    hparams_file_name = "hyperparams.yaml"

    for root, dirs, files in os.walk(ex_dir):
        dirs.clear()  # prevents walk() from going into subdirectories
        files = natural_sort(files)  # natural_sort() returns the sorted list

        if hparams_file_name in files:
            # Default case
            return load_dict_from_yaml(osp.join(ex_dir, hparams_file_name))

        for file in files:
            # Recursively merge the hyper-parameter configurations
            if file.startswith("hparam") and file.endswith(".yaml"):
                hparam_args = load_dict_from_yaml(osp.join(ex_dir, file))
                setting_args = load_dict_from_yaml(osp.join(ex_dir, "settings.yaml"))
                return update_matching_keys_recursively(setting_args, hparam_args)

    # No hyper-parameter file was found
    if verbose:
        print_cbt(
            f"Did not find {hparams_file_name} in {ex_dir} or could not crawl the loaded hyper-parameters.",
            "y",
            bright=True,
        )
    return None
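# Usage sketch for load_hyperparameters() -- a minimal, hypothetical example; the
# directory "experiments/my_ex" and the helper name are placeholders.
def _demo_load_hyperparameters(ex_dir: str = "experiments/my_ex"):
    hparams = load_hyperparameters(ex_dir, verbose=True)
    if hparams is not None:
        for key, value in hparams.items():
            print(f"{key}: {value}")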
found_cands = [
    c for c in files if not c.startswith("init_") and c.endswith("_candidate.pt")
]

# Check
if not found_policies:
    raise pyrado.ShapeErr(msg="No policies found!")
if not found_cands:
    raise pyrado.ShapeErr(msg="No candidates found!")
if len(found_policies) != len(found_cands):  # don't count the final policy
    raise pyrado.ShapeErr(
        msg=f"Found {len(found_policies)} initial policies but {len(found_cands)} candidates!"
    )

# Sort
found_policies = natural_sort(found_policies)
found_cands = natural_sort(found_cands)

# Plot the candidate values
fig, ax = plt.subplots(1)
for i in range(len(found_cands)):
    cand = to.load(osp.join(ex_dir, found_cands[i])).numpy()
    ax.scatter(np.arange(cand.size), cand, label=r"$\phi_{" + str(i) + "}$", c=f"C{i % 10}", s=16)
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.set_ylabel("parameter value")
ax.set_xlabel("parameter index")
plt.legend()
if __name__ == "__main__":
    # Parse command line arguments
    args = get_argparser().parse_args()
    plt.rc("text", usetex=args.use_tex)

    # Get the experiments' directories to load from
    if args.dir is None:
        parent_dir = input("Please enter the directory for the experiments to compare:\n")
    else:
        parent_dir = args.dir
    if not osp.isdir(parent_dir):
        raise pyrado.PathErr(given=parent_dir)
    dirs = get_immediate_subdirs(parent_dir)
    dirs = natural_sort(dirs)

    # Collect average and best returns per iteration
    df = pd.DataFrame()
    best_returns = []

    # Plot progress of each experiment
    fig, axs = plt.subplots(2, figsize=(12, 8))
    for idx, d in enumerate(dirs):
        # Load an experiment's data
        file = os.path.join(d, "progress.csv")
        data = read_csv_w_replace(file)

        # Append one column per experiment
        df = pd.concat([df, pd.DataFrame({f"ex_{idx}": data.avg_return})], axis=1)
def __init__(
    self,
    name: str,
    parent_dir: str,
    incl_pattern: Optional[str] = None,
    excl_pattern: Optional[str] = None,
    latest_evals_only: bool = False,
    eval_subdir_name: str = "evaluation",
    sort: bool = False,
):
    """
    Constructor

    :param name: label for the data, e.g. name of the algorithm
    :param parent_dir: path to the algorithm's directory
    :param incl_pattern: only include experiments if their names partially contain the include pattern
    :param excl_pattern: exclude experiments if their names partially contain the exclude pattern
    :param latest_evals_only: if `True`, only the very latest evaluation file is loaded to estimate the returns
    :param eval_subdir_name: name of the subdirectory that contains the evaluation files
    :param sort: sort the found experiments by name, i.e. by date
    """
    if not osp.isdir(parent_dir):
        raise pyrado.PathErr(given=parent_dir)
    if incl_pattern is not None and not isinstance(incl_pattern, str):
        raise pyrado.TypeErr(given=incl_pattern, expected_type=str)
    if excl_pattern is not None and not isinstance(excl_pattern, str):
        raise pyrado.TypeErr(given=excl_pattern, expected_type=str)

    self.name = name
    self.parent_dir = parent_dir
    self.incl_pattern = incl_pattern
    self.excl_pattern = excl_pattern
    self.latest_evals_only = latest_evals_only
    self.eval_subdir_name = eval_subdir_name

    # Include experiments
    self.matches = get_immediate_subdirs(parent_dir)
    if sort:
        self.matches = natural_sort(self.matches)

    if self.incl_pattern is not None:
        # Only include experiments if their names partially contain the include pattern
        self.matches = list(filter(lambda d: self.incl_pattern in d, self.matches))
    if self.excl_pattern is not None:
        # Exclude experiments if their names partially contain the exclude pattern
        self.matches = list(filter(lambda d: self.excl_pattern not in d, self.matches))

    self._returns_est_per_ex = []
    self.returns_est = []
    cnt_nonexist_dirs = 0
    for match in self.matches:
        # Get the evaluation subdirectory
        eval_dir = osp.join(match, self.eval_subdir_name)

        if osp.exists(eval_dir):
            # Crawl through the experiment's evaluation directory
            rets = []  # empirical returns from the experiments
            num_samples = []  # number of samples per return estimate
            for root, dirs, files in os.walk(eval_dir):
                files.sort(reverse=True)  # in case there are multiple evaluations
                # Only include the latest evaluation found in the folder if the flag is set
                for f in files if not self.latest_evals_only else files[:1]:
                    if f.endswith(".npy"):
                        rets.append(np.load(osp.join(eval_dir, f)))
                        num_samples.append(len(rets[-1]))  # samples of this estimate, not the number of estimates
                    elif f.endswith(".pt"):
                        rets.append(to.load(osp.join(eval_dir, f)).cpu().numpy())
                    else:
                        raise FileNotFoundError(f"Unexpected evaluation file {f} in {eval_dir}!")

            # Store the estimated return per evaluation run (averaged over individual evaluations)
            self._returns_est_per_ex.append(np.mean(np.asarray(rets), axis=1))
            self.returns_est.extend(np.mean(np.asarray(rets), axis=1))

        else:
            cnt_nonexist_dirs += 1

    # Print what has been loaded
    ex_names = ["..." + m[m.rfind("/") :] for m in self.matches]  # cut off everything until the experiment's name
    print(
        tabulate(
            [[ex_name, ret] for ex_name, ret in zip(ex_names, self._returns_est_per_ex)],
            headers=["Loaded directory", "Returns averaged per experiment"],
        )
    )

    if cnt_nonexist_dirs == 0:
        print_cbt("All evaluation sub-directories have been found.", "g")
    else:
        print_cbt(f"{cnt_nonexist_dirs} evaluation sub-directories have been missed.", "y")
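# Usage sketch for the constructor above -- hedged, since the enclosing class name is
# not shown in this excerpt; `ResultContainer`, the paths, and the patterns are
# placeholders.
def _demo_result_container():
    results = ResultContainer(
        name="PPO",
        parent_dir="experiments/ppo_runs",
        incl_pattern="ppo",  # only keep experiments whose names contain "ppo"
        latest_evals_only=True,
        sort=True,
    )
    print(
        f"Collected {len(results.returns_est)} return estimates "
        f"from {len(results.matches)} experiments"
    )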