import os
import os.path as osp
import tempfile
from typing import List, Optional

import tensorflow as tf

# Project-local helpers, assumed to match the surrounding module's imports:
# `sacred_util` provides `filter_subdirs`, `SacredDicts`, etc., and `get` is
# assumed to perform a nested dict lookup, e.g. get(d, "experiment.name").
from imitation.util import sacred as sacred_util
from imitation.util.sacred import dict_get_nested as get


def gather_tb_directories(
    source_dir: str,
    run_name: Optional[str],
    env_name: Optional[str],
    skip_failed_runs: bool,
) -> dict:
    """Gather TensorBoard directories from a `parallel_ex` run.

    The directories are symlinked into a unique directory in
    `/tmp/analysis_tb/`, under subdirectories matching the TensorBoard
    events' Ray Tune trial names.

    Undocumented arguments are the same as in `analyze_imitation()`.

    Args:
        source_dir: A local_dir for Ray. For example, `~/ray_results/`.

    Returns:
        A dict with two keys. "gather_dir" (str) is a path to a /tmp/
        directory containing all the TensorBoard runs filtered from
        `source_dir`. "n_tb_dirs" (int) is the number of TensorBoard
        directories that were filtered.
    """
    sacred_dicts = _get_sacred_dicts(
        source_dir, run_name, env_name, skip_failed_runs)

    os.makedirs("/tmp/analysis_tb", exist_ok=True)
    tmp_dir = tempfile.mkdtemp(dir="/tmp/analysis_tb/")

    tb_dirs_count = 0
    for sd in sacred_dicts:
        # Expecting a path like "~/ray_results/{run_name}/sacred/1".
        # Want to search for all TensorBoard dirs inside
        # "~/ray_results/{run_name}".
        sacred_dir = sd.sacred_dir.rstrip("/")
        run_dir = osp.dirname(osp.dirname(sacred_dir))
        # Renamed from `run_name` to avoid shadowing the filter argument.
        run_basename = osp.basename(run_dir)

        # "tb" is the TensorBoard directory built by our codebase. "sb_tb" is
        # the Stable Baselines TensorBoard directory. There should be at most
        # one of each directory.
        for basename in ["tb", "sb_tb"]:
            tb_src_dirs = tuple(
                sacred_util.filter_subdirs(
                    run_dir, lambda path: osp.basename(path) == basename))
            if tb_src_dirs:
                assert len(tb_src_dirs) == 1, \
                    "expect at most one TB dir of each type"
                tb_src_dir = tb_src_dirs[0]

                symlinks_dir = osp.join(tmp_dir, basename)
                os.makedirs(symlinks_dir, exist_ok=True)

                # Symlink (rather than copy) each run's TensorBoard directory
                # into the gather directory.
                tb_symlink = osp.join(symlinks_dir, run_basename)
                os.symlink(tb_src_dir, tb_symlink)
                tb_dirs_count += 1

    tf.logging.info(
        f"Symlinked {tb_dirs_count} TensorBoard dirs to {tmp_dir}.")
    tf.logging.info(
        f"Start TensorBoard with `tensorboard --logdir {tmp_dir}`.")
    return {"n_tb_dirs": tb_dirs_count, "gather_dir": tmp_dir}
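
# Usage sketch (a minimal example, not part of the module's API: it assumes a
# completed `parallel_ex` run under `~/ray_results/`; all argument values
# below are hypothetical):
#
#   result = gather_tb_directories(
#       source_dir=osp.expanduser("~/ray_results"),
#       run_name=None,           # no filtering on experiment name
#       env_name=None,           # no filtering on environment
#       skip_failed_runs=True,   # drop runs whose Sacred status is FAILED
#   )
#   print(result["n_tb_dirs"], result["gather_dir"])
#   # Then inspect with: tensorboard --logdir <result["gather_dir"]>
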
def _get_sacred_dicts(
    source_dir: str,
    run_name: Optional[str],
    env_name: Optional[str],
    skip_failed_runs: bool,
) -> List[sacred_util.SacredDicts]:
    """Load `SacredDicts` from `source_dir`, filtered by the given criteria."""
    sacred_dirs = sacred_util.filter_subdirs(source_dir)
    sacred_dicts = [
        sacred_util.SacredDicts.load_from_dir(sacred_dir)
        for sacred_dir in sacred_dirs
    ]

    if run_name is not None:
        sacred_dicts = filter(
            lambda sd: get(sd.run, "experiment.name") == run_name,
            sacred_dicts)

    if env_name is not None:
        sacred_dicts = filter(
            lambda sd: get(sd.config, "env_name") == env_name,
            sacred_dicts)

    if skip_failed_runs:
        sacred_dicts = filter(
            lambda sd: get(sd.run, "status") != "FAILED",
            sacred_dicts)

    return list(sacred_dicts)
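
# Filtering sketch (hedged: "train_rl" and "CartPole-v1" are hypothetical
# values, and `get` is assumed to resolve dotted keys such as
# run["experiment"]["name"]):
#
#   dicts = _get_sacred_dicts(
#       source_dir=osp.expanduser("~/ray_results"),
#       run_name="train_rl",
#       env_name="CartPole-v1",
#       skip_failed_runs=True,
#   )
#   # `dicts` now contains only matching, non-FAILED runs.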