def _find_shared_fs_path(self) -> pathlib.Path:
    """Locate this checkpoint's directory when storage is a shared file system.

    Two candidate locations are probed, in order:

    1. the storage path resolved from the configured ``host_path`` and
       optional ``storage_path``;
    2. the same resolution with ``constants.SHARED_FS_CONTAINER_PATH``
       supplied as the container path.

    Each candidate is suffixed with ``self.uuid``. The host path of the
    shared file system is assumed to exist.

    Returns:
        The first candidate path that exists on the local file system.

    Raises:
        FileNotFoundError: If neither candidate exists, which typically
            means the shared file system is not mounted locally.
    """
    storage_cfg_host = self.experiment_config["checkpoint_storage"]["host_path"]
    storage_cfg_sub = self.experiment_config["checkpoint_storage"].get("storage_path")

    # Candidate as seen from the host side of the shared file system.
    host_candidate = pathlib.Path(
        shared._full_storage_path(storage_cfg_host, storage_cfg_sub),
        self.uuid,
    )
    # Candidate resolved against the shared-fs container path constant.
    container_candidate = pathlib.Path(
        shared._full_storage_path(
            storage_cfg_host,
            storage_cfg_sub,
            constants.SHARED_FS_CONTAINER_PATH,
        ),
        self.uuid,
    )
    potential_paths = [host_candidate, container_candidate]

    located = next(
        (candidate for candidate in potential_paths if candidate.exists()),
        None,
    )
    if located is not None:
        return located

    raise FileNotFoundError(
        "Checkpoint {} not found in {}. This error could be caused by not having "
        "the same shared file system mounted on the local machine as the experiment "
        "checkpoint storage configuration.".format(self.uuid, potential_paths)
    )
def build(
    env: det.EnvContext,
    checkpoint_config: Dict[str, Any],
    container_path: Optional[str] = None,
) -> base.TensorboardManager:
    """
    Construct the tensorboard manager selected by the `type` key of the
    storage configuration dictionary.

    Supported `type` values are `shared_fs`, `gcs`, `s3` and `hdfs`.

    container_path, when given, is forwarded to `_full_storage_path` and
    replaces the host_path when the storage path for the
    SharedFSTensorboardManager is computed. It is ignored by every other
    backend.

    Raises a `TypeError` if `type` is missing, is not a string, or does not
    name a known storage backend.
    """
    storage_type = checkpoint_config.get("type")
    if not storage_type:
        raise TypeError("Missing 'type' parameter of storage configuration")
    if not isinstance(storage_type, str):
        raise TypeError("`type` parameter of storage configuration must be a string")

    # Resolved up front, before dispatching on the type, so every backend
    # receives the same pair of paths.
    base_path = get_base_path(checkpoint_config, manager=True)
    sync_path = get_sync_path(env)

    if storage_type == "shared_fs":
        shared_root = _full_storage_path(
            checkpoint_config["host_path"],
            checkpoint_config.get("storage_path"),
            container_path,
        )
        return shared.SharedFSTensorboardManager(shared_root, base_path, sync_path)

    if storage_type == "gcs":
        return gcs.GCSTensorboardManager(checkpoint_config["bucket"], base_path, sync_path)

    if storage_type == "s3":
        # Only the bucket is mandatory; credentials and endpoint fall back to None.
        return s3.S3TensorboardManager(
            checkpoint_config["bucket"],
            checkpoint_config.get("access_key"),
            checkpoint_config.get("secret_key"),
            checkpoint_config.get("endpoint_url"),
            base_path,
            sync_path,
        )

    if storage_type == "hdfs":
        # URL and path are mandatory; the user is optional.
        return hdfs.HDFSTensorboardManager(
            checkpoint_config["hdfs_url"],
            checkpoint_config["hdfs_path"],
            checkpoint_config.get("user"),
            base_path,
            sync_path,
        )

    raise TypeError(f"Unknown storage type: {storage_type}")
def _find_shared_fs_path(self) -> pathlib.Path:
    """Return the on-disk directory of this checkpoint on a shared file system.

    The checkpoint is looked up under two roots, in order: the storage path
    resolved from ``host_path`` / ``storage_path`` alone, and the same
    resolution with ``constants.SHARED_FS_CONTAINER_PATH`` passed as the
    container path. ``self.uuid`` is appended to each root.

    Returns:
        The first of the two locations that exists.

    Raises:
        FileNotFoundError: If the checkpoint exists at neither location.
    """
    host_path = self.storage_config["host_path"]
    storage_path = self.storage_config.get("storage_path")

    # (root, checkpoint id) pairs; joined into a Path only when probed.
    on_host = (shared._full_storage_path(host_path, storage_path), self.uuid)
    in_container = (
        shared._full_storage_path(
            host_path, storage_path, constants.SHARED_FS_CONTAINER_PATH
        ),
        self.uuid,
    )

    for root, ckpt_id in (on_host, in_container):
        candidate = pathlib.Path(root, ckpt_id)
        if candidate.exists():
            return candidate

    raise FileNotFoundError("Checkpoint {} not found".format(self.uuid))
def test_full_storage_path() -> None:
    """Check ``shared._full_storage_path`` against accepted and rejected inputs.

    Covers three groups of cases:

    * a relative ``host_path`` is rejected;
    * valid combinations of ``host_path``, ``storage_path`` and
      ``container_path`` produce the expected joined path;
    * a ``storage_path`` that is not under ``host_path`` is rejected.
    """
    # A host_path that is not absolute must fail the check.
    with pytest.raises(check.CheckFailedError, match="`host_path` must be an absolute path"):
        shared._full_storage_path("host_path")

    # Each entry: (positional args, keyword args, expected result).
    accepted_cases = [
        (("/host_path",), {}, "/host_path"),
        (("/host_path",), {"container_path": "cpath"}, "cpath"),
        (("/host_path", "storage_path"), {}, "/host_path/storage_path"),
        (
            ("/host_path", "storage_path"),
            {"container_path": "cpath"},
            "cpath/storage_path",
        ),
        (
            ("/host_path",),
            {"storage_path": "/host_path/storage_path"},
            "/host_path/storage_path",
        ),
        (
            ("/host_path", "/host_path/storage_path", "cpath"),
            {},
            "cpath/storage_path",
        ),
    ]
    for positional, keyword, expected in accepted_cases:
        path = shared._full_storage_path(*positional, **keyword)
        assert path == expected

    # Storage paths that are not under /host_path must be refused.
    rejected_storage_paths = ["/storage_path", "/host_path/../test", "../test"]
    for bad_storage_path in rejected_storage_paths:
        with pytest.raises(check.CheckFailedError, match="must be a subdirectory"):
            shared._full_storage_path("/host_path", storage_path=bad_storage_path)