def exists(self, filesystem: FileSystem = None) -> bool:
    """Tell whether this path currently exists (as a file or a directory).

    When ``filesystem`` is given it is always the one queried. Otherwise a
    path flagged ``is_hdfs`` is looked up through an ``HDFSFileSystem``
    opened just for this call, and any other path is checked with
    ``pathlib``.
    """
    target = str(self)

    # An explicit filesystem takes precedence over any auto-detection.
    if filesystem is not None:
        return filesystem.exists(target)

    if not self.is_hdfs:
        return pathlib.Path(target).exists()

    with HDFSFileSystem() as hdfs:
        return hdfs.exists(target)
def iterdir(self, filesystem: FileSystem = None) -> Generator["Path", None, None]:
    """Return a generator over the entries of this directory as ``Path`` objects.

    The listing call itself (``filesystem.ls``, ``hdfs.ls`` or
    ``pathlib.Path.iterdir``) is issued right away, before this method
    returns; only the wrapping of each entry into ``Path`` is deferred until
    the generator is consumed. For remote listings the result is copied into
    a list first, so the HDFS context can be closed before iteration starts.
    """
    if filesystem is not None:
        entries = list(filesystem.ls(str(self)))
        return (Path(entry) for entry in entries)

    if not self.is_hdfs:
        children = pathlib.Path(str(self)).iterdir()
        return (Path(str(child)) for child in children)

    with HDFSFileSystem() as hdfs:
        # Materialise the listing while the connection is still open.
        entries = list(hdfs.ls(str(self)))
        return (Path(entry) for entry in entries)
def is_file(self, filesystem: FileSystem = None) -> bool:
    """Tell whether this path refers to a regular file.

    A supplied ``filesystem`` is queried via ``isfile``. Without one, paths
    flagged ``is_hdfs`` are checked through an ``HDFSFileSystem`` opened for
    the duration of the call, and other paths are checked with ``pathlib``.
    """
    target = str(self)

    if filesystem is not None:
        return filesystem.isfile(target)

    if not self.is_hdfs:
        return pathlib.Path(target).is_file()

    with HDFSFileSystem() as hdfs:
        return hdfs.isfile(target)
def delete(self, filesystem: FileSystem = None):
    """Remove the file this path points to.

    The path is first checked with ``is_file`` (using the same
    ``filesystem``). The removal then goes through ``filesystem.delete``
    when a filesystem is given, through an ``HDFSFileSystem`` for paths
    flagged ``is_hdfs``, and through ``pathlib.Path.unlink`` otherwise.

    Raises:
        FileNotFoundError: if ``is_file`` reports that the path is not a
            file; the message is the path itself.
    """
    if not self.is_file(filesystem=filesystem):
        raise FileNotFoundError(str(self))

    if filesystem is not None:
        filesystem.delete(str(self))
        return

    if not self.is_hdfs:
        pathlib.Path(str(self)).unlink()
        return

    with HDFSFileSystem() as hdfs:
        hdfs.delete(str(self))
def delete_dir(self, filesystem: FileSystem = None):
    """Remove the directory this path points to, together with its content.

    The path is first checked with ``is_dir`` (using the same
    ``filesystem``). Removal is recursive in every branch:
    ``rm(..., recursive=True)`` on the supplied filesystem or on an
    ``HDFSFileSystem`` for paths flagged ``is_hdfs``, and ``shutil.rmtree``
    otherwise.

    Raises:
        FileNotFoundError: if ``is_dir`` reports that the path is not a
            directory; the message is the path itself.
    """
    if not self.is_dir(filesystem=filesystem):
        raise FileNotFoundError(str(self))

    if filesystem is not None:
        filesystem.rm(str(self), recursive=True)
        return

    if not self.is_hdfs:
        shutil.rmtree(str(self))
        return

    with HDFSFileSystem() as hdfs:
        hdfs.rm(str(self), recursive=True)
def mkdir(self, parents: bool = False, exist_ok: bool = False, filesystem: FileSystem = None):
    """Create the directory this path points to.

    Args:
        parents: Forwarded to ``pathlib.Path.mkdir`` for local paths only;
            it is not passed to ``filesystem.mkdir`` / ``hdfs.mkdir``.
        exist_ok: When True, an already existing directory is silently
            accepted instead of raising.
        filesystem: Filesystem to operate on. When omitted, paths flagged
            ``is_hdfs`` use an ``HDFSFileSystem`` opened for this call and
            all other paths use ``pathlib``.

    Raises:
        FileExistsError: if ``is_dir`` reports the directory already exists
            and ``exist_ok`` is False.
    """
    if self.is_dir(filesystem=filesystem):
        if exist_ok:
            return
        # FileExistsError is what pathlib raises for the same situation; it
        # is still an Exception, so existing broad handlers keep working.
        raise FileExistsError(f"Directory {self} already exists.")

    if filesystem is not None:
        filesystem.mkdir(str(self))
        return

    if self.is_hdfs:
        with HDFSFileSystem() as hdfs:
            hdfs.mkdir(str(self))
        return

    pathlib.Path(str(self)).mkdir(parents=parents, exist_ok=exist_ok)
def open(self):
    """Yield a dataset that is ready to be read, connecting to HDFS if needed.

    When this dataset already has a ``filesystem`` or is not flagged
    ``is_hdfs``, it yields itself unchanged. Otherwise it yields a new
    ``ParquetDataset`` built from this dataset's settings and bound to an
    ``HDFSFileSystem`` that stays open until the generator is resumed.
    """
    # Nothing to set up: a filesystem is already attached, or the data is
    # not on HDFS.
    if self.filesystem is not None or not self.is_hdfs:
        yield self
        return

    with HDFSFileSystem() as hdfs:
        yield ParquetDataset(
            path_or_paths=self.path_or_paths,
            filesystem=hdfs,
            metadata=self.metadata,
            schema=self.schema,
            split_row_groups=self.split_row_groups,
            validate_schema=self.validate_schema,
            filters=self.filters,
            metadata_nthreads=self.metadata_nthreads,
            memory_map=self.memory_map,
        )