Пример #1
0
 def exists(self, filesystem: FileSystem = None) -> bool:
     """Tell whether this path refers to an existing file or directory.

     Args:
         filesystem: Optional filesystem object; when given, its
             ``exists`` method answers the question and nothing else is
             consulted.

     Returns:
         The result of the selected backend's existence check: the given
         ``filesystem``, an ``HDFSFileSystem`` opened for the duration of
         the call when ``self.is_hdfs`` is truthy, or ``pathlib``
         otherwise.
     """
     target = str(self)
     # An explicitly supplied filesystem always takes precedence.
     if filesystem is not None:
         return filesystem.exists(target)
     if not self.is_hdfs:
         return pathlib.Path(target).exists()
     with HDFSFileSystem() as hdfs:
         return hdfs.exists(target)
Пример #2
0
 def iterdir(self, filesystem: FileSystem = None) -> Generator["Path", None, None]:
     """Return a generator of ``Path`` objects for the directory entries.

     Args:
         filesystem: Optional filesystem object; when given, its ``ls``
             method provides the listing.

     Returns:
         A generator expression wrapping each listed entry in ``Path``.
         For the ``filesystem`` and HDFS backends the listing is fully
         materialized before the generator is returned; for local paths
         the entries come from ``pathlib.Path.iterdir``.
     """
     if filesystem is None and not self.is_hdfs:
         local_entries = pathlib.Path(str(self)).iterdir()
         return (Path(str(entry)) for entry in local_entries)

     if filesystem is not None:
         listing = list(filesystem.ls(str(self)))
     else:
         # Materialize the listing while the HDFS context is still open;
         # the generator returned below is consumed after it has closed.
         with HDFSFileSystem() as hdfs:
             listing = list(hdfs.ls(str(self)))
     return (Path(entry) for entry in listing)
Пример #3
0
 def is_file(self, filesystem: FileSystem = None) -> bool:
     """Tell whether this path refers to a regular file.

     Args:
         filesystem: Optional filesystem object; when given, its
             ``isfile`` method is used.

     Returns:
         The answer of the selected backend: the given ``filesystem``,
         an ``HDFSFileSystem`` opened for the duration of the call when
         ``self.is_hdfs`` is truthy, or ``pathlib`` otherwise.
     """
     location = str(self)
     if filesystem is None:
         if self.is_hdfs:
             with HDFSFileSystem() as hdfs:
                 return hdfs.isfile(location)
         return pathlib.Path(location).is_file()
     return filesystem.isfile(location)
Пример #4
0
 def delete(self, filesystem: FileSystem = None):
     """Remove the file this path points to.

     Args:
         filesystem: Optional filesystem object; when given, it is passed
             to ``is_file`` and its ``delete`` method performs the removal.

     Raises:
         FileNotFoundError: If ``self.is_file`` reports that the path is
             not a file; the message is the path string.
     """
     if not self.is_file(filesystem=filesystem):
         raise FileNotFoundError(str(self))

     target = str(self)
     if filesystem is not None:
         filesystem.delete(target)
         return
     if not self.is_hdfs:
         pathlib.Path(target).unlink()
         return
     with HDFSFileSystem() as hdfs:
         hdfs.delete(target)
Пример #5
0
 def delete_dir(self, filesystem: FileSystem = None):
     """Recursively remove the directory this path points to.

     Args:
         filesystem: Optional filesystem object; when given, it is passed
             to ``is_dir`` and its ``rm(..., recursive=True)`` performs
             the removal.

     Raises:
         FileNotFoundError: If ``self.is_dir`` reports that the path is
             not a directory; the message is the path string.
     """
     if not self.is_dir(filesystem=filesystem):
         raise FileNotFoundError(str(self))

     if filesystem is None and not self.is_hdfs:
         # Local directory tree.
         shutil.rmtree(str(self))
     elif filesystem is None:
         with HDFSFileSystem() as hdfs:
             hdfs.rm(str(self), recursive=True)
     else:
         filesystem.rm(str(self), recursive=True)
Пример #6
0
 def mkdir(self, parents: bool = False, exist_ok: bool = False, filesystem: FileSystem = None):
     """Create the directory this path points to.

     Args:
         parents: Forwarded to ``pathlib.Path.mkdir`` for local paths.
             NOTE(review): it is not forwarded to ``filesystem.mkdir`` or
             to the HDFS ``mkdir`` -- confirm whether those backends
             create missing parents on their own.
         exist_ok: When true, an already existing directory is accepted
             silently and nothing is created.
         filesystem: Optional filesystem object; when given, it is passed
             to ``is_dir`` and its ``mkdir`` method creates the directory.

     Raises:
         FileExistsError: If ``self.is_dir`` reports that the directory
             already exists and ``exist_ok`` is false.
     """
     if self.is_dir(filesystem=filesystem):
         if exist_ok:
             return
         # FileExistsError is what pathlib raises in this situation and is
         # an Exception subclass, so handlers written against the former
         # bare Exception keep working.
         raise FileExistsError(f"Directory {self} already exists.")

     if filesystem is not None:
         filesystem.mkdir(str(self))
     elif self.is_hdfs:
         with HDFSFileSystem() as hdfs:
             hdfs.mkdir(str(self))
     else:
         pathlib.Path(str(self)).mkdir(parents=parents, exist_ok=exist_ok)
Пример #7
0
 def open(self):
     """Yield a dataset that is ready to use, opening HDFS when required.

     When ``self.filesystem`` is set, or ``self.is_hdfs`` is falsy, the
     instance itself is yielded. Otherwise an ``HDFSFileSystem`` context
     is entered and a new ``ParquetDataset`` carrying this instance's
     settings and that filesystem is yielded; the context stays open
     until the generator is resumed or closed.

     NOTE(review): this is a single-yield generator, presumably wrapped
     by a context-manager decorator outside the visible code -- confirm.
     """
     if self.filesystem is not None or not self.is_hdfs:
         yield self
         return

     with HDFSFileSystem() as hdfs:
         # Same settings as this instance, bound to the live HDFS handle.
         settings = dict(
             path_or_paths=self.path_or_paths,
             filesystem=hdfs,
             metadata=self.metadata,
             schema=self.schema,
             split_row_groups=self.split_row_groups,
             validate_schema=self.validate_schema,
             filters=self.filters,
             metadata_nthreads=self.metadata_nthreads,
             memory_map=self.memory_map,
         )
         yield ParquetDataset(**settings)