def test_strip_protocol_expanduser():
    """Check that ``_strip_protocol`` drops ``file://`` and expands ``~``."""
    if WIN:
        path = "file://~\\foo\\bar"
    else:
        path = "file://~/foo/bar"

    stripped = LocalFileSystem._strip_protocol(path)
    # The home directory is compared with forward slashes on every platform.
    home = os.path.expanduser("~").replace("\\", "/")

    assert path != stripped
    assert "file://" not in stripped
    assert stripped.startswith(home)
    # A relative directory reference must not keep its trailing slash.
    assert not LocalFileSystem._strip_protocol("./").endswith("/")
def test_linked_files_exists(tmpdir):
    """Check ``exists``/``lexists`` on a symlink while its target, then the link, is removed."""
    target = tmpdir / "original"
    link = tmpdir / "copy"

    local_fs = LocalFileSystem()
    local_fs.touch(target)

    try:
        os.symlink(target, link)
    except OSError:
        if not WIN:
            raise
        pytest.xfail("Ran on win without admin permissions")

    # Link and target both present.
    assert local_fs.exists(link)
    assert local_fs.lexists(link)

    # Target removed: the link is dangling but still exists as a link.
    os.unlink(target)
    assert not local_fs.exists(link)
    assert local_fs.lexists(link)

    # Link removed as well: nothing is left.
    os.unlink(link)
    assert not local_fs.exists(link)
    assert not local_fs.lexists(link)
def test_isdir():
    """Check ``isdir`` for parent directories, plain files and a missing path."""
    local_fs = LocalFileSystem()
    with filetexts(files, mode="b"):
        for name in files:
            parent = os.path.dirname(os.path.abspath(name))
            assert local_fs.isdir(parent)
            assert not local_fs.isdir(name)
        assert not local_fs.isdir("not-a-dir")
def filesystem() -> AbstractFileSystem:
    """Return the filesystem selected by the ``LIGHTNING_*`` environment variables.

    A ``LocalFileSystem`` is returned unless both ``LIGHTNING_BUCKET_ENDPOINT_URL``
    and ``LIGHTNING_BUCKET_NAME`` are set, in which case an ``S3FileSystem``
    pointing at that endpoint is built and checked.

    Raises:
        RuntimeError: if the S3 credentials or ``LIGHTNING_CLOUD_APP_ID`` are
            missing, or if ``shared_storage_path()`` does not exist on the
            S3 filesystem.
    """
    fs = LocalFileSystem()

    endpoint_url = os.getenv("LIGHTNING_BUCKET_ENDPOINT_URL", "")
    bucket_name = os.getenv("LIGHTNING_BUCKET_NAME", "")
    if not (endpoint_url and bucket_name):
        # No bucket configured: stay on the local filesystem.
        return fs

    access_key = os.getenv("LIGHTNING_AWS_ACCESS_KEY_ID", "")
    secret_key = os.getenv("LIGHTNING_AWS_SECRET_ACCESS_KEY", "")
    # TODO: Remove when updated on the platform side.
    if not (access_key and secret_key):
        access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
        secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
    if not (access_key and secret_key):
        raise RuntimeError("missing S3 bucket credentials")

    fs = S3FileSystem(
        key=access_key,
        secret=secret_key,
        use_ssl=False,
        client_kwargs={"endpoint_url": endpoint_url},
    )

    if os.getenv("LIGHTNING_CLOUD_APP_ID", "") == "":
        raise RuntimeError("missing LIGHTNING_CLOUD_APP_ID")

    if not fs.exists(shared_storage_path()):
        raise RuntimeError(
            f"shared filesystem {shared_storage_path()} does not exist")

    return fs
def test_invalid_json(self):
    """A malformed JSON sample must be reported as an ``InvalidSchema``."""
    sample = '/test/sample_data/bad_json.json'
    local_fs = LocalFileSystem()
    with local_fs.open(from_root(sample)) as handle:
        schema = from_file(handle, {})
        assert isinstance(schema, InvalidSchema)
        expected_reason = f"File type not supported for file {from_root(sample)}. Type: ASCII text, with no line terminators"
        assert expected_reason in schema.reason
def test_valid_csv(self):
    """A CSV read with headers yields a ``TextSchema`` with named, typed columns."""
    local_fs = LocalFileSystem()
    with local_fs.open(from_root('/test/sample_data/csv_sample.csv')) as handle:
        schema = from_file(handle, {"read_headers": True})
        assert isinstance(schema, TextSchema)

        column_names = [column.name for column in schema.columns]
        assert column_names == ["type", "price"]

        column_types = [column.type for column in schema.columns]
        assert column_types == ["object", "float64"]
def test_csv_no_header(self):
    """A CSV read without options yields positional column names."""
    local_fs = LocalFileSystem()
    with local_fs.open(from_root('/test/sample_data/csv_no_header.csv')) as handle:
        schema = from_file(handle)
        assert isinstance(schema, TextSchema)

        column_names = [column.name for column in schema.columns]
        assert column_names == [0, 1]

        column_types = [column.type for column in schema.columns]
        assert column_types == ["object", "float64"]
def copy(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwargs):
    """
    Upload the contents of a local source directory into the target directory.

    Unlike fsspec's put(), the source folder itself is not copied into the
    target directory when the target directory already exists.
    """
    from fsspec.implementations.local import LocalFileSystem, make_path_posix
    from fsspec.utils import other_paths

    # Normalise the remote side: a single path or a list of paths.
    if isinstance(rpath, str):
        rpath = self.fs._strip_protocol(rpath)
    else:
        rpath = [self.fs._strip_protocol(p) for p in rpath]

    if isinstance(lpath, str):
        lpath = make_path_posix(lpath)

    sources = LocalFileSystem().expand_path(lpath, recursive=recursive)
    targets = other_paths(sources, rpath)

    callback.set_size(len(targets))
    for source, target in callback.wrap(zip(sources, targets)):
        callback.branch(source, target, kwargs)
        self.fs.put_file(source, target, **kwargs)
def __init__(
    self,
    path: str,
    filesystem: AbstractFileSystem = None,
    time_travel: datetime = None,
):
    """Initialize the Delta Lake table located at ``path``.

    Args:
        path: the path to the table on the filesystem
        filesystem: python-like filesystem (If unset, assume local)
        time_travel: set the delta lake to a specific version; forwarded
            to ``_set_timestamp``
    """
    if filesystem is None:
        # NOTE(review): ``path`` is passed positionally here, so it binds to
        # the first constructor parameter of ``LocalFileSystem`` (documented
        # as ``auto_mkdir``), not to a path argument. Left unchanged to keep
        # current behaviour - confirm whether ``LocalFileSystem()`` was meant.
        self.filesystem = LocalFileSystem(path)
    else:
        self.filesystem = filesystem
    self.path = path
    self._set_timestamp(time_travel)
    self.checkpoint_info = self._get_checkpoint_info()
    self.fileset = set()
def write_bids(
    to: PathLike,
    participants: DataFrame,
    sessions: DataFrame,
    scans: DataFrame,
) -> List[PathLike]:
    """Write the tabular BIDS files under ``to``, then import the imaging data.

    ``participants`` is written to ``participants.tsv``. ``sessions`` is
    grouped by ``participant_id`` and each group is written to
    ``<participant_id>/<participant_id>_sessions.tsv``. Each row of ``scans``
    is then converted with ``convert_dicom`` when its ``format`` is ``"DCM"``
    and installed with ``install_nifti`` otherwise.

    Args:
        to: Root directory of the output dataset.
        participants: Table written to ``participants.tsv``.
        sessions: Table of sessions. Assumes ``participant_id`` is an index
            level, since it is dropped with ``droplevel`` - TODO confirm.
        scans: Table whose index holds output filenames relative to ``to``
            and whose rows expose ``format`` and ``source_dir``.

    Returns:
        The index of ``scans`` as a list.
    """
    from pathlib import Path
    from fsspec.implementations.local import LocalFileSystem
    to = Path(to)
    # auto_mkdir lets the per-participant directories be created on open.
    fs = LocalFileSystem(auto_mkdir=True)
    # Ensure BIDS hierarchy is written first.
    with fs.transaction:
        with fs.open(to / "participants.tsv", "w") as participant_file:
            write_to_tsv(participants, participant_file)
        for participant_id, sessions_group in sessions.groupby(
                "participant_id"):
            sessions_group = sessions_group.droplevel("participant_id")
            sessions_filepath = to / participant_id / f"{participant_id}_sessions.tsv"
            with fs.open(sessions_filepath, "w") as sessions_file:
                write_to_tsv(sessions_group, sessions_file)
    # Perform import of imaging data next.
    for filename, metadata in scans.iterrows():
        if metadata.format == "DCM":
            convert_dicom(sourcedata_dir=metadata.source_dir,
                          bids_filename=to / filename)
        else:
            install_nifti(sourcedata_dir=metadata.source_dir,
                          bids_filename=to / filename)
    return scans.index.to_list()
def test_file_not_supported(self):
    """An unknown file extension must be reported as an ``InvalidSchema``."""
    logger.set_level("error")
    local_fs = LocalFileSystem()
    with local_fs.open(from_root('/test/sample_data/unsupported_file_type.usf')) as handle:
        schema = from_file(handle)
        assert isinstance(schema, InvalidSchema)
        # Only the fixed 32-character prefix of the reason is compared.
        expected_prefix = "File type not supported for file"
        assert schema.reason[:32] == expected_prefix
def test_complex_json(self):
    """A nested JSON sample yields the expected inferred JSON schema."""
    local_fs = LocalFileSystem()
    with local_fs.open(from_root('/test/sample_data/complex_json.json')) as handle:
        schema = from_file(handle)
        assert isinstance(schema, JsonSchema)

        item_properties = {
            'field1': {'type': 'string'},
            'field2': {'type': ['integer', 'string']},
            'field3': {'type': 'string'},
            'field4': {'type': 'string'},
            'field5': {
                'type': 'object',
                'properties': {'some_other_stuff': {'type': 'string'}},
                'required': ['some_other_stuff'],
            },
        }
        expected = {
            '$schema': 'http://json-schema.org/schema#',
            'type': 'object',
            'properties': {
                'data': {
                    'type': 'array',
                    'items': {'type': 'object', 'properties': item_properties},
                },
            },
            'required': ['data'],
        }
        assert schema.schema == expected
def test_jsonl(self):
    """A JSON-lines sample yields a flat schema of string fields."""
    local_fs = LocalFileSystem()
    with local_fs.open(from_root('/test/sample_data/json_lines.jsonl')) as handle:
        schema = from_file(handle)
        assert isinstance(schema, JsonSchema)

        expected = {
            '$schema': 'http://json-schema.org/schema#',
            'properties': {
                'field': {'type': 'string'},
                'field2': {'type': 'string'},
                'field3': {'type': 'string'},
                'field4': {'type': 'string'},
                'field5': {'type': 'string'},
                'field6': {'type': 'string'},
                'field7': {'type': 'string'},
            },
            'type': 'object',
        }
        assert schema.schema == expected
def test_delete_cwd(tmpdir):
    """Recursively removing ``"."`` must raise ``ValueError``."""
    original_dir = os.getcwd()
    local_fs = LocalFileSystem()
    try:
        os.chdir(tmpdir)
        with pytest.raises(ValueError):
            local_fs.rm(".", recursive=True)
    finally:
        # Always restore the working directory for later tests.
        os.chdir(original_dir)
def test_directories(tmpdir):
    """Create, list and remove a directory, then list the root marker."""
    base = make_path_posix(str(tmpdir))
    subdir = base + "/dir"
    local_fs = LocalFileSystem()

    local_fs.mkdir(subdir)
    assert subdir in local_fs.ls(base)
    detailed = local_fs.ls(base, True)
    assert detailed[0]["type"] == "directory"

    local_fs.rmdir(subdir)
    assert not local_fs.ls(base)
    assert local_fs.ls(local_fs.root_marker)
def install_nifti(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    """Write a gzip-compressed copy of a source file to ``bids_filename``.

    The source is the first entry returned by ``fs.ls(sourcedata_dir)``.

    Args:
        sourcedata_dir: Directory listed to find the source file.
        bids_filename: Destination path; opened for binary writing with
            gzip compression.

    Raises:
        IndexError: if listing ``sourcedata_dir`` returns no entries.
    """
    import shutil

    from fsspec.implementations.local import LocalFileSystem

    fs = LocalFileSystem(auto_mkdir=True)
    source_path = fs.ls(sourcedata_dir)[0]
    # Open both files inside ``with`` so the source handle is closed even
    # when opening the target fails.
    with fs.open(source_path, mode="rb") as sf:
        with fs.open(bids_filename, mode="wb", compression="gzip") as tf:
            # Stream in chunks instead of loading the whole image in memory.
            shutil.copyfileobj(sf, tf)
def test_get_pyarrow_filesystem():
    """``LocalFileSystem`` must pass as a pyarrow filesystem; an arbitrary class must not."""
    pa = pytest.importorskip("pyarrow")
    local_fs = LocalFileSystem()

    assert isinstance(local_fs, pa.filesystem.FileSystem)
    assert local_fs._get_pyarrow_filesystem() is local_fs

    class UnknownFileSystem:
        pass

    unrelated = UnknownFileSystem()
    assert not isinstance(unrelated, pa.filesystem.FileSystem)
def test_infer_compression(tmpdir, opener, ext):
    """Data written with ``opener`` is read back via ``compression="infer"``."""
    target = str(tmpdir / f"test{ext}")
    payload = b"hello world"

    with opener(target, "wb") as writer:
        writer.write(payload)

    local_fs = LocalFileSystem()
    with local_fs.open(f"file://{target}", "rb", compression="infer") as reader:
        round_tripped = reader.read()

    assert payload == round_tripped
def test_directories(tmpdir):
    """Create a directory, check it is listed, then remove it."""
    base = str(tmpdir)
    subdir = base + '/dir'
    local_fs = LocalFileSystem()

    local_fs.mkdir(subdir)
    assert subdir in local_fs.ls(base)
    first_entry = local_fs.ls(base, True)[0]
    assert first_entry['type'] == 'directory'

    local_fs.rmdir(subdir)
    assert not local_fs.ls(base)
def test_directories(tmpdir):
    """Create a directory, check it is listed, then remove it."""
    root = str(tmpdir)
    new_dir = root + "/dir"
    local_fs = LocalFileSystem()

    local_fs.mkdir(new_dir)
    assert new_dir in local_fs.ls(root)
    listing = local_fs.ls(root, True)
    assert listing[0]["type"] == "directory"

    local_fs.rmdir(new_dir)
    assert not local_fs.ls(root)
def test_valid_json(self):
    """A simple JSON sample yields the expected schema and dict exports."""
    local_fs = LocalFileSystem()
    with local_fs.open(from_root('/test/sample_data/json_simple.json')) as handle:
        schema = from_file(handle)
        assert isinstance(schema, JsonSchema)

        expected = {
            '$schema': 'http://json-schema.org/schema#',
            'properties': {
                'field': {'type': 'string'},
                'field2': {'type': 'string'},
                'field3': {'type': 'string'},
            },
            'required': ['field', 'field2', 'field3'],
            'type': 'object',
        }
        assert schema.schema == expected
        assert schema.to_dict() == {'Columns': [], 'SchemaType': 'json'}
        assert schema.to_pd_dict() == {}
def test_seekable(tmpdir):
    """A file opened in text mode must support ``seek`` and ``tell``."""
    fs = LocalFileSystem()
    tmpdir = str(tmpdir)
    fn0 = os.path.join(tmpdir, "target")

    with open(fn0, "wb") as f:
        f.write(b"data")

    # Use a context manager so the handle is closed even when an assertion
    # fails; the original left it open.
    with fs.open(fn0, "rt") as f:
        assert f.seekable(), "file is not seekable"
        f.seek(1)
        assert f.read(1) == "a"
        assert f.tell() == 2
def remove_local_file(filepath: Union[str, Path]) -> None:
    """
    Remove a file from the local file system and log the removal.

    Parameters
    ----------
    filepath: Union[str, Path]
        Location of the local file to delete.
    """
    LocalFileSystem().rm(filepath)
    log.info(f"Removed {filepath} from local file system.")
def test_transaction(tmpdir):
    """Text written inside a transaction is readable once it has ended."""
    target = str(tmpdir / "test.txt")
    local_fs = LocalFileSystem()

    with local_fs.transaction:
        expected = "hello world"
        with local_fs.open(target, "w") as writer:
            writer.write(expected)

    with local_fs.open(target, "r") as reader:
        actual = reader.read()

    assert expected == actual
def test_get_pyarrow_filesystem():
    """``LocalFileSystem`` is a pyarrow filesystem only for pyarrow versions below 2.0."""
    pa = pytest.importorskip("pyarrow")
    local_fs = LocalFileSystem()

    is_legacy = LooseVersion(pa.__version__) < LooseVersion("2.0")
    if not is_legacy:
        assert not isinstance(local_fs, pa.filesystem.FileSystem)
    else:
        assert isinstance(local_fs, pa.filesystem.FileSystem)
        assert local_fs._get_pyarrow_filesystem() is local_fs

    class UnknownFileSystem:
        pass

    unrelated = UnknownFileSystem()
    assert not isinstance(unrelated, pa.filesystem.FileSystem)
def test_csv_equality(self):
    """Two CSV samples with different columns are reported as a schema conflict."""
    local_fs = LocalFileSystem()
    read_options = {"read_headers": True}

    with local_fs.open(from_root('/test/sample_data/csv_sample.csv')) as handle:
        schema1 = from_file(handle, {"read_headers": True})
        assert isinstance(schema1, TextSchema)

    with local_fs.open(from_root('/test/sample_data/csv_sample_2.csv')) as handle:
        schema2 = from_file(handle, read_options)
        assert isinstance(schema2, TextSchema)

    conflict = find_conflicts([schema1, schema2])[0]
    assert isinstance(conflict, SchemaConflict)

    first_schema = {
        'SchemaType': 'csv',
        'Columns': [
            {'Name': 'type', 'Type': 'object'},
            {'Name': 'price', 'Type': 'float64'},
        ],
    }
    second_schema = {
        'SchemaType': 'csv',
        'Columns': [
            {'Name': 'type', 'Type': 'object'},
            {'Name': 'price', 'Type': 'float64'},
            {'Name': 'availabile', 'Type': 'bool'},
            {'Name': 'date', 'Type': 'object'},
        ],
    }
    expected = {
        'CountDistinctSchemas': 2,
        'DistinctSchemas': [first_schema, second_schema],
        'NonOverlappingColumns': [
            {'name': 'availabile', 'type': 'bool'},
            {'name': 'date', 'type': 'object'},
        ],
    }
    assert conflict.to_dict() == {'SchemaConflicts': expected}
def test_abs_paths(tmpdir):
    """Paths returned by ``glob`` must stay usable after the cwd changes."""
    tmpdir = str(tmpdir)
    here = os.getcwd()
    os.chdir(tmpdir)
    try:
        with open("tmp", "w") as f:
            f.write("hi")
        out = LocalFileSystem().glob("*")
        assert len(out) == 1
        assert "/" in out[0]
        assert "tmp" in out[0]
        fs = LocalFileSystem()
    finally:
        # Restore the working directory even when an assertion above fails,
        # so a failure here cannot leak a changed cwd into later tests.
        os.chdir(here)

    # Read from the original cwd: this only works if the globbed path is
    # not relative to ``tmpdir``.
    with fs.open(out[0], "r") as f:
        res = f.read()
    assert res == "hi"
def get_filesystem(path: Union[str, Path]) -> AbstractFileSystem:
    """Return the fsspec filesystem matching ``path``.

    A path containing ``"://"`` selects the filesystem registered for the
    text before its first ``":"``; any other path selects the local filesystem.
    """
    location = str(path)
    if "://" not in location:
        # No protocol given: use the local filesystem.
        return LocalFileSystem()

    # Use the filesystem from the protocol specified.
    protocol = location.partition(":")[0]
    return fsspec.filesystem(protocol)
def test_prepare_args(sag, sge, tmp_path):
    """Test preparing arguments for getting ABI and GLM data."""
    from sattools.scutil import prepare_abi_glm_ms_args
    from fsspec.implementations.local import LocalFileSystem
    from typhon.files.handlers.common import FileInfo
    from satpy.readers import FSFile

    # Five fake GLM files, each covering one minute.
    glm_infos = []
    for minute in range(5):
        begin = datetime.datetime(1900, 1, 1, 0, minute)
        finish = datetime.datetime(1900, 1, 1, 0, minute + 1)
        glm_infos.append(
            FileInfo(path=str(tmp_path / f"glm{minute:d}"),
                     times=[begin, finish],
                     attr={}))
    sge.return_value = glm_infos

    # Five fake ABI files on the local filesystem.
    sag.return_value = [
        FSFile(tmp_path / f"abi{index:d}", LocalFileSystem())
        for index in range(5)
    ]

    start = datetime.datetime(1900, 1, 1, 0)
    end = datetime.datetime(1900, 1, 1, 6)

    (gfsfs, afsfs) = prepare_abi_glm_ms_args(
        start, end, chans={8, 10}, sector="F")
    assert sag.call_args[1]["sector"] == "F"
    assert sge.call_args[1]["sector"] == "F"

    # Sector "M1" must be rejected.
    with pytest.raises(ValueError):
        prepare_abi_glm_ms_args(start, end, chans={8, 10}, sector="M1")
def test_equality():
    """Test sane behaviour for equality and hashing.

    Make sure that different CachingFileSystem only test equal to each other
    when they should, and do not test equal to the filesystem they rely upon.
    Similarly, make sure their hashes differ when they should and are equal
    when they should.

    Related: GitHub#577, GitHub#578
    """
    from fsspec.implementations.local import LocalFileSystem

    lfs = LocalFileSystem()
    cfs1 = CachingFileSystem(fs=lfs, cache_storage="raspberry")
    cfs2 = CachingFileSystem(fs=lfs, cache_storage="banana")
    cfs3 = CachingFileSystem(fs=lfs, cache_storage="banana")

    # Equality: only instances with the same cache storage compare equal.
    assert cfs1 == cfs1
    assert cfs1 != cfs2
    assert cfs1 != cfs3
    assert cfs2 == cfs3
    assert cfs1 != lfs
    assert cfs2 != lfs
    assert cfs3 != lfs

    # Hashing mirrors the equality checks above.
    assert hash(lfs) != hash(cfs1)
    assert hash(lfs) != hash(cfs2)
    assert hash(lfs) != hash(cfs3)
    assert hash(cfs1) != hash(cfs2)
    # The original repeated the cfs1/cfs2 check here, so the cfs1/cfs3 pair
    # was never covered.
    assert hash(cfs1) != hash(cfs3)
    assert hash(cfs2) == hash(cfs3)