def test_chained_url(ftp_writable):
    """Read a zip member through a chained URL whose archive lives on FTP.

    A one-member zip archive is built locally, uploaded as ``archive.zip``
    through the FTP filesystem, and then read back with ``fsspec.open``
    using a ``zip://...::ftp://...`` chained URL.
    """
    # Use the connection details supplied by the fixture instead of
    # hard-coded literals.
    # NOTE(review): assumes the fixture yields (host, port, username,
    # password) -- confirm against the fixture definition.
    host, port, username, password = ftp_writable

    @contextmanager
    def tempzip(data=None):
        """Yield the path of a temporary zip holding ``data``; remove it after."""
        # mkstemp hands back an open descriptor; close it so it is not leaked.
        fd, path = tempfile.mkstemp(suffix="zip")
        os.close(fd)
        try:
            with zipfile.ZipFile(path, mode="w") as z:
                # ``None`` instead of a shared mutable ``{}`` default.
                for k, v in (data or {}).items():
                    z.writestr(k, v)
            yield path
        finally:
            # Best-effort cleanup: the file may already be gone.
            try:
                os.remove(path)
            except (IOError, OSError):
                pass

    data = {"afile": b"hello"}
    cls = fsspec.get_filesystem_class("ftp")
    fs = cls(host=host, port=port, username=username, password=password)
    with tempzip(data) as lfile:
        fs.put_file(lfile, "archive.zip")

    # NOTE(review): in this copy of the file the rest of the URL list and the
    # loop header were overwritten by a credential-masking pass. Only
    # "zip://afile" survived intact; the loop below is a reconstruction from
    # the surviving fragments. Restore any further URL variants from version
    # control.
    urls = ["zip://afile"]
    for url in urls:
        url += f"::ftp://{username}:{password}@{host}:{port}/archive.zip"
        with fsspec.open(url, "rb") as f:
            assert f.read() == data["afile"]
def get_ecoregions(tempdir):
    """Download the raw ecoregion GeoJSON files below ``tempdir``.

    The "baileys" and "supersections" files are fetched over HTTP and each
    is stored at ``<tempdir>/raw/<kind>.geojson``.
    """
    http_fs = fsspec.get_filesystem_class("http")()
    kinds = ["baileys", "supersections"]
    for kind in kinds:
        local_path = os.path.join(tempdir, f"raw/{kind}.geojson")
        remote_url = f"https://storage.googleapis.com/carbonplan-data/raw/ecoregions/{kind}.geojson"
        http_fs.download(remote_url, local_path)
def download(source_url: str, cache_location: str) -> str:
    """
    Download a remote file to a cache.

    Parameters
    ----------
    source_url : str
        Path or url to the source file.
    cache_location : str
        Path or url to the target location for the source file. The text
        before the first ``:`` selects the fsspec filesystem class.

    Returns
    -------
    target_url : str
        Path or url in the form of `{cache_location}/hash({source_url})`.
    """
    fs = fsspec.get_filesystem_class(
        cache_location.split(':')[0])(token='cloud')

    # NOTE: ``hash()`` of a str is salted per interpreter process unless
    # PYTHONHASHSEED is fixed, so this key is only stable within one process.
    target_url = os.path.join(cache_location, str(hash(source_url)))

    # there is probably a better way to do caching!
    try:
        # Probe for an existing copy. The ``with`` closes the probe handle,
        # which was previously left open.
        with fs.open(target_url):
            return target_url
    except FileNotFoundError:
        pass

    with fsspec.open(source_url, mode="rb") as source, \
            fs.open(target_url, mode="wb") as target:
        target.write(source.read())
    return target_url
def test_combine_and_write():
    """Check ``combine_and_write`` both as a flow task and when run directly.

    The tutorial "rasm" dataset is split in two along ``time``, each part is
    stored as netCDF bytes in the in-memory filesystem, and the combined
    Zarr output is compared with the unsplit dataset.
    """
    ds = xr.tutorial.open_dataset("rasm").load()
    memfs = fsspec.get_filesystem_class("memory")()

    pieces = (ds.isel(time=slice(18)), ds.isel(time=slice(18, None)))
    for index, piece in enumerate(pieces):
        payload = piece.to_netcdf()
        with memfs.open(f"cache/{index}.nc", "wb") as handle:
            handle.write(payload)

    sources = [f"memory://{entry}" for entry in memfs.ls("cache")]
    target = "memory://target.zarr"
    dims = {"concat_dim": "time", "append_dim": "time"}

    # In a flow context
    with Flow("test") as flow:
        result = pangeo_forge.tasks.xarray.combine_and_write(
            sources, target, **dims
        )
        assert isinstance(result, Task)

    flow.validate()

    # Direct invocation of the task body.
    result = pangeo_forge.tasks.xarray.combine_and_write.run(
        sources, target, **dims
    )
    assert result == target

    result = xr.open_zarr(memfs.get_mapper("target.zarr"))
    xr.testing.assert_equal(ds, result)
def test_get_asset_works_with_custom_filesystem(catalog_with_assets):
    """``get_asset`` returns a DataArray when given an explicit filesystem."""
    local_fs = fsspec.get_filesystem_class("file")()
    asset = get_asset(
        catalog_with_assets,
        asset_key="tile",
        item_id="tile_1",
        filesystem=local_fs,
    )
    assert isinstance(asset, xr.DataArray)
def _get_fs_and_protocol(self):
    """Build the filesystem for ``self.prefix_path``.

    The protocol is split off ``self.prefix_path`` and used to look up the
    fsspec filesystem class. Constructor options derived from the URL are
    passed, together with ``self.storage_options`` (or an empty dict), to
    ``update_storage_options`` before the class is instantiated.

    Returns
    -------
    tuple
        ``(filesystem instance, protocol)``.
    """
    user_options = self.storage_options or {}
    protocol, _ = split_protocol(self.prefix_path)
    fs_class = fsspec.get_filesystem_class(protocol)
    fs_kwargs = fs_class._get_kwargs_from_urls(self.prefix_path)
    update_storage_options(fs_kwargs, user_options)
    return fs_class(**fs_kwargs), protocol
def test_makedirs_exist_ok(ssh):
    """``makedirs`` on an existing path raises unless ``exist_ok=True``."""
    sftp = fsspec.get_filesystem_class("sftp")(**ssh)
    sftp.makedirs("/a/b/c")

    # Second creation without exist_ok must fail and name the path.
    with pytest.raises(FileExistsError, match="/a/b/c"):
        sftp.makedirs("/a/b/c", exist_ok=False)

    # With exist_ok the call is expected to pass silently.
    sftp.makedirs("/a/b/c", exist_ok=True)
def can_be_local(path):
    """Can the given URL be used with open_local?

    Returns the ``local_file`` attribute of the filesystem class registered
    for the path's protocol, or ``False`` when the class has no such
    attribute or cannot be looked up.
    """
    from fsspec import get_filesystem_class

    try:
        protocol = get_protocol(path)
        fs_class = get_filesystem_class(protocol)
        return getattr(fs_class, "local_file", False)
    except (ValueError, ImportError):
        # not in registry or import failed
        return False
def test_register():
    """The v3io protocol resolves to ``V3ioFS`` by class lookup and by name."""
    registered = fsspec.get_filesystem_class(V3ioFS.protocol)
    assert registered is V3ioFS, 'not registered'

    fs = fsspec.filesystem(
        'v3io',
        v3io_api='a.b.com',
        v3io_access_key='s3cr3t',
    )
    assert isinstance(fs, V3ioFS), f'bad object class - {fs.__class__}'
def set_filesystem(self, filesystem=None):
    """
    Set the file system used by the driver.

    :param filesystem: (optional, `fsspec` compatible FileSystem instance)
        file system associated to the driver; when omitted, an instance of
        the fsspec "file" filesystem class is used.
    """
    self.filesystem = filesystem
    if self.filesystem is not None:
        return

    # No filesystem supplied: fall back to the "file" implementation.
    local_cls = fsspec.get_filesystem_class("file")
    self.filesystem = local_cls()
def test_simple(ssh):
    """Exercise mkdirs/touch/find/ls/info/exists/rm on the SFTP filesystem."""
    sftp = fsspec.get_filesystem_class('sftp')(**ssh)
    home = '/home/someuser'
    nested = '/home/someuser/deeper'
    target = '/home/someuser/deeper/afile'

    sftp.mkdirs(nested)
    sftp.touch(target)

    # The new empty file is the only entry found or listed.
    assert sftp.find(home) == [target]
    assert sftp.ls('/home/someuser/deeper/') == [target]
    assert sftp.info(target)['type'] == 'file'
    assert sftp.info(target)['size'] == 0
    assert sftp.exists(home)

    # Recursive removal takes the whole tree with it.
    sftp.rm(home, recursive=True)
    assert not sftp.exists(home)
def test_simple(ssh):
    """Round-trip a single empty file through the SFTP filesystem."""
    sftp_cls = fsspec.get_filesystem_class("sftp")
    client = sftp_cls(**ssh)
    user_dir = "/home/someuser"
    file_path = "/home/someuser/deeper/afile"

    client.mkdirs("/home/someuser/deeper")
    client.touch(file_path)

    assert client.find(user_dir) == [file_path]
    assert client.ls("/home/someuser/deeper/") == [file_path]
    assert client.info(file_path)["type"] == "file"
    assert client.info(file_path)["size"] == 0
    assert client.exists(user_dir)

    client.rm(user_dir, recursive=True)
    assert not client.exists(user_dir)
def join(self, path, *paths):
    """Join ``paths`` onto the last segment of a (possibly chained) path.

    Everything up to and including the final ``CHAIN_SEPARATOR`` is kept
    as-is, as is the protocol of the last segment. The remaining path and
    ``paths`` are joined with the separator of that protocol's filesystem
    class.
    """
    self._validate_path(path)

    head, chain_sep, tail = path.rpartition(self.CHAIN_SEPARATOR)
    protocol, bare_path = fsspec.core.split_protocol(tail)
    fs_class = fsspec.get_filesystem_class(protocol)

    prefix_parts = [head, chain_sep]
    if protocol:
        prefix_parts += [protocol, self.SEPARATOR]

    joined = self._join(fs_class.sep, (bare_path,) + paths)
    return "".join(prefix_parts) + joined
def get_dir(path):
    """Return the directory that contains ``path``.

    For a URL (anything containing ``://``) the parent is computed by the
    protocol's filesystem class and the protocol prefix is put back if it
    was stripped. Otherwise the result is the directory part of ``path``
    joined onto the current working directory, in posix form, with a
    trailing slash.
    """
    if '://' not in path:
        local_dir = make_path_posix(
            os.path.join(os.getcwd(), os.path.dirname(path)))
        return local_dir if local_dir[-1] == '/' else local_dir + '/'

    protocol, _ = split_protocol(path)
    parent = get_filesystem_class(protocol)._parent(path)
    if "://" in parent:
        return parent
    # some FSs strip this, some do not
    return protocol + "://" + parent
def wsi_file_urlpath(wsi_file):
    """Yield a ``memory://`` URL holding a copy of ``wsi_file``.

    Files larger than 100 MiB cause the test to be skipped. The in-memory
    copy is removed once the consumer is done with the URL.
    """
    size_limit = 100 * 1024 * 1024
    if wsi_file.stat().st_size > size_limit:
        pytest.skip("reduce ram usage of tests")

    urlpath = f"memory://{wsi_file.name}"
    memfs = fsspec.get_filesystem_class("memory")()

    with fsspec.open(urlpath, mode="wb") as sink:
        sink.write(wsi_file.read_bytes())

    try:
        yield urlpath
    finally:
        memfs.rm(wsi_file.name)
def test_simple(smb_params):
    """Exercise mkdirs/touch/find/ls/info/exists/rm on the SMB filesystem."""
    top = "/home/adir"
    nested = "/home/adir/otherdir/"
    target = "/home/adir/otherdir/afile"

    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.mkdirs(nested)
    smb.touch(target)

    assert smb.find(top) == [target]
    assert smb.ls(nested, detail=False) == [target]
    assert smb.info(target)["type"] == "file"
    assert smb.info(target)["size"] == 0
    assert smb.exists(top)

    smb.rm(top, recursive=True)
    assert not smb.exists(top)
def _get_fsspec_filesystem(filename):
    """
    _get_fsspec_filesystem checks if the provided protocol is known to fsspec
    and if so returns the filesystem wrapper for it.

    Returns None when fsspec support is disabled, or when fsspec cannot
    provide a filesystem class for the protocol of the first chain segment
    of `filename`.
    """
    if not FSSPEC_ENABLED:
        return None

    segment = filename.partition(FSSpecFileSystem.CHAIN_SEPARATOR)[0]
    protocol = segment.partition(FSSpecFileSystem.SEPARATOR)[0]
    try:
        fs_class = fsspec.get_filesystem_class(protocol)
    except (ValueError, ImportError):
        # fsspec raises instead of returning a falsy value: ValueError when
        # the protocol is not registered, ImportError when its backing
        # package is missing. Both mean "not usable", so report None.
        return None
    return _FSSPEC_FILESYSTEM if fs_class else None
def nc2zarr(source_url: str, cache_location: str) -> str:
    """Convert netCDF data to Zarr, writing the store next to the source.

    Parameters
    ----------
    source_url : str
        URL of the netCDF file. The text before the first ``:`` selects the
        fsspec filesystem class.
    cache_location : str
        Not used by this function.

    Returns
    -------
    target_url : str
        ``source_url`` with ``.zarr`` appended.
    """
    fs = fsspec.get_filesystem_class(source_url.split(':')[0])(token='cloud')
    target_url = source_url + ".zarr"

    with dask.config.set(scheduler="single-threaded"):
        # ``.load()`` pulls the data into memory, so the source handle can be
        # closed as soon as the pipeline has run. It was previously left open.
        with fs.open(source_url) as source:
            ds = (
                xr.open_dataset(source)
                .pipe(preproc)
                .pipe(postproc)
                .load()
                .chunk(chunks)
            )

        mapper = fs.get_mapper(target_url)
        ds.to_zarr(mapper, mode='w')

    return target_url
def test_transaction(ssh):
    """Files touched inside a transaction only appear once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` pair
    and the ``transaction`` context manager.
    """
    f = fsspec.get_filesystem_class('sftp')(**ssh)
    f.mkdirs('/home/someuser/deeper')

    f.start_transaction()
    f.touch('/home/someuser/deeper/afile')
    assert f.find('/home/someuser') == []
    f.end_transaction()
    # This comparison was a bare expression before, so it checked nothing.
    assert f.find('/home/someuser') == ['/home/someuser/deeper/afile']

    with f.transaction:
        assert f._intrans
        f.touch('/home/someuser/deeper/afile2')
        # The second file is not visible until the block exits.
        assert f.find('/home/someuser') == ['/home/someuser/deeper/afile']
    assert f.find('/home/someuser') == [
        '/home/someuser/deeper/afile',
        '/home/someuser/deeper/afile2',
    ]
def get_projects(tempdir):
    """Download ``shape.json`` for every project in the offsets database.

    The project list is read from the published database JSON. Each
    project's shape file is then stored at
    ``<tempdir>/raw/projects/<id>/shape.json``. The slug and local path are
    printed for every project.
    """
    database_url = "https://carbonplan.blob.core.windows.net/carbonplan-forests/offsets/database/forest-offsets-database-v1.0.json"
    with fsspec.open(database_url) as handle:
        projects = json.load(handle)

    http_fs = fsspec.get_filesystem_class("http")()
    for entry in projects:
        pid = entry["id"]
        slug = f"projects/{pid}/shape.json"
        print(slug, "slug")
        local_path = os.path.join(tempdir, f"raw/{slug}")
        print(local_path, "path")
        remote_url = f"https://carbonplan.blob.core.windows.net/carbonplan-forests/offsets/database/{slug}"
        http_fs.download(remote_url, local_path)
def test_transaction(ssh):
    """Files touched inside a transaction only appear once it ends.

    Covers both the explicit ``start_transaction``/``end_transaction`` pair
    and the ``transaction`` context manager.
    """
    f = fsspec.get_filesystem_class("sftp")(**ssh)
    f.mkdirs("/home/someuser/deeper")

    f.start_transaction()
    f.touch("/home/someuser/deeper/afile")
    assert f.find("/home/someuser") == []
    f.end_transaction()
    # This comparison was a bare expression before, so it checked nothing.
    assert f.find("/home/someuser") == ["/home/someuser/deeper/afile"]

    with f.transaction:
        assert f._intrans
        f.touch("/home/someuser/deeper/afile2")
        # The second file is not visible until the block exits.
        assert f.find("/home/someuser") == ["/home/someuser/deeper/afile"]
    assert f.find("/home/someuser") == [
        "/home/someuser/deeper/afile",
        "/home/someuser/deeper/afile2",
    ]
def test_transaction(smb_params):
    """Files touched inside an SMB transaction only appear once it ends."""
    top = "/home/afolder"
    nested = "/home/afolder/otherdir"
    first = "/home/afolder/otherdir/afile"
    second = "/home/afolder/otherdir/afile2"

    smb = fsspec.get_filesystem_class("smb")(**smb_params)
    smb.mkdirs(nested)

    # Explicit start/end pair.
    smb.start_transaction()
    smb.touch(first)
    assert smb.find(top) == []
    smb.end_transaction()
    assert smb.find(top) == [first]

    # Context-manager form.
    with smb.transaction:
        assert smb._intrans
        smb.touch(second)
        assert smb.find(top) == [first]
    assert smb.find(top) == [first, second]
def test_chained_url(ftp_writable):
    """Read a zip member through a chained URL whose archive lives on FTP.

    A one-member zip archive is built with ``tempzip``, uploaded as
    ``archive.zip`` through the FTP filesystem, and then read back with
    ``fsspec.open`` using a ``zip://...::ftp://...`` chained URL.
    """
    host, port, username, password = ftp_writable
    data = {"afile": b"hello"}
    cls = fsspec.get_filesystem_class("ftp")
    fs = cls(host=host, port=port, username=username, password=password)
    with tempzip(data) as lfile:
        fs.put_file(lfile, "archive.zip")

    # NOTE(review): in this copy of the file the rest of the URL list and the
    # loop header were overwritten by a credential-masking pass. Only
    # "zip://afile" survived intact; the loop below is a reconstruction from
    # the surviving fragments. Restore any further URL variants from version
    # control.
    urls = ["zip://afile"]
    for url in urls:
        url += f"::ftp://{username}:{password}@{host}:{port}/archive.zip"
        with fsspec.open(url, "rb") as f:
            assert f.read() == data["afile"]
def extract_files_to_message(file, fs, subject, packing=None):
    """Try extracting a file virtually and create the corresponding message.

    With ``packing`` left as ``None`` the message is created for the
    original file on ``fs``. Otherwise ``packing`` names the filesystem
    class that is opened on top of the file, and the message lists every
    entry found inside it together with the file's ``metadata`` entry.
    """
    file, filename = _get_filename(file, fs)

    if packing is not None:
        archive_cls = get_filesystem_class(packing)
        target_protocol = _get_fs_protocol(fs)
        archive_fs = archive_cls(
            fo=filename,
            target_protocol=target_protocol,
            target_options=fs.storage_options,
        )
        members = list(archive_fs.find('/', detail=True).values())
        return create_message_with_json_fs(
            archive_fs.to_json(), members, subject, file.get('metadata'))

    return create_message_with_json_fs(fs.to_json(), file, subject)
def test_list(server):
    """Every local ``*.py`` name shows up in the HTTP glob of the server."""
    http_fs = fsspec.get_filesystem_class('http')()
    remote_names = http_fs.glob(server + '/*.py')
    local_names = glob.glob('*.py')
    for local_name in local_names:
        assert any(local_name in remote for remote in remote_names)
def test_pickle():
    """A zip filesystem still serves file contents after a pickle round trip."""
    with tempzip(data) as archive_path:
        zip_cls = fsspec.get_filesystem_class("zip")
        original = zip_cls(fo=archive_path)
        restored = pickle.loads(pickle.dumps(original))
        assert restored.cat("b") == b"hello"
def test_mapping():
    """The mapper view of a zip filesystem lists and returns its members."""
    with tempzip(data) as archive_path:
        zip_fs = fsspec.get_filesystem_class("zip")(fo=archive_path)
        mapper = zip_fs.get_mapper("")
        keys = list(mapper)
        assert keys == ["a", "b", "deeply/nested/path"]
        assert mapper["b"] == data["b"]
def test_empty():
    """``find`` on a zip built with no data returns an empty list."""
    with tempzip() as archive_path:
        zip_cls = fsspec.get_filesystem_class("zip")
        zip_fs = zip_cls(fo=archive_path)
        found = zip_fs.find("")
        assert found == []
def upload_tiles(kind, tempdir, upload_to):
    """Recursively upload ``<tempdir>/processed/<kind>/`` to ``<upload_to>/<kind>``.

    The filesystem class is chosen from the text before the first ``:`` in
    ``upload_to``.
    """
    print(f"uploading {kind} to {upload_to}")
    protocol = upload_to.split(":")[0]
    remote_fs = fsspec.get_filesystem_class(protocol)()
    local_dir = f"{tempdir}/processed/{kind}/"
    remote_dir = f"{upload_to}/{kind}"
    remote_fs.put(local_dir, remote_dir, recursive=True)
def get_fires(tempdir):
    """Download the fires file to ``<tempdir>/raw/fires.geojson`` over HTTP."""
    http_cls = fsspec.get_filesystem_class("http")
    local_path = os.path.join(tempdir, "raw/fires.geojson")
    remote_url = "https://storage.googleapis.com/carbonplan-research/offset-fires/fires.json"
    http_cls().download(remote_url, local_path)