def test_du():
    """Check the sizes reported by ``MemoryFileSystem.du`` for a small tree.

    The store is populated directly with three files of 1, 2 and 3 bytes
    under ``/dir``; the assertions pin the total, a per-file entry, and the
    value reported with ``maxdepth=0``.
    """
    fs = MemoryFileSystem()
    payloads = {
        '/dir/afile': b'a',
        '/dir/dirb/afile': b'bb',
        '/dir/dirb/bfile': b'ccc',
    }
    fs.store = {
        key: MemoryFile(fs, '/afile', payload)
        for key, payload in payloads.items()
    }

    total = fs.du('/dir')
    assert total == 6

    per_file = fs.du('/dir', total=False)
    assert per_file['/dir/dirb/afile'] == 2

    shallow = fs.du('/dir', maxdepth=0)
    assert shallow == 1
def test_du():
    """Exercise ``du`` on an in-memory tree holding 1 + 2 + 3 bytes."""
    fs = MemoryFileSystem()

    # Populate the backing store by hand, one (key, content) pair at a time.
    store = {}
    for key, content in (
        ("/dir/afile", b"a"),
        ("/dir/dirb/afile", b"bb"),
        ("/dir/dirb/bfile", b"ccc"),
    ):
        store[key] = MemoryFile(fs, "/afile", content)
    fs.store = store

    # Grand total for the directory.
    assert fs.du("/dir") == 6
    # With total=False the result is indexed by path.
    sizes = fs.du("/dir", total=False)
    assert sizes["/dir/dirb/afile"] == 2
    # Value reported when the depth is limited to zero.
    assert fs.du("/dir", maxdepth=0) == 1
def test_generic_open_FSFile_MemoryFileSystem(self):
    """Verify generic_open reads back the bytes of an FSFile wrapping a MemoryFile."""
    mem_fs = MemoryFileSystem()
    # Place the file directly under the filesystem's root marker.
    file_path = "{}test.DAT".format(mem_fs.root_marker)
    mem_file = MemoryFile(fs=mem_fs, path=file_path, data=b"TEST")
    mem_file.commit()

    with hf.generic_open(FSFile(mem_file)) as file_object:
        data = file_object.read()
        assert data == b'TEST'
def test_read_csv(cleared_fs):
    """Read a CSV through the ``memory://`` protocol and compare the frame.

    NOTE(review): ``text`` and ``df1`` are not defined in this function;
    they are expected to come from the enclosing module.
    """
    from fsspec.implementations.memory import MemoryFile

    # Drop the CSV payload straight into the in-memory store.
    stored_file = MemoryFile(data=text)
    cleared_fs.store["test/test.csv"] = stored_file

    df2 = read_csv("memory://test/test.csv", parse_dates=["dt"])

    tm.assert_frame_equal(df1, df2)
def test_get_put(tmpdir):
    """Round-trip files between a local temp directory and MemoryFileSystem.

    Covers single-file ``put``/``get`` and recursive ``put``/``get`` of a
    directory tree. All local files are opened with context managers so
    their handles are closed before the files are removed or re-read.
    """
    tmpdir = str(tmpdir)

    # Local fixture: <tmpdir>/one and <tmpdir>/dir/two.
    fn = os.path.join(tmpdir, "one")
    with open(fn, "wb") as fh:
        fh.write(b"one")
    os.mkdir(os.path.join(tmpdir, "dir"))
    fn2 = os.path.join(tmpdir, "dir", "two")
    with open(fn2, "wb") as fh:
        fh.write(b"two")

    fs = MemoryFileSystem()

    # Single file: local -> memory.
    fs.put(fn, "/afile")
    assert fs.cat("/afile") == b"one"

    # Single file: memory -> local.
    fs.store["/bfile"] = MemoryFile(fs, "/bfile", b"data")
    fn3 = os.path.join(tmpdir, "three")
    fs.get("/bfile", fn3)
    with open(fn3, "rb") as fh:
        assert fh.read() == b"data"

    # Recursive upload of the whole directory (now also containing "three").
    fs.put(tmpdir, "/more", recursive=True)
    assert fs.find("/more") == ["/more/dir/two", "/more/one", "/more/three"]

    # Wipe the local copies so the recursive download must recreate them.
    for f in [fn, fn2, fn3]:
        os.remove(f)
    os.rmdir(os.path.join(tmpdir, "dir"))

    fs.get("/more/", tmpdir + "/", recursive=True)
    with open(fn3, "rb") as fh:
        assert fh.read() == b"data"
    with open(fn, "rb") as fh:
        assert fh.read() == b"one"
def test_get_put(tmpdir):
    """Check put/get between the local disk and a MemoryFileSystem.

    Exercises a single-file upload and download, then a recursive upload
    and download of a small tree. Local files are accessed through ``with``
    blocks so no handle is left open when the files are deleted.
    """
    tmpdir = str(tmpdir)

    # Create <tmpdir>/one and <tmpdir>/dir/two on disk.
    fn = os.path.join(tmpdir, 'one')
    with open(fn, 'wb') as fh:
        fh.write(b'one')
    os.mkdir(os.path.join(tmpdir, 'dir'))
    fn2 = os.path.join(tmpdir, 'dir', 'two')
    with open(fn2, 'wb') as fh:
        fh.write(b'two')

    fs = MemoryFileSystem()

    # Upload one file and read it back from memory.
    fs.put(fn, '/afile')
    assert fs.cat('/afile') == b'one'

    # Download a file that was inserted into the store by hand.
    fs.store['/bfile'] = MemoryFile(fs, '/bfile', b'data')
    fn3 = os.path.join(tmpdir, 'three')
    fs.get('/bfile', fn3)
    with open(fn3, 'rb') as fh:
        assert fh.read() == b'data'

    # Recursive upload; the tree now includes the downloaded "three".
    fs.put(tmpdir, '/more', recursive=True)
    assert fs.find('/more') == ['/more/dir/two', '/more/one', '/more/three']

    # Remove the local files, then restore them with a recursive download.
    for f in [fn, fn2, fn3]:
        os.remove(f)
    os.rmdir(os.path.join(tmpdir, 'dir'))

    fs.get('/more/', tmpdir + '/', recursive=True)
    with open(fn3, 'rb') as fh:
        assert fh.read() == b'data'
    with open(fn, 'rb') as fh:
        assert fh.read() == b'one'
def _open(
    self,
    path,
    mode="rb",
    block_size=None,
    autocommit=True,
    cache_options=None,
    **kwargs,
):
    """Return the content of one archive member as an in-memory file.

    The archive is scanned entry by entry and the data of the entry whose
    ``pathname`` equals the protocol-stripped ``path`` is loaded into a
    ``MemoryFile``.

    Parameters
    ----------
    path : str
        Member path; passed through ``self._strip_protocol`` before matching.
    mode : str
        Only ``"rb"`` is supported.
    block_size, autocommit, cache_options, **kwargs
        Accepted but not used by this implementation.

    Raises
    ------
    NotImplementedError
        If ``mode`` is anything other than ``"rb"``.
    ValueError
        If a matching entry with a non-zero size yields no data blocks.

    Notes
    -----
    If no entry matches ``path``, the returned file is empty rather than
    an error being raised.
    """
    path = self._strip_protocol(path)
    if mode != "rb":
        raise NotImplementedError

    data = b""
    with self._open_archive() as arc:
        # FIXME? dropwhile would increase performance but less readable
        for entry in arc:
            if entry.pathname != path:
                continue

            if entry.size == 0:
                # An empty member has no blocks to iterate over; treat it
                # as empty content instead of falling into the "no blocks"
                # error path below.
                data = b""
                continue

            # Request a single block as large as the entry and keep the
            # first one that comes back.
            for block in entry.get_blocks(entry.size):
                data = block
                break
            else:
                raise ValueError
    return MemoryFile(fs=self, path=path, data=data)
def test_get_put(tmpdir):
    """Round-trip files between local disk and MemoryFileSystem.

    Covers single-file ``put``/``get``, recursive ``put`` of an absolute
    path, of a relative directory name, and of the current directory, and
    finally a recursive ``get``. Local files are opened with context
    managers so every handle is closed before the files are removed.
    """
    tmpdir = str(tmpdir)

    # Local fixture: <tmpdir>/one and <tmpdir>/dir/two.
    fn = os.path.join(tmpdir, "one")
    with open(fn, "wb") as fh:
        fh.write(b"one")
    os.mkdir(os.path.join(tmpdir, "dir"))
    fn2 = os.path.join(tmpdir, "dir", "two")
    with open(fn2, "wb") as fh:
        fh.write(b"two")

    fs = MemoryFileSystem()

    # Single file: local -> memory.
    fs.put(fn, "/afile")
    assert fs.cat("/afile") == b"one"

    # Single file: memory -> local.
    fs.store["/bfile"] = MemoryFile(fs, "/bfile", b"data")
    fn3 = os.path.join(tmpdir, "three")
    fs.get("/bfile", fn3)
    with open(fn3, "rb") as fh:
        assert fh.read() == b"data"

    # Recursive upload addressed by absolute path.
    fs.put(tmpdir, "/more", recursive=True)
    assert fs.find("/more") == ["/more/dir/two", "/more/one", "/more/three"]

    @contextlib.contextmanager
    def tmp_chdir(path):
        """Temporarily switch the working directory, restoring it on exit."""
        curdir = os.getcwd()
        os.chdir(path)
        try:
            yield
        finally:
            os.chdir(curdir)

    # Recursive upload addressed by a bare directory name, from its parent.
    with tmp_chdir(os.path.join(tmpdir, os.path.pardir)):
        fs.put(os.path.basename(tmpdir), "/moretwo", recursive=True)
        assert fs.find("/moretwo") == [
            "/moretwo/dir/two",
            "/moretwo/one",
            "/moretwo/three",
        ]

    # Recursive upload addressed as the current directory.
    with tmp_chdir(tmpdir):
        fs.put(os.path.curdir, "/morethree", recursive=True)
        assert fs.find("/morethree") == [
            "/morethree/dir/two",
            "/morethree/one",
            "/morethree/three",
        ]

    # Wipe the local copies so the recursive download must recreate them.
    for f in [fn, fn2, fn3]:
        os.remove(f)
    os.rmdir(os.path.join(tmpdir, "dir"))

    fs.get("/more/", tmpdir + "/", recursive=True)
    with open(fn3, "rb") as fh:
        assert fh.read() == b"data"
    with open(fn, "rb") as fh:
        assert fh.read() == b"one"
def _open(self, path, mode="rb", block_size=None, **kwargs):
    """Fetch the configured resource over HTTP and return it as a JSON file.

    The request URL is built from ``self.url`` and the instance's
    ``owner``/``title``/``model_pk``; which part of the JSON response is
    returned depends on ``self.resource``, ``self.field`` and
    ``self.section``. ``path`` is only used in the error raised on a 404;
    ``block_size`` and ``kwargs`` are not used.

    Raises
    ------
    NotImplementedError
        If ``mode`` is not ``"rb"``.
    FileNotFoundError
        If the server answers 404, or ``self.resource`` is not one of the
        handled values.
    """
    if mode != "rb":
        raise NotImplementedError

    resource = self.resource
    base_url = self.url.format(
        owner=self.owner,
        title=self.title,
        model_pk=self.model_pk,
    )

    # Pick the endpoint for the requested resource.
    if resource == "inputs":
        url = base_url + "edit/"
    elif resource is None:
        url = base_url + "remote/"
    else:
        url = base_url

    headers = (
        {"Authorization": f"Token {self.api_token}"}
        if self.api_token is not None
        else None
    )

    response = requests.get(url, headers=headers)
    if response.status_code == 404:
        raise FileNotFoundError(path)
    response.raise_for_status()
    data = response.json()

    # Select the slice of the response that represents this resource.
    if resource == "inputs":
        if self.field is None:
            result = data
        else:
            result = data[self.field]
            # Only the "adjustment" field can be narrowed to a section.
            if self.field == "adjustment" and self.section is not None:
                result = result[self.section]
    elif resource == "outputs":
        result = data["outputs"]["downloadable"]
    elif resource in ("title", "owner"):
        result = {resource: data[resource]}
    elif resource is None:
        result = dict(data, outputs=base_url)
    else:
        raise FileNotFoundError()

    payload = json.dumps(result).encode("utf-8")
    return MemoryFile(None, None, payload)