Пример #1
0
def test_file_ops(tmpdir):
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()
    with pytest.raises(FileNotFoundError):
        fs.info(tmpdir + "/nofile")
    fs.touch(tmpdir + "/afile")
    i1 = fs.ukey(tmpdir + "/afile")

    assert tmpdir + "/afile" in fs.ls(tmpdir)

    with fs.open(tmpdir + "/afile", "wb") as f:
        f.write(b"data")
    i2 = fs.ukey(tmpdir + "/afile")
    assert i1 != i2  # because file changed

    fs.copy(tmpdir + "/afile", tmpdir + "/afile2")
    assert tmpdir + "/afile2" in fs.ls(tmpdir)

    fs.move(tmpdir + "/afile", tmpdir + "/afile3")
    assert not fs.exists(tmpdir + "/afile")

    fs.rm(tmpdir + "/afile3", recursive=True)
    assert not fs.exists(tmpdir + "/afile3")

    fs.rm(tmpdir, recursive=True)
    assert not fs.exists(tmpdir)
Пример #2
0
def test_file_ops(tmpdir):
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()
    with pytest.raises(FileNotFoundError):
        fs.info(tmpdir + '/nofile')
    fs.touch(tmpdir + '/afile')
    i1 = fs.ukey(tmpdir + '/afile')

    assert tmpdir + '/afile' in fs.ls(tmpdir)

    with fs.open(tmpdir + '/afile', 'wb') as f:
        f.write(b'data')
    i2 = fs.ukey(tmpdir + '/afile')
    assert i1 != i2  # because file changed

    fs.copy(tmpdir + '/afile', tmpdir + '/afile2')
    assert tmpdir + '/afile2' in fs.ls(tmpdir)

    fs.move(tmpdir + '/afile', tmpdir + '/afile3')
    assert not fs.exists(tmpdir + '/afile')

    fs.rm(tmpdir + '/afile3', recursive=True)
    assert not fs.exists(tmpdir + '/afile3')

    fs.rm(tmpdir, recursive=True)
    assert not fs.exists(tmpdir)
Пример #3
0
def install_bids(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    from pathlib import Path

    from fsspec.implementations.local import LocalFileSystem

    fs = LocalFileSystem(auto_mkdir=True)

    source_file = fs.open(fs.ls(sourcedata_dir)[0], mode="rb")
    target_file = fs.open(bids_filename, mode="wb")

    with source_file as sf, target_file as tf:
        tf.write(sf.read())

    source_basename = Path(Path(Path(fs.ls(sourcedata_dir)[0]).stem).stem)
    target_basename = Path(bids_filename.stem).stem

    # The following part adds the sidecar files related to the nifti with the same name: it can be tsv or json files.
    # It may or may not be used, since there might not be any sidecars.
    sidecar_dir = sourcedata_dir.parent / "BIDS"
    for source_sidecar in sidecar_dir.rglob(f"{source_basename}*"):
        target_sidecar = Path.joinpath(
            bids_filename.parent, target_basename).with_name(
                f"{target_basename}{source_sidecar.suffix}")
        source_file = fs.open(source_sidecar, mode="rb")
        target_file = fs.open(target_sidecar, mode="wb")
        with source_file as sf, target_file as tf:
            tf.write(sf.read())
Пример #4
0
def test_directories(tmpdir):
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()
    fs.mkdir(tmpdir + "/dir")
    assert tmpdir + "/dir" in fs.ls(tmpdir)
    assert fs.ls(tmpdir, True)[0]["type"] == "directory"
    fs.rmdir(tmpdir + "/dir")
    assert not fs.ls(tmpdir)
Пример #5
0
def test_directories(tmpdir):
    tmpdir = str(tmpdir)
    fs = LocalFileSystem()
    fs.mkdir(tmpdir + '/dir')
    assert tmpdir + '/dir' in fs.ls(tmpdir)
    assert fs.ls(tmpdir, True)[0]['type'] == 'directory'
    fs.rmdir(tmpdir + '/dir')
    assert not fs.ls(tmpdir)
Пример #6
0
def test_directories(tmpdir):
    tmpdir = make_path_posix(str(tmpdir))
    fs = LocalFileSystem()
    fs.mkdir(tmpdir + "/dir")
    assert tmpdir + "/dir" in fs.ls(tmpdir)
    assert fs.ls(tmpdir, True)[0]["type"] == "directory"
    fs.rmdir(tmpdir + "/dir")
    assert not fs.ls(tmpdir)
    assert fs.ls(fs.root_marker)
Пример #7
0
def test_file_ops(tmpdir):
    tmpdir = make_path_posix(str(tmpdir))
    fs = LocalFileSystem(auto_mkdir=True)
    with pytest.raises(FileNotFoundError):
        fs.info(tmpdir + "/nofile")
    fs.touch(tmpdir + "/afile")
    i1 = fs.ukey(tmpdir + "/afile")

    assert tmpdir + "/afile" in fs.ls(tmpdir)

    with fs.open(tmpdir + "/afile", "wb") as f:
        f.write(b"data")
    i2 = fs.ukey(tmpdir + "/afile")
    assert i1 != i2  # because file changed

    fs.copy(tmpdir + "/afile", tmpdir + "/afile2")
    assert tmpdir + "/afile2" in fs.ls(tmpdir)

    fs.move(tmpdir + "/afile", tmpdir + "/afile3")
    assert not fs.exists(tmpdir + "/afile")

    fs.cp(tmpdir + "/afile3", tmpdir + "/deeply/nested/file")
    assert fs.exists(tmpdir + "/deeply/nested/file")

    fs.rm(tmpdir + "/afile3", recursive=True)
    assert not fs.exists(tmpdir + "/afile3")

    files = [tmpdir + "/afile4", tmpdir + "/afile5"]
    [fs.touch(f) for f in files]

    with pytest.raises(TypeError):
        fs.rm_file(files)
    fs.rm(files)
    assert all(not fs.exists(f) for f in files)

    fs.touch(tmpdir + "/afile6")
    fs.rm_file(tmpdir + "/afile6")
    assert not fs.exists(tmpdir + "/afile6")

    # IsADirectoryError raised on Linux, PermissionError on Windows
    with pytest.raises((IsADirectoryError, PermissionError)):
        fs.rm_file(tmpdir)

    fs.rm(tmpdir, recursive=True)
    assert not fs.exists(tmpdir)
Пример #8
0
def install_nifti(sourcedata_dir: PathLike, bids_filename: PathLike) -> None:
    from fsspec.implementations.local import LocalFileSystem

    fs = LocalFileSystem(auto_mkdir=True)
    source_file = fs.open(fs.ls(sourcedata_dir)[0], mode="rb")
    target_file = fs.open(bids_filename, mode="wb", compression="gzip")

    with source_file as sf, target_file as tf:
        tf.write(sf.read())
Пример #9
0
class FsspecLocalFileSystem(AbstractFileSystem):
    sep = os.sep

    def __init__(self, *args, **kwargs):
        from fsspec.implementations.local import LocalFileSystem as LocalFS

        super().__init__(*args, **kwargs)
        self.fs = LocalFS()

    def makedirs(self, path, exist_ok=False):
        makedirs(path, exist_ok=exist_ok)

    def mkdir(self, path, create_parents=True, **kwargs):
        if self.exists(path):
            raise FileExistsError(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            os.mkdir(path, **kwargs)

    def lexists(self, path, **kwargs):
        return os.path.lexists(path)

    def exists(self, path, **kwargs):
        # TODO: replace this with os.path.exists once the problem is fixed on
        # the fsspec https://github.com/intake/filesystem_spec/issues/742
        return os.path.lexists(path)

    def checksum(self, path) -> str:
        from fsspec.utils import tokenize

        st = os.stat(path)
        return str(int(tokenize([st.st_ino, st.st_mtime, st.st_size]), 16))

    def info(self, path, **kwargs):
        return self.fs.info(path)

    def ls(self, path, **kwargs):
        return self.fs.ls(path, **kwargs)

    def isfile(self, path) -> bool:
        return os.path.isfile(path)

    def isdir(self, path) -> bool:
        return os.path.isdir(path)

    def walk(self, path, maxdepth=None, topdown=True, **kwargs):
        """Directory fs generator.

        See `os.walk` for the docs. Differences:
        - no support for symlinks
        """
        for root, dirs, files in os.walk(
            path,
            topdown=topdown,
        ):
            yield os.path.normpath(root), dirs, files

    def find(self, path, **kwargs):
        for root, _, files in self.walk(path, **kwargs):
            for file in files:
                # NOTE: os.path.join is ~5.5 times slower
                yield f"{root}{os.sep}{file}"

    @classmethod
    def _parent(cls, path):
        return os.path.dirname(path)

    def put_file(self, lpath, rpath, callback=None, **kwargs):
        parent = self._parent(rpath)
        makedirs(parent, exist_ok=True)
        tmp_file = os.path.join(parent, tmp_fname())
        copyfile(lpath, tmp_file, callback=callback)
        os.replace(tmp_file, rpath)

    def get_file(self, rpath, lpath, callback=None, **kwargs):
        copyfile(rpath, lpath, callback=callback)

    def mv(self, path1, path2, **kwargs):
        self.makedirs(self._parent(path2), exist_ok=True)
        move(path1, path2)

    def rmdir(self, path):
        os.rmdir(path)

    def rm_file(self, path):
        remove(path)

    def rm(self, path, recursive=False, maxdepth=None):
        remove(path)

    def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
        tmp_info = os.path.join(self._parent(path2), tmp_fname(""))
        try:
            copyfile(path1, tmp_info)
            os.rename(tmp_info, path2)
        except Exception:
            self.rm_file(tmp_info)
            raise

    def open(self, path, mode="r", encoding=None, **kwargs):
        return open(path, mode=mode, encoding=encoding)

    def symlink(self, path1, path2):
        return System.symlink(path1, path2)

    @staticmethod
    def is_symlink(path):
        return System.is_symlink(path)

    @staticmethod
    def is_hardlink(path):
        return System.is_hardlink(path)

    def hardlink(self, path1, path2):
        # If there are a lot of empty files (which happens a lot in datasets),
        # and the cache type is `hardlink`, we might reach link limits and
        # will get something like: `too many links error`
        #
        # This is because all those empty files will have the same hash
        # (i.e. 68b329da9893e34099c7d8ad5cb9c940), therefore, they will be
        # linked to the same file in the cache.
        #
        # From https://en.wikipedia.org/wiki/Hard_link
        #   * ext4 limits the number of hard links on a file to 65,000
        #   * Windows with NTFS has a limit of 1024 hard links on a file
        #
        # That's why we simply create an empty file rather than a link.
        if self.size(path1) == 0:
            self.open(path2, "w").close()

            logger.debug("Created empty file: %s -> %s", path1, path2)
            return

        return System.hardlink(path1, path2)

    def reflink(self, path1, path2):
        return System.reflink(path1, path2)
Пример #10
0
class FileSystem(AbstractFileSystem):
    """Wrapper for easier initialization of various file-system classes"""

    def __init__(self, name="local", assumed_role=None, endpoint_url=None):
        super().__init__()
        self.name = name
        self.assume_client = None
        self.assume_role = assumed_role
        self.endpoint_url = endpoint_url

        if self.name == "local":
            self.filesystem = LocalFileSystem()
        elif self.name == "s3":
            session = botocore.session.get_session()
            if self.assume_role:
                self.assume_client = session.create_client("sts")
                session_credentials = (
                    botocore.credentials.RefreshableCredentials.create_from_metadata(
                        metadata=self._sts_refresh(),
                        refresh_using=self._sts_refresh,
                        method="sts-assume-role",
                    )
                )
                session._credentials = session_credentials

            client_kwargs = {"endpoint_url": endpoint_url} if endpoint_url else None
            self.filesystem = s3fs.S3FileSystem(
                session=session, client_kwargs=client_kwargs
            )
        else:
            raise ValueError(f"Unsupported FileReader type: {type}")

        for method_name, method in inspect.getmembers(
            self.filesystem, predicate=inspect.ismethod
        ):
            if method_name not in (
                "__init__",
                "_rm",
                "cp_file",
                "created",
                "ls",
                "modified",
                "sign",
            ):
                setattr(self, method_name, method)

    def _sts_refresh(self):
        """Refresh tokens by calling assume_role again"""
        response = self.assume_client.assume_role(
            RoleArn=self.assume_role,
            RoleSessionName=f"data-toolz-filesystem-s3-{uuid4()}",
            DurationSeconds=3600,
        ).get("Credentials")
        return {
            "access_key": response.get("AccessKeyId"),
            "secret_key": response.get("SecretAccessKey"),
            "token": response.get("SessionToken"),
            "expiry_time": response.get("Expiration").isoformat(),
        }

    def _rm(self, path):
        return self.filesystem.rm(path=path)

    def cp_file(self, path1, path2, **kwargs):
        return self.filesystem.copy(path1=path1, path2=path2, **kwargs)

    def created(self, path):
        return self.filesystem.created(path=path)

    def ls(self, path, detail=True, **kwargs):
        return self.filesystem.ls(path=path, detail=detail, **kwargs)

    def modified(self, path):
        return self.filesystem.modified(path=path)

    def sign(self, path, expiration=100, **kwargs):
        return self.filesystem.sign(path=path, expiration=expiration, **kwargs)