Пример #1
0
def test_links(tmpdir):
    """Check how ``LocalFileSystem`` reports symlinks that point at a file.

    A regular file and two symlinks to it are created.  ``info`` must report
    the target as ``"file"`` and each link as ``"link"``, every path must
    report the target's size, and reading through either link via
    ``fsspec.open`` must return the target's bytes.
    """
    root = str(tmpdir)
    target = os.path.join(root, "target")
    links = [os.path.join(root, name) for name in ("link1", "link2")]
    payload = b"my target data"

    with open(target, "wb") as handle:
        handle.write(payload)
    for link in links:
        os.symlink(target, link)

    fs = LocalFileSystem()

    # Type reporting: the target is a plain file, the others are links.
    assert fs.info(target)["type"] == "file"
    for link in links:
        assert fs.info(link)["type"] == "link"

    # Size reporting: links expose the size of the data they point at.
    for path in [target] + links:
        assert fs.info(path)["size"] == len(payload)

    # Reading through a link yields the target's content.
    for link in links:
        with fsspec.open(link, "rb") as handle:
            assert handle.read() == payload
Пример #2
0
def test_links(tmpdir):
    """Check how ``LocalFileSystem`` reports symlinks that point at a file.

    A regular file and two symlinks to it are created.  If creating the
    symlinks raises ``OSError`` and ``WIN`` is truthy, the test is marked as
    an expected failure; otherwise the error propagates.  ``info`` must then
    report the target as ``"file"``, each link as ``"link"``, and the
    target's size for all three paths, and reading through either link must
    return the target's bytes.
    """
    root = str(tmpdir)
    target = os.path.join(root, "target")
    links = [os.path.join(root, name) for name in ("link1", "link2")]
    payload = b"my target data"

    with open(target, "wb") as handle:
        handle.write(payload)

    try:
        for link in links:
            os.symlink(target, link)
    except OSError:
        if not WIN:
            raise
        pytest.xfail("Ran on win without admin permissions")

    fs = LocalFileSystem()

    # Type reporting: the target is a plain file, the others are links.
    assert fs.info(target)["type"] == "file"
    for link in links:
        assert fs.info(link)["type"] == "link"

    # Size reporting: links expose the size of the data they point at.
    for path in [target] + links:
        assert fs.info(path)["size"] == len(payload)

    # Reading through a link yields the target's content.
    for link in links:
        with fsspec.open(link, "rb") as handle:
            assert handle.read() == payload
Пример #3
0
def test_file_ops(tmpdir):
    """Exercise basic file operations of ``LocalFileSystem`` in a temp dir.

    Covers ``info`` on a missing path, ``touch``, ``ukey``, ``ls``, ``open``
    for writing, ``copy``, ``move``, ``rm`` and ``exists``.
    """
    base = str(tmpdir)
    first = base + "/afile"
    duplicate = base + "/afile2"
    renamed = base + "/afile3"
    fs = LocalFileSystem()

    # A path that was never created must not be resolvable.
    with pytest.raises(FileNotFoundError):
        fs.info(base + "/nofile")

    fs.touch(first)
    key_before = fs.ukey(first)

    assert first in fs.ls(base)

    with fs.open(first, "wb") as handle:
        handle.write(b"data")
    key_after = fs.ukey(first)
    assert key_before != key_after  # because file changed

    fs.copy(first, duplicate)
    assert duplicate in fs.ls(base)

    fs.move(first, renamed)
    assert not fs.exists(first)

    fs.rm(renamed, recursive=True)
    assert not fs.exists(renamed)

    # Finally remove the whole directory tree.
    fs.rm(base, recursive=True)
    assert not fs.exists(base)
Пример #4
0
def test_links(tmpdir):
    """Check how ``LocalFileSystem`` reports symlinks that point at a file.

    A regular file and two symlinks to it are created.  ``info`` must report
    the target as ``'file'`` and each link as ``'link'``, every path must
    report the target's size, and reading through either link via
    ``fsspec.open`` must return the target's bytes.
    """
    root = str(tmpdir)
    target = os.path.join(root, 'target')
    links = [os.path.join(root, name) for name in ('link1', 'link2')]
    payload = b'my target data'

    with open(target, 'wb') as handle:
        handle.write(payload)
    for link in links:
        os.symlink(target, link)

    fs = LocalFileSystem()

    # Type reporting: the target is a plain file, the others are links.
    assert fs.info(target)['type'] == 'file'
    for link in links:
        assert fs.info(link)['type'] == 'link'

    # Size reporting: links expose the size of the data they point at.
    for path in [target] + links:
        assert fs.info(path)['size'] == len(payload)

    # Reading through a link yields the target's content.
    for link in links:
        with fsspec.open(link, 'rb') as handle:
            assert handle.read() == payload
Пример #5
0
def test_file_ops(tmpdir):
    """Exercise basic file operations of ``LocalFileSystem`` in a temp dir.

    Covers ``info`` on a missing path, ``touch``, ``ukey``, ``ls``, ``open``
    for writing, ``copy``, ``move``, ``rm`` and ``exists``.
    """
    base = str(tmpdir)
    first = base + '/afile'
    duplicate = base + '/afile2'
    renamed = base + '/afile3'
    fs = LocalFileSystem()

    # A path that was never created must not be resolvable.
    with pytest.raises(FileNotFoundError):
        fs.info(base + '/nofile')

    fs.touch(first)
    key_before = fs.ukey(first)

    assert first in fs.ls(base)

    with fs.open(first, 'wb') as handle:
        handle.write(b'data')
    key_after = fs.ukey(first)
    assert key_before != key_after  # because file changed

    fs.copy(first, duplicate)
    assert duplicate in fs.ls(base)

    fs.move(first, renamed)
    assert not fs.exists(first)

    fs.rm(renamed, recursive=True)
    assert not fs.exists(renamed)

    # Finally remove the whole directory tree.
    fs.rm(base, recursive=True)
    assert not fs.exists(base)
Пример #6
0
def test_linked_directories(tmpdir):
    """Check how ``LocalFileSystem.info`` reports symlinks to a directory.

    A directory and two symlinks to it are created.  If creating the
    symlinks raises ``OSError`` and ``WIN`` is truthy, the test is marked as
    an expected failure; otherwise the error propagates.  All three paths
    must report type ``"directory"``, and only the links must have a truthy
    ``"islink"`` entry.
    """
    root = str(tmpdir)

    target_dir = os.path.join(root, "target")
    link_dirs = [os.path.join(root, name) for name in ("link1", "link2")]

    os.makedirs(target_dir)

    try:
        for link in link_dirs:
            os.symlink(target_dir, link)
    except OSError:
        if not WIN:
            raise
        pytest.xfail("Ran on win without admin permissions")

    fs = LocalFileSystem()

    # Links to directories are still reported as directories.
    for path in [target_dir] + link_dirs:
        assert fs.info(path)["type"] == "directory"

    # The "islink" flag is what tells the real directory from its links.
    assert not fs.info(target_dir)["islink"]
    for link in link_dirs:
        assert fs.info(link)["islink"]
Пример #7
0
def test_file_ops(tmpdir):
    """Exercise file operations of ``LocalFileSystem(auto_mkdir=True)``.

    Covers ``info`` on a missing path, ``touch``, ``ukey``, ``ls``, ``open``
    for writing, ``copy``/``cp`` (including into a nested destination that
    does not exist yet), ``move``, ``rm`` with a single path and with a list
    of paths, ``rm_file`` with a single path, a list and a directory, and
    ``exists``.
    """
    base = make_path_posix(str(tmpdir))
    fs = LocalFileSystem(auto_mkdir=True)

    first = base + "/afile"
    duplicate = base + "/afile2"
    renamed = base + "/afile3"
    nested = base + "/deeply/nested/file"
    single = base + "/afile6"

    # A path that was never created must not be resolvable.
    with pytest.raises(FileNotFoundError):
        fs.info(base + "/nofile")

    fs.touch(first)
    key_before = fs.ukey(first)

    assert first in fs.ls(base)

    with fs.open(first, "wb") as handle:
        handle.write(b"data")
    key_after = fs.ukey(first)
    assert key_before != key_after  # because file changed

    fs.copy(first, duplicate)
    assert duplicate in fs.ls(base)

    fs.move(first, renamed)
    assert not fs.exists(first)

    # The destination's parent directories do not exist before this copy.
    fs.cp(renamed, nested)
    assert fs.exists(nested)

    fs.rm(renamed, recursive=True)
    assert not fs.exists(renamed)

    files = [base + "/afile4", base + "/afile5"]
    for name in files:
        fs.touch(name)

    # rm_file takes a single path only; rm accepts a list.
    with pytest.raises(TypeError):
        fs.rm_file(files)
    fs.rm(files)
    assert not any(fs.exists(name) for name in files)

    fs.touch(single)
    fs.rm_file(single)
    assert not fs.exists(single)

    # IsADirectoryError raised on Linux, PermissionError on Windows
    with pytest.raises((IsADirectoryError, PermissionError)):
        fs.rm_file(base)

    # Finally remove the whole directory tree.
    fs.rm(base, recursive=True)
    assert not fs.exists(base)
Пример #8
0
class LocalFileSystem(FileSystem):
    """Filesystem implementation backed by the local disk.

    Most operations call ``os`` or module-level helpers (``remove``,
    ``makedirs``, ``move``, ``copyfile``, ...) directly; ``info`` is
    delegated to an fsspec ``LocalFileSystem`` instance kept on ``self.fs``.
    """

    # Path separator of the host operating system.
    sep = os.sep

    scheme = Schemes.LOCAL
    PARAM_CHECKSUM = "md5"
    PARAM_PATH = "path"
    TRAVERSE_PREFIX_LEN = 2

    def __init__(self, **config):
        """Initialise the base class with ``config`` and create ``self.fs``."""
        from fsspec.implementations.local import LocalFileSystem as LocalFS

        super().__init__(**config)
        self.fs = LocalFS()

    @staticmethod
    def open(path, mode="r", encoding=None, **kwargs):
        """Open ``path`` with the builtin ``open``; extra kwargs are ignored."""
        return open(path, mode=mode, encoding=encoding)

    def exists(self, path) -> bool:
        """Return True if ``path`` exists, counting broken symlinks too."""
        # TODO: replace this with os.path.exists once the problem is fixed on
        # the fsspec https://github.com/intake/filesystem_spec/issues/742
        return os.path.lexists(path)

    def checksum(self, path) -> str:
        """Return a token derived from the inode, mtime and size of ``path``.

        The hexadecimal token produced by ``fsspec.utils.tokenize`` is
        converted to its decimal string representation.
        """
        from fsspec.utils import tokenize

        st = os.stat(path)

        return str(int(tokenize([st.st_ino, st.st_mtime, st.st_size]), 16))

    def isfile(self, path) -> bool:
        """Return True if ``path`` is a regular file (``os.path.isfile``)."""
        return os.path.isfile(path)

    def isdir(self, path) -> bool:
        """Return True if ``path`` is a directory (``os.path.isdir``)."""
        return os.path.isdir(path)

    def iscopy(self, path):
        """Return True if ``path`` is neither a symlink nor a hardlink."""
        return not (System.is_symlink(path) or System.is_hardlink(path))

    def walk(self, top, topdown=True, onerror=None, **kwargs):
        """Directory fs generator.

        See `os.walk` for the docs. Differences:
        - no support for symlinks (``followlinks`` is never passed on)
        - each yielded root is normalised with ``os.path.normpath``
        - extra keyword arguments are accepted and ignored
        """
        for root, dirs, files in os.walk(top, topdown=topdown,
                                         onerror=onerror):
            yield os.path.normpath(root), dirs, files

    def find(self, path, prefix=None):
        """Yield the path of every file found by walking ``path``.

        ``prefix`` is accepted but not used by this implementation.
        """
        for root, _, files in self.walk(path):
            for file in files:
                # NOTE: os.path.join is ~5.5 times slower
                yield f"{root}{os.sep}{file}"

    def is_empty(self, path):
        """Return True for a zero-byte file or a directory with no entries."""
        if self.isfile(path) and os.path.getsize(path) == 0:
            return True

        if self.isdir(path) and len(os.listdir(path)) == 0:
            return True

        return False

    def remove(self, path):
        """Remove ``path`` using the module-level ``remove`` helper."""
        remove(path)

    def makedirs(self, path, **kwargs):
        """Create ``path``; ``exist_ok`` defaults to True when not given."""
        makedirs(path, exist_ok=kwargs.pop("exist_ok", True))

    def move(self, from_info, to_info):
        """Move ``from_info`` to ``to_info``, creating the parent first."""
        self.makedirs(self.path.parent(to_info))
        move(from_info, to_info)

    def copy(self, from_info, to_info):
        """Copy ``from_info`` to ``to_info`` through a temporary sibling file.

        The data is written to a temporary name next to the destination and
        then renamed onto it.  On any failure the temporary file is removed
        and the exception is re-raised.
        """
        tmp_info = self.path.join(self.path.parent(to_info), tmp_fname(""))
        try:
            copyfile(from_info, tmp_info)
            os.rename(tmp_info, to_info)
        except Exception:
            self.remove(tmp_info)
            raise

    def upload_fobj(self, fobj, to_info, **kwargs):
        """Write the file object ``fobj`` to ``to_info`` via a temporary file.

        The destination's parent directory is created first.  On any failure
        the temporary file is removed and the exception is re-raised.
        """
        self.makedirs(self.path.parent(to_info))
        tmp_info = self.path.join(self.path.parent(to_info), tmp_fname(""))
        try:
            copy_fobj_to_file(fobj, tmp_info)
            os.rename(tmp_info, to_info)
        except Exception:
            self.remove(tmp_info)
            raise

    @staticmethod
    def symlink(from_info, to_info):
        """Create a symlink via ``System.symlink(from_info, to_info)``."""
        System.symlink(from_info, to_info)

    @staticmethod
    def is_symlink(path):
        """Return ``System.is_symlink(path)``."""
        return System.is_symlink(path)

    def hardlink(self, from_info, to_info):
        """Hardlink ``from_info`` to ``to_info``; empty files are recreated.

        When the source has size zero, an empty file is created at
        ``to_info`` instead of a link (see the comment below for why).
        """
        # If there are a lot of empty files (which happens a lot in datasets),
        # and the cache type is `hardlink`, we might reach link limits and
        # will get something like: `too many links error`
        #
        # This is because all those empty files will have the same hash
        # (i.e. 68b329da9893e34099c7d8ad5cb9c940), therefore, they will be
        # linked to the same file in the cache.
        #
        # From https://en.wikipedia.org/wiki/Hard_link
        #   * ext4 limits the number of hard links on a file to 65,000
        #   * Windows with NTFS has a limit of 1024 hard links on a file
        #
        # That's why we simply create an empty file rather than a link.
        if self.getsize(from_info) == 0:
            self.open(to_info, "w").close()

            logger.debug("Created empty file: %s -> %s", from_info, to_info)
            return

        System.hardlink(from_info, to_info)

    @staticmethod
    def is_hardlink(path):
        """Return ``System.is_hardlink(path)``."""
        return System.is_hardlink(path)

    def reflink(self, from_info, to_info):
        """Create a reflink via ``System.reflink(from_info, to_info)``."""
        System.reflink(from_info, to_info)

    def info(self, path):
        """Return the fsspec ``info`` result for ``path``."""
        return self.fs.info(path)

    def put_file(self,
                 from_file,
                 to_info,
                 callback=DEFAULT_CALLBACK,
                 **kwargs):
        """Copy ``from_file`` to ``to_info`` through a temporary sibling file.

        The destination's parent directory is created if needed, the data is
        copied to a temporary name inside it (reporting progress through
        ``callback``) and the temporary file is then moved onto ``to_info``
        with ``os.replace``.  On any failure the temporary file is removed
        and the exception is re-raised.
        """
        parent = self.path.parent(to_info)
        makedirs(parent, exist_ok=True)
        tmp_file = self.path.join(parent, tmp_fname())
        try:
            copyfile(from_file, tmp_file, callback=callback)
            os.replace(tmp_file, to_info)
        except Exception:
            # Same cleanup as copy()/upload_fobj(): do not leave a partially
            # written temporary file next to the destination.
            # NOTE(review): assumes remove() tolerates a path that was never
            # created, as copy() already relies on -- confirm.
            self.remove(tmp_file)
            raise

    def get_file(self,
                 from_info,
                 to_file,
                 callback=DEFAULT_CALLBACK,
                 **kwargs):
        """Copy ``from_info`` to ``to_file``, reporting through ``callback``."""
        copyfile(from_info, to_file, callback=callback)
Пример #9
0
class FsspecLocalFileSystem(AbstractFileSystem):
    """Local-disk filesystem exposed through the fsspec method names.

    Most operations call ``os`` or module-level helpers (``remove``,
    ``makedirs``, ``move``, ``copyfile``, ...) directly; ``info`` and ``ls``
    are delegated to an fsspec ``LocalFileSystem`` kept on ``self.fs``.
    """

    # Path separator of the host operating system.
    sep = os.sep

    def __init__(self, *args, **kwargs):
        """Initialise the base class and create the ``self.fs`` delegate."""
        from fsspec.implementations.local import LocalFileSystem as LocalFS

        super().__init__(*args, **kwargs)
        self.fs = LocalFS()

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` using the module-level ``makedirs`` helper."""
        makedirs(path, exist_ok=exist_ok)

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create the directory ``path``.

        Raises:
            FileExistsError: if ``path`` already exists.

        With ``create_parents`` the directory is created through
        ``makedirs``; otherwise ``os.mkdir`` is called with ``kwargs``.
        """
        if self.exists(path):
            raise FileExistsError(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            os.mkdir(path, **kwargs)

    def lexists(self, path, **kwargs):
        """Return True if ``path`` exists, counting broken symlinks too."""
        return os.path.lexists(path)

    def exists(self, path, **kwargs):
        """Return True if ``path`` exists, counting broken symlinks too."""
        # TODO: replace this with os.path.exists once the problem is fixed on
        # the fsspec https://github.com/intake/filesystem_spec/issues/742
        return os.path.lexists(path)

    def checksum(self, path) -> str:
        """Return a token derived from the inode, mtime and size of ``path``.

        The hexadecimal token produced by ``fsspec.utils.tokenize`` is
        converted to its decimal string representation.
        """
        from fsspec.utils import tokenize

        st = os.stat(path)
        return str(int(tokenize([st.st_ino, st.st_mtime, st.st_size]), 16))

    def info(self, path, **kwargs):
        """Return the fsspec ``info`` result for ``path``; kwargs are ignored."""
        return self.fs.info(path)

    def ls(self, path, **kwargs):
        """Return the fsspec ``ls`` result for ``path``, forwarding kwargs."""
        return self.fs.ls(path, **kwargs)

    def isfile(self, path) -> bool:
        """Return True if ``path`` is a regular file (``os.path.isfile``)."""
        return os.path.isfile(path)

    def isdir(self, path) -> bool:
        """Return True if ``path`` is a directory (``os.path.isdir``)."""
        return os.path.isdir(path)

    def walk(self, path, maxdepth=None, topdown=True, **kwargs):
        """Directory fs generator.

        See `os.walk` for the docs. Differences:
        - no support for symlinks (``followlinks`` is never passed on)
        - each yielded root is normalised with ``os.path.normpath``
        - ``maxdepth`` and extra keyword arguments are accepted and ignored
        """
        for root, dirs, files in os.walk(
            path,
            topdown=topdown,
        ):
            yield os.path.normpath(root), dirs, files

    def find(self, path, **kwargs):
        """Yield the path of every file found by walking ``path``.

        Keyword arguments are forwarded to ``walk``.
        """
        for root, _, files in self.walk(path, **kwargs):
            for file in files:
                # NOTE: os.path.join is ~5.5 times slower
                yield f"{root}{os.sep}{file}"

    @classmethod
    def _parent(cls, path):
        """Return the directory component of ``path`` (``os.path.dirname``)."""
        return os.path.dirname(path)

    def put_file(self, lpath, rpath, callback=None, **kwargs):
        """Copy ``lpath`` to ``rpath`` through a temporary sibling file.

        The destination's parent directory is created if needed, the data is
        copied to a temporary name inside it (passing ``callback`` on to
        ``copyfile``) and the temporary file is then moved onto ``rpath``
        with ``os.replace``.  On any failure the temporary file is removed
        and the exception is re-raised.
        """
        parent = self._parent(rpath)
        makedirs(parent, exist_ok=True)
        tmp_file = os.path.join(parent, tmp_fname())
        try:
            copyfile(lpath, tmp_file, callback=callback)
            os.replace(tmp_file, rpath)
        except Exception:
            # Same cleanup as copy(): do not leave a partially written
            # temporary file next to the destination.
            # NOTE(review): assumes rm_file() tolerates a path that was never
            # created, as copy() already relies on -- confirm.
            self.rm_file(tmp_file)
            raise

    def get_file(self, rpath, lpath, callback=None, **kwargs):
        """Copy ``rpath`` to ``lpath``, passing ``callback`` to ``copyfile``."""
        copyfile(rpath, lpath, callback=callback)

    def mv(self, path1, path2, **kwargs):
        """Move ``path1`` to ``path2``, creating the parent of ``path2``."""
        self.makedirs(self._parent(path2), exist_ok=True)
        move(path1, path2)

    def rmdir(self, path):
        """Remove the directory ``path`` with ``os.rmdir``."""
        os.rmdir(path)

    def rm_file(self, path):
        """Remove ``path`` using the module-level ``remove`` helper."""
        remove(path)

    def rm(self, path, recursive=False, maxdepth=None):
        """Remove ``path`` using the module-level ``remove`` helper.

        ``recursive`` and ``maxdepth`` are accepted but not used here.
        """
        remove(path)

    def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
        """Copy ``path1`` to ``path2`` through a temporary sibling file.

        The data is written to a temporary name next to the destination and
        then renamed onto it.  On any failure the temporary file is removed
        and the exception is re-raised.  ``recursive``, ``on_error`` and
        extra keyword arguments are accepted but not used here.
        """
        tmp_info = os.path.join(self._parent(path2), tmp_fname(""))
        try:
            copyfile(path1, tmp_info)
            os.rename(tmp_info, path2)
        except Exception:
            self.rm_file(tmp_info)
            raise

    def open(self, path, mode="r", encoding=None, **kwargs):
        """Open ``path`` with the builtin ``open``; extra kwargs are ignored."""
        return open(path, mode=mode, encoding=encoding)

    def symlink(self, path1, path2):
        """Return ``System.symlink(path1, path2)``."""
        return System.symlink(path1, path2)

    @staticmethod
    def is_symlink(path):
        """Return ``System.is_symlink(path)``."""
        return System.is_symlink(path)

    @staticmethod
    def is_hardlink(path):
        """Return ``System.is_hardlink(path)``."""
        return System.is_hardlink(path)

    def hardlink(self, path1, path2):
        """Hardlink ``path1`` to ``path2``; empty files are recreated instead.

        When the source has size zero, an empty file is created at ``path2``
        instead of a link (see the comment below for why) and None is
        returned; otherwise the result of ``System.hardlink`` is returned.
        """
        # If there are a lot of empty files (which happens a lot in datasets),
        # and the cache type is `hardlink`, we might reach link limits and
        # will get something like: `too many links error`
        #
        # This is because all those empty files will have the same hash
        # (i.e. 68b329da9893e34099c7d8ad5cb9c940), therefore, they will be
        # linked to the same file in the cache.
        #
        # From https://en.wikipedia.org/wiki/Hard_link
        #   * ext4 limits the number of hard links on a file to 65,000
        #   * Windows with NTFS has a limit of 1024 hard links on a file
        #
        # That's why we simply create an empty file rather than a link.
        if self.size(path1) == 0:
            self.open(path2, "w").close()

            logger.debug("Created empty file: %s -> %s", path1, path2)
            return

        return System.hardlink(path1, path2)

    def reflink(self, path1, path2):
        """Return ``System.reflink(path1, path2)``."""
        return System.reflink(path1, path2)