Пример #1
0
def test_recursive_paths():
    """Check that ``recursive_paths`` gives the same pair for each input spelling.

    Every (source, destination) combination below is expected to come back as
    the glob ``/tmp/*`` paired with the directory ``/tmp/``.
    """
    expected = ("/tmp/*", "/tmp/")
    cases = (
        ("/tmp", "/tmp"),
        ("/tmp/", "/tmp/"),
        ("/tmp/*", "/tmp"),
    )
    for source, destination in cases:
        # Unpacking also verifies that exactly two values are returned.
        from_path, to_path = FSSpecPersistence.recursive_paths(source, destination)
        assert (from_path, to_path) == expected
Пример #2
0
def test_get_anonymous_filesystem():
    """A plain path yields no anonymous filesystem; an ``s3://`` URI yields one."""
    persistence = FSSpecPersistence()

    local_result = persistence.get_anonymous_filesystem("/abc")
    assert local_result is None

    s3_result = persistence.get_anonymous_filesystem("s3://abc")
    assert s3_result is not None
    assert s3_result.protocol == ["s3", "s3a"]
Пример #3
0
def test_exists():
    """``exists`` is false for a missing path and true for a freshly written file."""
    persistence = FSSpecPersistence()
    assert not persistence.exists("/tmp/non-existent")

    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "f.txt")
        with open(target, "w") as handle:
            handle.write("hello")

        assert persistence.exists(target)
Пример #4
0
def test_get():
    """Copy a single file with ``get`` and verify the copy's content."""
    persistence = FSSpecPersistence()
    with tempfile.TemporaryDirectory() as workdir:
        source = os.path.join(workdir, "f.txt")
        destination = os.path.join(workdir, "t.txt")

        with open(source, "w") as writer:
            writer.write("hello")

        persistence.get(source, destination)

        with open(destination, "r") as reader:
            copied = reader.read()
        assert copied == "hello"
Пример #5
0
 def encode(
     self,
     ctx: FlyteContext,
     structured_dataset: StructuredDataset,
     structured_dataset_type: StructuredDatasetType,
 ) -> literals.StructuredDataset:
     """Write the dataset's dataframe with ``pq.write_table`` and return a literal for it.

     The destination is ``structured_dataset.uri``; when that is falsy, a
     directory is requested from ``ctx.file_access.get_random_remote_directory()``.
     A non-remote destination directory is created first. The table is written
     to a file named ``00000`` inside the destination, and the returned literal
     carries the destination URI (not the file path) together with metadata
     built from ``structured_dataset_type``.
     """
     target_uri = typing.cast(str, structured_dataset.uri)
     if not target_uri:
         target_uri = ctx.file_access.get_random_remote_directory()

     # Local destinations need the directory to exist before writing into it.
     if not ctx.file_access.is_remote(target_uri):
         Path(target_uri).mkdir(parents=True, exist_ok=True)

     part_path = os.path.join(target_uri, f"{0:05}")
     persistence = FSSpecPersistence(data_config=ctx.file_access.data_config)
     target_fs = persistence.get_filesystem(part_path)
     # The filesystem object is passed separately, so the path goes in without its protocol.
     pq.write_table(structured_dataset.dataframe, strip_protocol(part_path), filesystem=target_fs)

     metadata = StructuredDatasetMetadata(structured_dataset_type)
     return literals.StructuredDataset(uri=target_uri, metadata=metadata)
Пример #6
0
def test_get_recursive():
    """Recursively copy a directory with ``get`` and verify the copied file.

    Creates ``<tmp>/d/f.txt``, copies directory ``d`` to ``<tmp>/o`` with
    ``recursive=True``, then checks that ``<tmp>/o/f.txt`` has the same content.
    """
    fs = FSSpecPersistence()
    with tempfile.TemporaryDirectory() as tdir:
        root = pathlib.Path(tdir)

        src_dir = root / "d"
        src_dir.mkdir()
        # Join only the file name. The earlier form passed the directory to
        # itself (``d.joinpath(d, "f.txt")``), which only produced the right
        # path because an absolute argument resets the join.
        src_file = src_dir / "f.txt"
        with open(src_file, "w") as fp:
            fp.write("hello")

        dst_dir = root / "o"
        dst_file = dst_dir / "f.txt"

        fs.get(str(src_dir), str(dst_dir), recursive=True)
        with open(dst_file, "r") as fp:
            assert fp.read() == "hello"
Пример #7
0
def get_storage_options(cfg: DataConfig, uri: str) -> typing.Optional[typing.Dict]:
    """Return storage options for ``uri``, or ``None`` when there are none.

    Only a URI whose protocol equals ``S3`` produces options; they are built
    by ``s3_setup_args(cfg.s3)``. A falsy (e.g. empty) result is reported as
    ``None``.
    """
    if FSSpecPersistence.get_protocol(uri) != S3:
        return None

    s3_options = s3_setup_args(cfg.s3)
    return s3_options if s3_options else None
Пример #8
0
def test_get_protocol():
    """``get_protocol`` extracts the scheme, treating a bare path as ``file``."""
    expected_by_uri = {
        "s3://abc": "s3",
        "/abc": "file",
        "file://abc": "file",
        "gs://abc": "gs",
        "sftp://abc": "sftp",
        "abfs://abc": "abfs",
    }
    for uri, protocol in expected_by_uri.items():
        assert FSSpecPersistence.get_protocol(uri) == protocol
Пример #9
0
    def decode(
        self,
        ctx: FlyteContext,
        flyte_value: literals.StructuredDataset,
        current_task_metadata: StructuredDatasetMetadata,
    ) -> pa.Table:
        """Read the table stored at ``flyte_value.uri`` with ``pq.read_table``.

        When ``current_task_metadata`` carries a dataset type with columns,
        only those column names are requested; otherwise all columns are read.
        If the read raises ``NoCredentialsError``, it is retried once with the
        filesystem from ``FSSpecPersistence.get_anonymous_filesystem``; when no
        such filesystem is available the original error is re-raised.
        """
        source_uri = flyte_value.uri
        if not ctx.file_access.is_remote(source_uri):
            # For a non-remote URI, make sure the parent directory exists.
            Path(source_uri).parent.mkdir(parents=True, exist_ok=True)
        _, bare_path = split_protocol(source_uri)

        dataset_type = current_task_metadata.structured_dataset_type
        wanted_columns = (
            [col.name for col in dataset_type.columns] if dataset_type and dataset_type.columns else None
        )

        try:
            persistence = FSSpecPersistence(data_config=ctx.file_access.data_config)
            configured_fs = persistence.get_filesystem(source_uri)
            return pq.read_table(bare_path, filesystem=configured_fs, columns=wanted_columns)
        except NoCredentialsError as e:
            logger.debug("S3 source detected, attempting anonymous S3 access")
            anonymous_fs = FSSpecPersistence.get_anonymous_filesystem(source_uri)
            if anonymous_fs is None:
                raise e
            return pq.read_table(bare_path, filesystem=anonymous_fs, columns=wanted_columns)
Пример #10
0
def test_get_filesystem():
    """A plain local path resolves to a ``LocalFileSystem`` instance."""
    persistence = FSSpecPersistence()
    resolved = persistence.get_filesystem("/abc")
    assert resolved is not None
    assert isinstance(resolved, LocalFileSystem)
Пример #11
0
def test_construct_path():
    """``construct_path(True, False, "abc")`` yields the ``file://`` form of ``abc``."""
    persistence = FSSpecPersistence()
    built = persistence.construct_path(True, False, "abc")
    assert built == "file://abc"