Пример #1
0
def test_get_anonymous_filesystem():
    """Check which URIs ``get_anonymous_filesystem`` returns a filesystem for."""
    persistence = FSSpecPersistence()

    # A plain local-style path is expected to yield no anonymous filesystem.
    local_fs = persistence.get_anonymous_filesystem("/abc")
    assert local_fs is None

    # An s3:// URI is expected to yield a filesystem reporting both S3 protocol names.
    s3_fs = persistence.get_anonymous_filesystem("s3://abc")
    assert s3_fs is not None
    assert s3_fs.protocol == ["s3", "s3a"]
Пример #2
0
    def decode(
        self,
        ctx: FlyteContext,
        flyte_value: literals.StructuredDataset,
        current_task_metadata: StructuredDatasetMetadata,
    ) -> pa.Table:
        """Load the dataset referenced by ``flyte_value.uri`` via ``pq.read_table``.

        Args:
            ctx: Context whose ``file_access`` supplies ``is_remote`` and the
                ``data_config`` used to build the filesystem.
            flyte_value: Literal carrying the ``uri`` of the data to read.
            current_task_metadata: Metadata whose ``structured_dataset_type.columns``,
                when present and non-empty, restricts the columns that are read.

        Returns:
            The result of ``pq.read_table`` for the protocol-stripped path.

        Raises:
            NoCredentialsError: Re-raised when the first read fails with it and
                no anonymous filesystem is available for the URI.
        """
        uri = flyte_value.uri
        # When file_access does not consider the URI remote, make sure its parent directory exists.
        if not ctx.file_access.is_remote(uri):
            Path(uri).parent.mkdir(parents=True, exist_ok=True)
        _, stripped_path = split_protocol(uri)

        # Project onto the declared column names; None means no column restriction is passed.
        dataset_type = current_task_metadata.structured_dataset_type
        wanted_columns = (
            [col.name for col in dataset_type.columns] if dataset_type and dataset_type.columns else None
        )

        try:
            persistence = FSSpecPersistence(data_config=ctx.file_access.data_config)
            configured_fs = persistence.get_filesystem(uri)
            return pq.read_table(stripped_path, filesystem=configured_fs, columns=wanted_columns)
        except NoCredentialsError as e:
            # Fall back to an anonymous filesystem when one exists for this URI.
            logger.debug("S3 source detected, attempting anonymous S3 access")
            anonymous_fs = FSSpecPersistence.get_anonymous_filesystem(uri)
            if anonymous_fs is None:
                raise e
            return pq.read_table(stripped_path, filesystem=anonymous_fs, columns=wanted_columns)