def test_exists(storage):
    fs = AzureBlobFileSystem(
        account_name=storage.account_name, connection_string=CONN_STR
    )
    assert fs.exists("data/top_file.txt")
    assert fs.exists("data")
    assert fs.exists("data/")
    assert fs.exists("")
    assert not fs.exists("data/not-a-key")
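# A minimal usage sketch (not part of the original suite) showing how the
# exists() semantics above combine with ls() to probe a prefix before reading.
# The account name, connection string, and paths are placeholder assumptions.
def sketch_probe_prefix():
    fs = AzureBlobFileSystem(
        account_name="myaccount", connection_string="<connection-string>"
    )
    if fs.exists("data"):  # containers and directory prefixes both "exist"
        print(fs.ls("data"))  # list blobs/prefixes directly under "data"
    if not fs.exists("data/not-a-key"):
        print("missing blobs report False rather than raising")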
def test_large_blob(storage):
    import tempfile
    import hashlib
    import io
    import shutil
    from pathlib import Path

    fs = AzureBlobFileSystem(
        account_name=storage.account_name, connection_string=CONN_STR
    )

    # create a 120 MB byte array, ensure it's larger than blocksizes to force a
    # chunked upload
    blob_size = 120_000_000
    # blob_size = 2_684_354_560
    assert blob_size > fs.blocksize
    assert blob_size > AzureBlobFile.DEFAULT_BLOCK_SIZE

    data = b"1" * blob_size
    _hash = hashlib.md5(data)
    expected = _hash.hexdigest()

    # create container
    fs.mkdir("chunk-container")

    # upload the data using fs.open
    path = "chunk-container/large-blob.bin"
    with fs.open(path, "ab") as dst:
        dst.write(data)

    assert fs.exists(path)
    assert fs.size(path) == blob_size

    del data

    # download with fs.open
    bio = io.BytesIO()
    with fs.open(path, "rb") as src:
        shutil.copyfileobj(src, bio)

    # read back the data and calculate md5
    bio.seek(0)
    data = bio.read()
    _hash = hashlib.md5(data)
    result = _hash.hexdigest()
    assert expected == result

    # do the same but using upload/download and a tempdir
    path = "chunk-container/large_blob2.bin"
    with tempfile.TemporaryDirectory() as td:
        local_blob: Path = Path(td) / "large_blob2.bin"
        with local_blob.open("wb") as fo:
            fo.write(data)
        assert local_blob.exists()
        assert local_blob.stat().st_size == blob_size

        fs.upload(str(local_blob), path)
        assert fs.exists(path)
        assert fs.size(path) == blob_size

        # download now
        local_blob.unlink()
        fs.download(path, str(local_blob))
        assert local_blob.exists()
        assert local_blob.stat().st_size == blob_size
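# A hedged sketch (assuming the same fs fixture as above) of streaming the
# same payload in fixed-size chunks instead of materializing one 120 MB bytes
# object in memory; fs.open() buffers writes and flushes them in blocksize
# pieces, which is the chunked-upload path the test above exercises.
def sketch_streamed_upload(fs, path="chunk-container/streamed-blob.bin"):
    chunk = b"1" * 4_000_000  # 4 MB per write
    with fs.open(path, "wb") as dst:
        for _ in range(30):  # 30 x 4 MB = 120 MB total
            dst.write(chunk)
    return fs.size(path)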
class AzureDataLake(Source):
    """
    A class for pulling data from Azure Data Lake (gen1 and gen2).

    You can either connect to the lake in general or to a particular path,
    e.g. `lake = AzureDataLake(); lake.exists("a/b/c.csv")`
    vs `lake = AzureDataLake(path="a/b/c.csv"); lake.exists()`.

    Parameters
    ----------
    credentials : Dict[str, Any], optional
        A dictionary containing ACCOUNT_NAME and the following
        Service Principal credentials:
            - AZURE_TENANT_ID
            - AZURE_CLIENT_ID
            - AZURE_CLIENT_SECRET
    """

    def __init__(
        self,
        path: str = None,
        gen: int = 2,
        credentials: Dict[str, Any] = None,
        *args,
        **kwargs,
    ):
        credentials = credentials or local_config.get("AZURE_ADLS")
        super().__init__(*args, credentials=credentials, **kwargs)

        storage_account_name = self.credentials["ACCOUNT_NAME"]
        tenant_id = self.credentials["AZURE_TENANT_ID"]
        client_id = self.credentials["AZURE_CLIENT_ID"]
        client_secret = self.credentials["AZURE_CLIENT_SECRET"]

        self.path = path
        self.gen = gen
        self.storage_options = {
            "tenant_id": tenant_id,
            "client_id": client_id,
            "client_secret": client_secret,
        }
        if gen == 1:
            self.fs = AzureDatalakeFileSystem(
                store_name=storage_account_name,
                tenant_id=tenant_id,
                client_id=client_id,
                client_secret=client_secret,
            )
            self.base_url = f"adl://{storage_account_name}"
        elif gen == 2:
            self.storage_options["account_name"] = storage_account_name
            self.fs = AzureBlobFileSystem(
                account_name=storage_account_name,
                tenant_id=tenant_id,
                client_id=client_id,
                client_secret=client_secret,
            )
            self.base_url = "az://"

    def upload(
        self,
        from_path: str,
        to_path: str = None,
        recursive: bool = False,
        overwrite: bool = False,
    ) -> None:
        """
        Upload file(s) to the lake.

        Args:
            from_path (str): Path to the local file(s) to be uploaded.
            to_path (str): Path to the destination file/folder.
            recursive (bool): Set this to true if working with directories.
            overwrite (bool): Whether to overwrite the file(s) if they exist.

        Example:
        ```python
        from viadot.sources import AzureDataLake
        lake = AzureDataLake()
        lake.upload(from_path='tests/test.csv', to_path="sandbox/test.csv")
        ```
        """

        if self.gen == 1:
            raise NotImplementedError(
                "Azure Data Lake Gen1 does not support simple file upload."
            )

        to_path = to_path or self.path
        self.fs.upload(
            lpath=from_path,
            rpath=to_path,
            recursive=recursive,
            overwrite=overwrite,
        )

    def exists(self, path: str = None) -> bool:
        """
        Check if a location exists in Azure Data Lake.

        Args:
            path (str): The path to check. Can be a file or a directory.

        Example:
        ```python
        from viadot.sources import AzureDataLake

        lake = AzureDataLake(gen=1)
        lake.exists("tests/test.csv")
        ```

        Returns:
            bool: Whether the path exists.
        """
        path = path or self.path
        return self.fs.exists(path)

    def download(
        self,
        to_path: str,
        from_path: str = None,
        recursive: bool = False,
        overwrite: bool = True,
    ) -> None:
        if overwrite is False:
            raise NotImplementedError(
                "Currently, only the default behavior (overwrite) is available."
            )
        from_path = from_path or self.path
        self.fs.download(rpath=from_path, lpath=to_path, recursive=recursive)

    def to_df(
        self,
        path: str = None,
        sep: str = "\t",
        quoting: int = 0,
        lineterminator: str = None,
        error_bad_lines: bool = None,
    ):
        if quoting is None:
            quoting = 0

        path = path or self.path
        url = os.path.join(self.base_url, path)

        if url.endswith(".csv"):
            df = pd.read_csv(
                url,
                storage_options=self.storage_options,
                sep=sep,
                quoting=quoting,
                lineterminator=lineterminator,
                error_bad_lines=error_bad_lines,
            )
        elif url.endswith(".parquet"):
            df = pd.read_parquet(url, storage_options=self.storage_options)
        else:
            raise ValueError("Only CSV and Parquet formats are supported.")

        return df

    def ls(self, path: str = None) -> List[str]:
        path = path or self.path
        return self.fs.ls(path)

    def rm(self, path: str = None, recursive: bool = False):
        path = path or self.path
        self.fs.rm(path, recursive=recursive)

    def cp(self, from_path: str = None, to_path: str = None, recursive: bool = False):
        from_path = from_path or self.path
        self.fs.cp(from_path, to_path, recursive=recursive)
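# A hedged end-to-end sketch (not part of the original module) tying the
# methods above together; the lake paths are placeholder assumptions, and
# credentials are read from local_config["AZURE_ADLS"] as in __init__.
def sketch_lake_roundtrip():
    lake = AzureDataLake()  # gen2 by default
    lake.upload(from_path="tests/test.csv", to_path="sandbox/test.csv")
    if lake.exists("sandbox/test.csv"):
        df = lake.to_df(path="sandbox/test.csv", sep=",")  # CSV or Parquet only
        print(df.head())
    lake.rm("sandbox/test.csv")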