Пример #1
0
def test_mkdir_rm_recursive(storage):
    """Create a container holding nested blobs, then remove it recursively."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )

    fs.mkdir("test_mkdir_rm_recursive")
    root_listing = fs.ls("")
    assert "test_mkdir_rm_recursive/" in root_listing

    # One blob at the container root and two under a nested prefix.
    blobs_to_write = (
        ("test_mkdir_rm_recursive/file.txt", b"0123456789"),
        ("test_mkdir_rm_recursive/dir/file.txt", b"ABCD"),
        ("test_mkdir_rm_recursive/dir/file2.txt", b"abcdef"),
    )
    for blob_path, payload in blobs_to_write:
        with fs.open(blob_path, "wb") as sink:
            sink.write(payload)

    expected_blobs = [
        "test_mkdir_rm_recursive/dir/file.txt",
        "test_mkdir_rm_recursive/dir/file2.txt",
        "test_mkdir_rm_recursive/file.txt",
    ]
    assert fs.find("test_mkdir_rm_recursive") == expected_blobs

    fs.rm("test_mkdir_rm_recursive", recursive=True)

    # Neither the container nor any of its blobs may be left behind.
    assert "test_mkdir_rm_recursive/" not in fs.ls("")
    assert fs.find("test_mkdir_rm_recursive") == []
Пример #2
0
def test_metadata_write(storage):
    """Exercise blob metadata through ``fs.open``, ``fs.info`` and the xattr helpers.

    Covers four situations in order: writing and rewriting a blob in "wb"
    mode, appending in "ab" mode, reading/replacing metadata with
    ``getxattr``/``setxattrs``, and an empty blob under a nested prefix.
    """
    fs = AzureBlobFileSystem(account_name=storage.account_name,
                             connection_string=CONN_STR)
    fs.mkdir("test_metadata_write")
    data = b"0123456789"
    metadata = {"meta": "data"}

    # standard blob type: metadata given at open time must be reported by info()
    with fs.open("test_metadata_write/file.txt", "wb", metadata=metadata) as f:
        f.write(data)
    info = fs.info("test_metadata_write/file.txt")
    assert info["metadata"] == metadata
    # rewriting the same blob with different metadata must replace the old value
    metadata_changed_on_write = {"meta": "datum"}
    with fs.open("test_metadata_write/file.txt",
                 "wb",
                 metadata=metadata_changed_on_write) as f:
        f.write(data)
    info = fs.info("test_metadata_write/file.txt")
    assert info["metadata"] == metadata_changed_on_write

    # append blob type: the first append creates the blob with `metadata`
    new_metadata = {"data": "meta"}
    with fs.open("test_metadata_write/append-file.txt",
                 "ab",
                 metadata=metadata) as f:
        f.write(data)

    # try to change the metadata while appending a further block
    with fs.open("test_metadata_write/append-file.txt",
                 "ab",
                 metadata=new_metadata) as f:
        f.write(data)
    info = fs.info("test_metadata_write/append-file.txt")

    # The Azure blob client doesn't seem to support metadata mutation when
    # appending blocks. Let's be sure this behavior doesn't change, as that
    # would imply a potential breaking change.
    assert info["metadata"] == metadata

    # getxattr / setxattr: setxattrs replaces the whole metadata mapping, so
    # afterwards only the "metadata" key is expected to remain
    assert fs.getxattr("test_metadata_write/file.txt", "meta") == "datum"
    fs.setxattrs("test_metadata_write/file.txt", metadata="data2")
    assert fs.getxattr("test_metadata_write/file.txt", "metadata") == "data2"
    assert fs.info("test_metadata_write/file.txt")["metadata"] == {
        "metadata": "data2"
    }

    # empty file and nested directory: same checks on a zero-byte blob
    with fs.open("test_metadata_write/a/b/c/nested-file.txt",
                 "wb",
                 metadata=metadata) as f:
        f.write(b"")
    assert fs.getxattr("test_metadata_write/a/b/c/nested-file.txt",
                       "meta") == "data"
    fs.setxattrs("test_metadata_write/a/b/c/nested-file.txt", metadata="data2")
    assert fs.info(
        "test_metadata_write/a/b/c/nested-file.txt")["metadata"] == {
            "metadata": "data2"
        }
    # NOTE(review): the container still holds blobs at this point; confirm
    # rmdir is expected to remove a non-empty container.
    fs.rmdir("test_metadata_write")
Пример #3
0
    def setup(self, stage=None):
        """Load the MNIST splits from blob storage and one-hot encode the labels.

        Lists ``datasets/mnist`` in the ``azuremlexamples`` storage account,
        decodes each gzip archive into ``X_train``/``y_train``/``X_test``/
        ``y_test``, fits a ``OneHotEncoder`` on the training labels and stores
        ``(image, one_hot_label)`` pairs in ``mnist_train`` and ``mnist_test``.

        Args:
            stage: Not used by this method; kept so the signature stays as-is.
        """
        data_dir = "datasets/mnist"
        storage_options = {"account_name": "azuremlexamples"}
        fs = AzureBlobFileSystem(**storage_options)
        files = fs.ls(data_dir)

        train_len = 60000
        test_len = 10000

        def load(path, reader, count):
            # Close both the remote blob and the gzip wrapper once decoded;
            # the original left both handles open.
            # NOTE(review): assumes the _read_* helpers consume the stream
            # fully before returning — confirm.
            with fs.open(path) as raw, gzip.open(raw) as unpacked:
                return reader(unpacked, count)

        for f in files:
            # The "train-" checks must come first: the bare "images"/"labels"
            # substrings also match the training archives.
            if "train-images" in f:
                self.X_train = load(f, self._read_images, train_len)
            elif "train-labels" in f:
                self.y_train = load(f, self._read_labels, train_len)
            elif "images" in f:
                self.X_test = load(f, self._read_images, test_len)
            elif "labels" in f:
                self.y_test = load(f, self._read_labels, test_len)

        # Fit on the training labels only; the same encoder transforms both splits.
        self.ohe = OneHotEncoder().fit(self.y_train.reshape(-1, 1))

        self.mnist_train = list(
            zip(
                self.X_train,
                self.ohe.transform(self.y_train.reshape(-1, 1)).toarray(),
            )
        )
        self.mnist_test = list(
            zip(
                self.X_test,
                self.ohe.transform(self.y_test.reshape(-1, 1)).toarray(),
            )
        )
Пример #4
0
def test_mkdir_rmdir(storage):
    """Walk a container through mkdir, blob writes, nested mkdir calls and removal."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )

    fs.mkdir("new-container")
    assert "new-container/" in fs.ls("")
    assert fs.ls("new-container") == []

    # Seed the container with one root-level blob and two nested blobs.
    seed_paths = (
        "new-container/file.txt",
        "new-container/dir/file.txt",
        "new-container/dir/file2.txt",
    )
    for seed_path in seed_paths:
        with fs.open(seed_path, "wb") as sink:
            sink.write(b"0123456789")

    # mkdir on a path inside an existing container must not disturb the
    # container itself.
    fs.mkdir("new-container/dir/file.txt", exist_ok=False)
    assert "new-container/" in fs.ls("")

    fs.mkdir("new-container/file2.txt", exist_ok=False)
    container_listing = fs.ls("new-container")
    assert "new-container/file2.txt" in container_listing

    # The entry produced by the mkdir call above must read back as empty.
    with fs.open("new-container/file2.txt", "rb") as source:
        contents = source.read()
    assert contents == b""

    # mkdir on a path where a nested blob already exists leaves it listed.
    fs.mkdir("new-container/dir/file2.txt", exist_ok=False)
    assert "new-container/dir/file2.txt" in fs.ls("new-container/dir")

    # A second nested prefix can be created, written to and removed again.
    fs.mkdir("new-container/dir2/file.txt", exist_ok=False)
    with fs.open("new-container/dir2/file.txt", "wb") as sink:
        sink.write(b"0123456789")
    assert "new-container/dir2/file.txt" in fs.ls("new-container/dir2")
    fs.rm("new-container/dir2", recursive=True)

    fs.rm("new-container/dir", recursive=True)
    remaining = fs.ls("new-container")
    assert remaining == [
        "new-container/file.txt",
        "new-container/file2.txt",
    ]

    for leftover in ("new-container/file.txt", "new-container/file2.txt"):
        fs.rm(leftover)
    fs.rmdir("new-container")

    assert "new-container/" not in fs.ls("")
Пример #5
0
def test_makedir_rmdir(storage, caplog):
    """Check ``makedir`` with both ``exist_ok`` settings, then tear the container down."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )

    fs.makedir("new-container")
    assert "new-container" in fs.ls("")
    assert fs.ls("new-container") == []

    # Seed the container with one root-level blob and two nested blobs.
    seed_paths = (
        "new-container/file.txt",
        "new-container/dir/file.txt",
        "new-container/dir/file2.txt",
    )
    for seed_path in seed_paths:
        with fs.open(seed_path, "wb") as sink:
            sink.write(b"0123456789")

    # With exist_ok=False, makedir must raise both for a path that already
    # exists and for a new nested path inside the existing container.
    rejected_paths = (
        "new-container/dir/file.txt",
        "new-container/dir2",
    )
    for rejected in rejected_paths:
        with pytest.raises(FileExistsError):
            fs.makedir(rejected, exist_ok=False)

    # With exist_ok=True the call succeeds and the existing blob stays listed.
    fs.makedir("new-container/dir/file2.txt", exist_ok=True)
    assert "new-container/dir/file2.txt" in fs.ls("new-container/dir")

    # Writing a blob is enough to make a further nested prefix appear.
    with fs.open("new-container/dir2/file.txt", "wb") as sink:
        sink.write(b"0123456789")
    assert "new-container/dir2/file.txt" in fs.ls("new-container/dir2")
    fs.rm("new-container/dir2", recursive=True)

    fs.rm("new-container/dir", recursive=True)
    fs.touch("new-container/file2.txt")
    remaining = fs.ls("new-container")
    assert remaining == [
        "new-container/file.txt",
        "new-container/file2.txt",
    ]

    for leftover in ("new-container/file.txt", "new-container/file2.txt"):
        fs.rm(leftover)
    fs.rmdir("new-container")

    assert "new-container" not in fs.ls("")
Пример #6
0
def test_append_operation(storage):
    """Append to the same blob twice and check both payloads are stored in order."""
    fs = AzureBlobFileSystem(account_name=storage.account_name,
                             connection_string=CONN_STR)
    fs.mkdir("append-container")

    # Check that appending to an existing file works as expected
    with fs.open("append-container/append_file.txt", "ab") as f:
        f.write(b"0123456789")
    with fs.open("append-container/append_file.txt", "ab") as f:
        f.write(b"0123456789")

    # Read back the blob that was just appended to. The original read
    # "new-container/dir/file2.txt", a blob this test never creates, so the
    # assertion below could not verify the append.
    with fs.open("append-container/append_file.txt", "rb") as f:
        outfile = f.read()
    assert outfile == b"01234567890123456789"

    fs.rm("append-container", recursive=True)
Пример #7
0
def test_fetch_second_half(storage):
    """Fetching bytes 5 to 10 of ``data/top_file.txt`` must return five bytes."""
    # NOTE(review): the previous comment spoke of truncating a read that
    # extends past the end of the file; confirm whether end=10 is meant to
    # exceed the fixture's size.
    fs = AzureBlobFileSystem(
        account_name=storage.account_name, connection_string=CONN_STR,
    )
    # Context manager so the blob handle is closed even if the assertion fails.
    with fs.open("data/top_file.txt") as blob:
        assert len(blob._fetch_range(start=5, end=10)) == 5
Пример #8
0
def test_open_file(storage):
    """Open an existing blob with the default mode and read its full contents."""
    fs = AzureBlobFileSystem(account_name=storage.account_name,
                             connection_string=CONN_STR)
    # Context manager so the handle is released; the original never closed it.
    with fs.open("/data/root/a/file.txt") as f:
        result = f.read()
    assert result == b"0123456789"
Пример #9
0
def test_cat(storage):
    """Write a blob and fetch it back in a single ``fs.cat`` call."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    fs.mkdir("catdir")
    payload = b"0123456789"
    with fs.open("catdir/catfile.txt", "wb") as sink:
        sink.write(payload)

    fetched = fs.cat("catdir/catfile.txt")
    assert fetched == payload

    fs.rm("catdir/catfile.txt")
Пример #10
0
def test_open_context_manager(storage, mocker):
    """Leaving the ``with`` block must close the file's container client once."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    with fs.open("/data/root/a/file.txt") as handle:
        # Patch inside the block so the spy is in place before __exit__ runs.
        close_spy = mocker.patch.object(handle.container_client, "close")
        contents = handle.read()
        assert contents == b"0123456789"

    close_spy.assert_called_once()
Пример #11
0
def test_open_file(storage, mocker):
    """An explicit ``close()`` on the file must close its container client once."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )
    handle = fs.open("/data/root/a/file.txt")

    contents = handle.read()
    assert contents == b"0123456789"

    # Install the spy only after reading, then trigger the close under test.
    close_spy = mocker.patch.object(handle.container_client, "close")
    handle.close()

    close_spy.assert_called_once()
Пример #12
0
def test_url(storage):
    """The URL returned by ``fs.url`` must serve the blob's bytes over HTTP."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
        account_key=KEY,
    )
    fs.mkdir("catdir")
    payload = b"0123456789"
    with fs.open("catdir/catfile.txt", "wb") as sink:
        sink.write(payload)

    # Imported locally, as in the original, so only this test needs requests.
    import requests

    blob_url = fs.url("catdir/catfile.txt")
    response = requests.get(blob_url)
    assert response.status_code == 200
    assert response.content == payload

    fs.rm("catdir/catfile.txt")
Пример #13
0
def test_mkdir_rmdir(storage):
    """Create a container, write blobs after mkdir calls, and remove everything.

    NOTE(review): every nested mkdir call below passes ``exists_ok=True``
    (with an "s"); confirm this spelling matches the mkdir signature of the
    library version under test.
    """
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )

    fs.mkdir("new-container")
    assert "new-container/" in fs.ls("")
    assert fs.ls("new-container") == []

    # Seed the container with one root-level blob and two nested blobs.
    with fs.open(path="new-container/file.txt", mode="wb") as f:
        f.write(b"0123456789")

    with fs.open("new-container/dir/file.txt", "wb") as f:
        f.write(b"0123456789")

    with fs.open("new-container/dir/file2.txt", "wb") as f:
        f.write(b"0123456789")

    # Check to verify you can skip making a directory if the container
    # already exists, but still create a file in that directory
    fs.mkdir("new-container/dir/file.txt", exists_ok=True)
    assert "new-container/" in fs.ls("")

    # mkdir followed by a write at the same root-level path
    fs.mkdir("new-container/file2.txt", exists_ok=True)
    with fs.open("new-container/file2.txt", "wb") as f:
        f.write(b"0123456789")
    assert "new-container/file2.txt" in fs.ls("new-container")

    # Same sequence for a nested path whose blob already exists
    fs.mkdir("new-container/dir/file2.txt", exists_ok=True)
    with fs.open("new-container/dir/file2.txt", "wb") as f:
        f.write(b"0123456789")
    assert "new-container/dir/file2.txt" in fs.ls("new-container/dir")

    # Also verify you can make a nested directory structure
    fs.mkdir("new-container/dir2/file.txt", exists_ok=True)
    with fs.open("new-container/dir2/file.txt", "wb") as f:
        f.write(b"0123456789")
    assert "new-container/dir2/file.txt" in fs.ls("new-container/dir2")
    fs.rm("new-container/dir2", recursive=True)

    # After removing both nested prefixes only the root-level blobs remain
    fs.rm("new-container/dir", recursive=True)
    assert fs.ls("new-container") == [
        "new-container/file.txt",
        "new-container/file2.txt",
    ]

    # Empty the container before removing it
    fs.rm("new-container/file.txt")
    fs.rm("new-container/file2.txt")
    fs.rmdir("new-container")

    assert "new-container/" not in fs.ls("")
Пример #14
0
def test_deep_paths(storage):
    """``ls`` and ``find`` must agree on a deeply nested blob, with or without a trailing slash."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )

    fs.mkdir("test_deep")
    assert "test_deep/" in fs.ls("")

    with fs.open("test_deep/a/b/c/file.txt", "wb") as sink:
        sink.write(b"0123456789")

    # ls returns only the next level below the queried prefix.
    ls_cases = (
        ("test_deep", ["test_deep/a/"]),
        ("test_deep/", ["test_deep/a/"]),
        ("test_deep/a", ["test_deep/a/b/"]),
        ("test_deep/a/", ["test_deep/a/b/"]),
    )
    for query, expected_listing in ls_cases:
        assert fs.ls(query) == expected_listing

    # find returns the single blob regardless of where the walk starts.
    find_queries = ("test_deep", "test_deep/", "test_deep/a", "test_deep/a/")
    for query in find_queries:
        assert fs.find(query) == ["test_deep/a/b/c/file.txt"]

    fs.rm("test_deep", recursive=True)

    assert "test_deep/" not in fs.ls("")
    assert fs.find("test_deep") == []
Пример #15
0
def test_fetch_first_half(storage):
    """Fetching bytes 0 to 5 of ``data/top_file.txt`` must return five bytes."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name, connection_string=CONN_STR,
    )
    # Context manager so the blob handle is closed even if the assertion fails.
    with fs.open("data/top_file.txt") as blob:
        assert len(blob._fetch_range(start=0, end=5)) == 5
Пример #16
0
def test_large_blob(storage):
    """Round-trip a blob larger than the block size via open() and upload/download.

    The payload is first written and read back through ``fs.open`` and
    compared by MD5 digest, then pushed and pulled through ``fs.upload`` /
    ``fs.download`` using a temporary directory and compared by size.
    """
    import tempfile
    import hashlib
    import io
    import shutil
    from pathlib import Path

    fs = AzureBlobFileSystem(account_name=storage.account_name,
                             connection_string=CONN_STR)

    # Create a 120,000,000-byte array; it must be larger than the block
    # sizes to force a chunked upload.
    blob_size = 120_000_000
    assert blob_size > fs.blocksize
    assert blob_size > AzureBlobFile.DEFAULT_BLOCK_SIZE

    data = b"1" * blob_size
    _hash = hashlib.md5(data)
    expected = _hash.hexdigest()

    # create container
    fs.mkdir("chunk-container")

    # upload the data using fs.open
    path = "chunk-container/large-blob.bin"
    with fs.open(path, "ab") as dst:
        dst.write(data)

    assert fs.exists(path)
    assert fs.size(path) == blob_size

    # Drop the local copy so the comparison below relies on downloaded bytes.
    del data

    # download with fs.open
    bio = io.BytesIO()
    with fs.open(path, "rb") as src:
        shutil.copyfileobj(src, bio)

    # read back the data and calculate md5
    bio.seek(0)
    data = bio.read()
    _hash = hashlib.md5(data)
    result = _hash.hexdigest()

    assert expected == result

    # do the same but using upload/download and a tempdir
    # (the original had a redundant chained assignment, `path = path = ...`)
    path = "chunk-container/large_blob2.bin"
    with tempfile.TemporaryDirectory() as td:
        local_blob: Path = Path(td) / "large_blob2.bin"
        with local_blob.open("wb") as fo:
            fo.write(data)
        assert local_blob.exists()
        assert local_blob.stat().st_size == blob_size

        fs.upload(str(local_blob), path)
        assert fs.exists(path)
        assert fs.size(path) == blob_size

        # download now; remove the local file first so a stale copy cannot pass
        local_blob.unlink()
        fs.download(path, str(local_blob))
        assert local_blob.exists()
        assert local_blob.stat().st_size == blob_size
Пример #17
0
def test_fetch_length_is_none(storage):
    """With ``end=None`` a fetch starting at byte 2 must return eight bytes."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name, connection_string=CONN_STR,
    )
    # Context manager so the blob handle is closed even if the assertion fails.
    with fs.open("data/top_file.txt") as blob:
        assert len(blob._fetch_range(start=2, end=None)) == 8
Пример #18
0
def test_makedir_rmdir(storage, caplog):
    """Exercise ``makedir`` for new and existing paths, then remove the container."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name,
        connection_string=CONN_STR,
    )

    fs.makedir("new-container")
    assert "new-container/" in fs.ls("")
    assert fs.ls("new-container") == []

    # Seed the container with one root-level blob and two nested blobs.
    seed_paths = (
        "new-container/file.txt",
        "new-container/dir/file.txt",
        "new-container/dir/file2.txt",
    )
    for seed_path in seed_paths:
        with fs.open(seed_path, "wb") as sink:
            sink.write(b"0123456789")

    # An existing path combined with exist_ok=False must raise.
    with pytest.raises(FileExistsError):
        fs.makedir("new-container/dir/file.txt", exist_ok=False)

    # A path that does not exist yet is created even with exist_ok=False.
    fs.makedir("new-container/file2.txt", exist_ok=False)
    container_listing = fs.ls("new-container")
    assert "new-container/file2.txt" in container_listing

    # An existing directory is silently accepted when exist_ok=True.
    fs.makedir("new-container/dir", exist_ok=True)
    assert "new-container/dir/" in fs.ls("new-container")

    # The entry produced by makedir above must read back as empty.
    with fs.open("new-container/file2.txt", "rb") as source:
        contents = source.read()
    assert contents == b""

    # makedir with exist_ok=True on an existing nested blob leaves it listed.
    fs.makedir("new-container/dir/file2.txt", exist_ok=True)
    assert "new-container/dir/file2.txt" in fs.ls("new-container/dir")

    # A second nested prefix can be created, written to and removed again.
    fs.makedir("new-container/dir2/file.txt", exist_ok=False)
    with fs.open("new-container/dir2/file.txt", "wb") as sink:
        sink.write(b"0123456789")
    assert "new-container/dir2/file.txt" in fs.ls("new-container/dir2")
    fs.rm("new-container/dir2", recursive=True)

    fs.rm("new-container/dir", recursive=True)
    remaining = fs.ls("new-container")
    assert remaining == [
        "new-container/file.txt",
        "new-container/file2.txt",
    ]

    for leftover in ("new-container/file.txt", "new-container/file2.txt"):
        fs.rm(leftover)
    fs.rmdir("new-container")

    assert "new-container/" not in fs.ls("")
Пример #19
0
def test_fetch_entire_blob(storage):
    """Fetching from byte 0 with ``length=10`` must return ten bytes."""
    fs = AzureBlobFileSystem(
        account_name=storage.account_name, connection_string=CONN_STR,
    )
    # Context manager so the blob handle is closed even if the assertion fails.
    with fs.open("data/top_file.txt") as blob:
        # NOTE(review): this call passes `length=` rather than `end=`; confirm
        # _fetch_range in the library version under test honours that keyword.
        assert len(blob._fetch_range(start=0, length=10)) == 10
Пример #20
0
 def test_connect_async_open_credential():
     """Build a filesystem from ``DefaultAzureCredential`` and open the empty path."""
     # NOTE(review): `storage` is not a parameter of this function, so it has
     # to resolve to a module-level name; confirm it was not meant to be
     # requested as a fixture argument.
     fs = AzureBlobFileSystem(account_name=storage.account_name,
                              credential=DefaultAzureCredential())
     # No assertion follows; the test only checks that the call does not raise.
     fs.open(path="")