Пример #1
0
def get(filetype="SVG", regex=r"\d{4}-\d{2}-\d{2}_.+"):
    """Return the blobs in ``BUCKET`` whose name matches ``<filetype>/<regex>``.

    Args:
        filetype: Leading component of the blob name; it is interpolated
            into the pattern in front of a ``/``.
        regex: Regular expression for the part of the name after
            ``<filetype>/``. The combined pattern is anchored at the start
            of the blob name only (``re.match`` semantics).

    Returns:
        A list of the blobs yielded by ``client.list_blobs(BUCKET)`` whose
        ``name`` matches the combined pattern.
    """
    client = Client.create_anonymous_client()
    # Compile once instead of re-building the pattern for every blob. The
    # default above is a raw string so that ``\d`` is not treated as an
    # (invalid) string escape sequence; its value is unchanged.
    pattern = re.compile(f"{filetype}/{regex}")
    return [blob for blob in client.list_blobs(BUCKET) if pattern.match(blob.name)]
def download(country_code):
    """Download the SVG and PDF blobs for ``country_code`` dated ``DATE``.

    Blobs are looked up with ``get`` using a name pattern of the form
    ``<yyyy-mm-dd>_<country_code>_M...``. Only blobs whose leading date token
    equals the module-level ``DATE`` are saved, to ``svgs/`` and ``pdfs/``
    respectively (paths relative to the current working directory).

    Args:
        country_code: Code interpolated verbatim into the blob-name pattern
            and echoed in the progress messages.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, extension):
        """Save every blob dated ``DATE`` as ``<extension>s/<country>.<extension>``."""
        download_count = 0

        for blob in blobs:
            # The base name starts with "<date>_"; skip blobs from other dates.
            blob_date = blob.name.split("/")[-1].split("_")[0]
            if blob_date != DATE:
                continue

            fname = f"{extension}s/{get_country(blob)}.{extension}"
            with open(fname, "wb+") as fileobj:
                client.download_blob_to_file(blob, fileobj)

            print(
                f"Download {country_code} {extension} complete. Saved to /{extension}s"
            )
            download_count += 1

        if download_count == 0:
            print(f"Could not find a {extension} file for code {country_code}")

    # Raw f-string: keeps ``\d`` as a regex token instead of an invalid string
    # escape. Doubled braces still produce literal ``{4}`` / ``{2}`` quantifiers.
    regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_M.+"

    _download(get(filetype="SVG", regex=regex), "svg")
    _download(get(filetype="PDF", regex=regex), "pdf")
Пример #3
0
def get_dataset(
    max_bytes=1024 * 1024,
    dir='data',
):
    """Download the beginning of every ``full/simplified`` quickdraw blob.

    For each blob listed under the ``full/simplified`` prefix of the
    ``quickdraw_dataset`` bucket, a byte range starting at offset 0 is
    written to ``<dir>/<blob base name>``. The trailing line is then cut off,
    since a ranged download usually ends in the middle of a line.

    Args:
        max_bytes: Value passed as ``end`` of the requested byte range.
        dir: Directory the files are written to; created if it is missing.
    """
    def _remove_last_line(file):
        """Truncate the binary file ``file`` at its last interior newline.

        The scan walks backwards one byte at a time. The first ``read``
        happens at EOF and returns ``b""``, so the very last byte is never
        inspected. The file is cut at the newline's own offset, so that
        newline is removed too. A file without such a newline is left
        untouched.
        """
        file.seek(0, os.SEEK_END)
        pos = file.tell() - 1
        while pos > 0 and file.read(1) != b"\n":
            pos -= 1
            file.seek(pos, os.SEEK_SET)
        if pos > 0:
            file.seek(pos, os.SEEK_SET)
            file.truncate()

    from google.cloud.storage.client import Client

    # open() below does not create missing directories.
    if dir:
        os.makedirs(dir, exist_ok=True)

    client = Client.create_anonymous_client()
    bucket = client.get_bucket('quickdraw_dataset')
    for blob in bucket.list_blobs(prefix='full/simplified'):
        file_name = '{}/{}'.format(dir, path.basename(blob.name))
        # Trim on the same binary handle: byte-wise seeking is only well
        # defined in binary mode, and the cut-off download may end inside a
        # multi-byte character that a text-mode read could not decode.
        with open(file_name, 'wb+') as f:
            # NOTE(review): ``end`` looks like an inclusive byte offset in
            # google-cloud-storage, so this may fetch max_bytes + 1 bytes -
            # confirm against the library documentation.
            blob.download_to_file(
                f,
                start=0,
                end=max_bytes,
            )
            _remove_last_line(f)
def download(country_code, svg, pdf):
    """Download the SVG and/or PDF blobs found for ``country_code``.

    Blobs are looked up with ``get`` using a name pattern of the form
    ``<yyyy-mm-dd>_<country_code>_...`` and written to ``svgs/`` or ``pdfs/``
    (paths relative to the current working directory).

    Args:
        country_code: Code interpolated verbatim into the blob-name pattern
            and echoed in the progress messages.
        svg: When truthy, download the blobs returned for ``filetype="SVG"``.
        pdf: When truthy, download the blobs returned for ``filetype="PDF"``.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, extension):
        """Save each blob as ``<extension>s/<country>.<extension>``."""
        # ``extension`` is passed in rather than derived inside the loop, so
        # it is also defined for the "not found" message when ``blobs`` is
        # empty (previously that path raised UnboundLocalError).
        if not blobs:
            print(f"Could not find a {extension} file for code {country_code}")
            return

        for blob in blobs:
            fname = f"{extension}s/{get_country(blob)}.{extension}"
            with open(fname, "wb+") as fileobj:
                client.download_blob_to_file(blob, fileobj)

        print(
            f"Download {country_code} {extension} complete. Saved to /{extension}s"
        )

    # Raw f-string: keeps ``\d`` as a regex token instead of an invalid string
    # escape. Doubled braces still produce literal ``{4}`` / ``{2}`` quantifiers.
    regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_.+"

    if svg:
        _download(get(filetype="SVG", regex=regex), "svg")
    if pdf:
        _download(get(filetype="PDF", regex=regex), "pdf")
Пример #5
0
def test_anonymous_client_access_to_public_bucket():
    """Check that an anonymous client can list and download a public blob.

    Exactly one blob is expected back from the listing (``max_results=1``);
    it is then downloaded into a throwaway temporary file. Both remote calls
    go through the ``retry_429_503`` helper.
    """
    from google.cloud.storage.client import Client

    client = Client.create_anonymous_client()
    target_bucket = client.bucket(public_bucket)

    list_with_retry = _helpers.retry_429_503(client.list_blobs)
    # Single-element unpacking fails unless exactly one blob is returned.
    [blob] = list_with_retry(target_bucket, max_results=1)

    with tempfile.TemporaryFile() as stream:
        download_with_retry = _helpers.retry_429_503(blob.download_to_file)
        download_with_retry(stream)