def get(filetype="SVG", regex=r"\d{4}-\d{2}-\d{2}_.+"):
    """Return the blobs in ``BUCKET`` whose name matches ``{filetype}/{regex}``.

    Args:
        filetype: Leading path component of the blob name (e.g. ``"SVG"``).
        regex: Regular expression for the remainder of the blob name. It is
            interpolated into the pattern as-is, so it must already be a
            valid regex fragment.

    Returns:
        A list of the matching blob objects, in the order the bucket
        listing yields them.
    """
    client = Client.create_anonymous_client()
    # Compile once instead of rebuilding the pattern for every blob.
    # ``match`` anchors at the start of the name only, not at the end.
    pattern = re.compile(f"{filetype}/{regex}")
    return [blob for blob in client.list_blobs(BUCKET) if pattern.match(blob.name)]
def download(country_code):
    """Download the SVG and PDF files for ``country_code`` dated ``DATE``.

    Blobs are looked up with ``get`` for the ``SVG`` and ``PDF`` file types.
    Only blobs whose file name starts with ``DATE`` (the text before the
    first ``_``) are saved, to ``svgs/`` and ``pdfs/`` respectively. Progress
    and "not found" messages are printed to stdout.

    Args:
        country_code: Code interpolated verbatim into the blob-name regex.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, extension):
        """Save every blob dated ``DATE`` and report if none qualified."""
        download_count = 0
        for blob in blobs:
            # File names look like "<date>_<rest>"; skip other dates.
            if blob.name.split("/")[-1].split("_")[0] != DATE:
                continue

            fname = f"{extension}s/{get_country(blob)}.{extension}"
            with open(fname, "wb+") as fileobj:
                client.download_blob_to_file(blob, fileobj)

            print(
                f"Download {country_code} {extension} complete. Saved to /{extension}s"
            )
            download_count += 1

        if download_count == 0:
            print(f"Could not find a {extension} file for code {country_code}")

    # Raw f-string: "\d" in a normal string literal is an invalid escape
    # sequence. The resulting pattern text is unchanged.
    regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_M.+"
    _download(get(filetype="SVG", regex=regex), "svg")
    _download(get(filetype="PDF", regex=regex), "pdf")
def get_dataset(
    max_bytes=1024 * 1024,
    dir='data',
):
    """Download the start of each ``full/simplified`` blob into ``dir``.

    Every blob under the ``full/simplified`` prefix of the public
    ``quickdraw_dataset`` bucket is fetched with the byte range
    ``start=0, end=max_bytes`` and written to ``<dir>/<blob basename>``.
    Because a ranged download can stop mid-line, the trailing line of each
    saved file is then cut off.

    Args:
        max_bytes: Value passed as ``end`` of the download byte range.
        dir: Existing directory that receives the files.
    """
    def _remove_last_line(file):
        """Truncate binary file ``file`` at its last interior newline.

        Only offsets ``1 .. size - 2`` are searched. The final byte is not
        examined, so a file that already ends in a newline still loses its
        last full line, and a newline at offset 0 is ignored. The file is
        left untouched when no newline is found in that range.
        """
        file.seek(0, os.SEEK_END)
        upper = file.tell() - 1  # exclusive end of the unsearched region
        # Walk backwards in chunks rather than one byte per seek/read.
        while upper > 1:
            lower = max(1, upper - 4096)
            file.seek(lower, os.SEEK_SET)
            newline_at = file.read(upper - lower).rfind(b"\n")
            if newline_at != -1:
                # Cut at the newline itself, dropping it and what follows.
                file.truncate(lower + newline_at)
                return
            upper = lower

    from google.cloud.storage.client import Client

    client = Client.create_anonymous_client()
    bucket = client.get_bucket('quickdraw_dataset')
    for blob in bucket.list_blobs(prefix='full/simplified'):
        file_name = '{}/{}'.format(dir, path.basename(blob.name))
        with open(file_name, 'wb+') as f:
            blob.download_to_file(f, start=0, end=max_bytes)
        # Binary mode: seeking to computed offsets is undefined for text-mode
        # files, and decoding could fail on a multi-byte character that the
        # ranged download split in half.
        with open(file_name, 'rb+') as f:
            _remove_last_line(f)
def download(country_code, svg, pdf):
    """Download the SVG and/or PDF files for ``country_code``.

    Matching blobs are looked up with ``get`` and written to ``svgs/`` or
    ``pdfs/``. Progress and "not found" messages are printed to stdout.

    Args:
        country_code: Code interpolated verbatim into the blob-name regex.
        svg: When truthy, download the SVG files.
        pdf: When truthy, download the PDF files.
    """
    client = Client.create_anonymous_client()

    def _download(blobs, svg):
        """Save every blob in ``blobs``; report when there are none."""
        # Resolve the extension before branching: the "not found" message
        # needs it too, and it used to be assigned only inside the loop,
        # which raised UnboundLocalError for an empty result.
        extension = "svg" if svg else "pdf"
        if not blobs:
            print(f"Could not find a {extension} file for code {country_code}")
            return

        for blob in blobs:
            fname = f"{extension}s/{get_country(blob)}.{extension}"
            with open(fname, "wb+") as fileobj:
                client.download_blob_to_file(blob, fileobj)

            print(
                f"Download {country_code} {extension} complete. Saved to /{extension}s"
            )

    # Raw f-string: "\d" in a normal string literal is an invalid escape
    # sequence. The resulting pattern text is unchanged.
    regex = rf"\d{{4}}-\d{{2}}-\d{{2}}_{country_code}_.+"
    if svg:
        _download(get(filetype="SVG", regex=regex), True)

    if pdf:
        _download(get(filetype="PDF", regex=regex), False)
def test_anonymous_client_access_to_public_bucket():
    """An anonymous client can list and download a blob from ``public_bucket``."""
    from google.cloud.storage.client import Client

    client = Client.create_anonymous_client()
    target_bucket = client.bucket(public_bucket)

    list_blobs = _helpers.retry_429_503(client.list_blobs)
    # Single-element unpacking fails unless exactly one blob is returned.
    (first_blob,) = list_blobs(target_bucket, max_results=1)

    # The payload is discarded; the test only checks the download succeeds.
    with tempfile.TemporaryFile() as sink:
        download = _helpers.retry_429_503(first_blob.download_to_file)
        download(sink)