async def test_datastore_export(creds: str, project: str,
                                export_bucket_name: str):
    kind = 'PublicTestDatastoreExportModel'

    rand_uuid = str(uuid.uuid4())

    async with aiohttp.ClientSession(conn_timeout=10, read_timeout=10) as s:
        ds = Datastore(project=project, service_file=creds, session=s)
        await ds.insert(Key(project, [PathElement(kind)]),
                        properties={'rand_str': rand_uuid})

        operation = await ds.export(export_bucket_name, kinds=[kind])

        count = 0
        while (count < 10 and operation
               and operation.metadata['common']['state'] == 'PROCESSING'):
            await asyncio.sleep(10)
            operation = await ds.get_datastore_operation(operation.name)
            count += 1

        assert operation.metadata['common']['state'] == 'SUCCESSFUL'

        prefix_len = len(f'gs://{export_bucket_name}/')
        export_path = operation.metadata['outputUrlPrefix'][prefix_len:]

        storage = Storage(service_file=creds, session=s)
        files = await storage.list_objects(export_bucket_name,
                                           params={'prefix': export_path})
        for file in files['items']:
            await storage.delete(export_bucket_name, file['name'])

def __init__(self, location, creds_path=None, trace_configs=None):
    from gcloud.aio.storage import Storage

    self.bucket_name = URL(location).host
    self.session = ClientSession(trace_configs=trace_configs)
    self.storage = Storage(service_file=creds_path, session=self.session)
    self.bucket = self.storage.get_bucket(self.bucket_name)

def cloud_store(bucket_name, content_type, session: ClientSession) -> CloudStore:
    appcred = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    storage = Storage(service_file=appcred, session=session)

    async def store(blob_name, payload):
        try:
            return await storage.upload(bucket_name, blob_name, payload,
                                        content_type=content_type)
        except ClientResponseError as e:
            if e.status == 404:
                # The bucket does not exist yet: create it, then retry once.
                storage_client = gstorage.Client()
                storage_client.create_bucket(bucket_name)
                return await storage.upload(bucket_name, blob_name, payload,
                                            content_type=content_type)
            if e.status in (500, 503):
                # Transient server error: back off briefly and retry once.
                await asyncio.sleep(2)
                return await storage.upload(bucket_name, blob_name, payload,
                                            content_type=content_type)
            # Re-raise anything else instead of silently returning None.
            raise

    return store

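# A minimal usage sketch for the `cloud_store` factory above; the bucket
# name, blob name, and payload are illustrative, not from the original source.
async def example_store_usage():
    async with ClientSession() as session:
        store = cloud_store('my-bucket', 'application/json', session)
        await store('results/run-1.json', b'{"ok": true}')
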
async def test_gcs_signed_url(bucket_name, creds, data):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.txt'

    async with Session() as session:
        storage = Storage(service_file=creds, session=session)
        await storage.upload(bucket_name, object_name, data,
                             force_resumable_upload=True)

        bucket = Bucket(storage, bucket_name)
        blob = await bucket.get_blob(object_name, session=session)
        iam_client = IamClient(service_file=creds, session=session)
        signed_url = await blob.get_signed_url(60, iam_client=iam_client)

        resp = await session.get(signed_url)
        try:
            downloaded_data: str = await resp.text()
        except (AttributeError, TypeError):
            # fall back for requests-style sessions, where `text` is a property
            downloaded_data: str = str(resp.text)

        try:
            assert data == downloaded_data
        finally:
            await storage.delete(bucket_name, blob.name)

async def read_data_from_dir(self, path, path_to=None):
    bucket_name = self.__get_bucket_name(path)

    async with Session() as session:
        storage = Storage(
            service_file=settings.GOOGLE_APPLICATION_CREDENTIALS,
            session=session)
        bucket = Bucket(storage, bucket_name)
        items = list(await bucket.list_blobs(
            prefix=self.__get_file_path(path)))

        result = []
        base_file_path = self.__get_file_path(path)
        for item in items:
            file_location = item[len(base_file_path) + 1:]
            path_to_location = os.path.join(
                path_to, file_location) if path_to else None
            result.append({
                "path": file_location,
                "location": path_to_location,
                "data": await self.read_data(
                    path=os.path.join(path, file_location),
                    path_to=path_to_location),
            })
        return result

async def test_object_life_cycle(bucket_name, creds, uploaded_data,
                                 expected_data, file_extension):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.{file_extension}'
    copied_object_name = f'copyof_{object_name}'

    async with Session() as session:
        storage = Storage(service_file=creds, session=session)
        await storage.upload(bucket_name, object_name, uploaded_data)

        bucket = storage.get_bucket(bucket_name)
        blob = await bucket.get_blob(object_name)
        constructed_result = await blob.download()
        assert constructed_result == expected_data

        direct_result = await storage.download(bucket_name, object_name)
        assert direct_result == expected_data

        await storage.copy(bucket_name, object_name, bucket_name,
                           new_name=copied_object_name)
        direct_result = await storage.download(bucket_name,
                                               copied_object_name)
        assert direct_result == expected_data

        await storage.delete(bucket_name, object_name)
        await storage.delete(bucket_name, copied_object_name)

        with pytest.raises(ResponseError):
            await storage.download(bucket_name, object_name)
        with pytest.raises(ResponseError):
            await storage.download(bucket_name, copied_object_name)

async def test_datastore_export(creds: str, project: str,
                                export_bucket_name: str):
    # N.B. when modifying this test, please also see `test_table_load_copy`
    # in `gcloud-aio-bigquery`.
    kind = 'PublicTestDatastoreExportModel'

    rand_uuid = str(uuid.uuid4())

    async with Session() as s:
        ds = Datastore(project=project, service_file=creds, session=s)
        await ds.insert(Key(project, [PathElement(kind)]),
                        properties={'rand_str': rand_uuid})

        operation = await ds.export(export_bucket_name, kinds=[kind])

        count = 0
        while (count < 10 and operation
               and operation.metadata['common']['state'] == 'PROCESSING'):
            await sleep(10)
            operation = await ds.get_datastore_operation(operation.name)
            count += 1

        assert operation.metadata['common']['state'] == 'SUCCESSFUL'

        prefix_len = len(f'gs://{export_bucket_name}/')
        export_path = operation.metadata['outputUrlPrefix'][prefix_len:]

        storage = Storage(service_file=creds, session=s)
        files = await storage.list_objects(export_bucket_name,
                                           params={'prefix': export_path})
        for file in files['items']:
            await storage.delete(export_bucket_name, file['name'])

async def test_upload_multipart(bucket_name, creds, uploaded_data,
                                expected_data, file_extension):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.{file_extension}'

    async with Session() as session:
        storage = Storage(service_file=creds, session=session)
        res = await storage.upload(
            bucket_name, object_name, uploaded_data,
            metadata={'Content-Disposition': 'inline',
                      'metadata': {'a': 1, 'b': 2}})

        try:
            assert res['name'] == object_name

            downloaded_data = await storage.download(bucket_name, res['name'])
            assert downloaded_data == expected_data

            downloaded_metadata = await storage.download_metadata(
                bucket_name, res['name'])
            assert downloaded_metadata.pop('contentDisposition') == 'inline'
            assert downloaded_metadata['metadata']['a'] == '1'
            assert downloaded_metadata['metadata']['b'] == '2'
        finally:
            # TODO: don't bother
            # await storage.delete(bucket_name, res['name'])
            pass

async def __aenter__(self):
    from gcloud.aio.storage import Storage

    self.session = ClientSession(trace_configs=self.trace_configs)
    await self.session.__aenter__()
    self.storage = Storage(service_file=self.creds_path,
                           session=self.session)
    self.bucket = self.storage.get_bucket(self.bucket_name)

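# A minimal sketch of the matching `__aexit__`, assuming the class holds no
# resources beyond the session opened in `__aenter__`; not from the original.
async def __aexit__(self, exc_type, exc_val, exc_tb):
    await self.session.__aexit__(exc_type, exc_val, exc_tb)
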
async def _upload(cache: Mapping[str, bytes], bucket, prefix):
    async with aiohttp.ClientSession() as session:
        client = Storage(session=session)
        all_ops = await asyncio.gather(*[
            _upload_obj(client, bucket, prefix, key, val)
            for key, val in cache.items()
        ])
        # TODO: check that ops were successful?
        return all_ops

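# `_upload_obj` is referenced above but not shown; a minimal sketch, assuming
# it simply uploads one cache entry under `prefix/key` (the body is a guess).
async def _upload_obj(client: Storage, bucket: str, prefix: str,
                      key: str, val: bytes):
    return await client.upload(bucket, os.path.join(prefix, key), val)
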
async def test_metadata_resumable(bucket_name, creds):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.txt'
    original_data = f'{uuid.uuid4().hex}'
    original_metadata = {
        'Content-Disposition': 'inline',
        'metadata': {
            'a': 1,
            'b': 2,
            'c': [1, 2, 3],
            'd': {'a': 4, 'b': 5},
        },
    }
    # Google casts all metadata elements as strings.
    google_metadata = {
        'Content-Disposition': 'inline',
        'metadata': {
            'a': str(1),
            'b': str(2),
            'c': str([1, 2, 3]),
            'd': str({'a': 4, 'b': 5}),
        },
    }

    async with Session() as session:
        storage = Storage(service_file=creds, session=session)

        # Without metadata
        res0 = await storage.upload(bucket_name, object_name, original_data,
                                    force_resumable_upload=True)
        data0 = await storage.download(bucket_name, res0['name'])
        await storage.download_metadata(bucket_name, res0['name'])

        # With metadata
        res = await storage.upload(bucket_name, object_name, original_data,
                                   metadata=original_metadata,
                                   force_resumable_upload=True)
        data = await storage.download(bucket_name, res['name'])
        data_metadata = await storage.download_metadata(
            bucket_name, res['name'])

        assert res['name'] == object_name
        assert str(data, 'utf-8') == original_data
        assert data == data0
        assert data_metadata.pop('contentDisposition') == 'inline'
        assert data_metadata['metadata'] == google_metadata['metadata']

async def main(split_json, src_gcs_path, dst_gcs_path, sampling_rate, limit):
    images = json.load(split_json)["images"]
    async with Storage() as client:
        async for result in limited_as_completed_from_async_coro_gen(
            process_image_gen(images, src_gcs_path, dst_gcs_path,
                              sampling_rate, client),
            limit,
        ):
            await result

async def async_upload_to_bucket(blob_name, file_obj):
    async with aiohttp.ClientSession() as session:
        storage = Storage(service_file='./cred/cred.json', session=session)
        status = await storage.upload(
            BUCKET_NAME,
            f'downloads/{blob_name}',
            file_obj,
            timeout=60,
        )
        return status['mediaLink']

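# An illustrative driver for `async_upload_to_bucket` above; the file name
# and the `asyncio.run` wiring are assumptions, not from the original source.
async def example_upload():
    with open('report.pdf', 'rb') as f:
        media_link = await async_upload_to_bucket('report.pdf', f.read())
    print(media_link)

asyncio.run(example_upload())
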
async def async_upload_to_bucket(blob_name, file_obj):
    async with aiohttp.ClientSession() as session:
        storage = Storage(service_file='project.json', session=session)
        status = await storage.upload(
            params["bucket_name"],
            f'uploads/{blob_name}',
            file_obj,
            timeout=60,
        )
        return status['mediaLink']

async def test_download_range(bucket_name, creds, uploaded_data,
                              range_header, expected_data, file_extension):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.{file_extension}'

    async with Session() as session:
        storage = Storage(service_file=creds, session=session)
        res = await storage.upload(bucket_name, object_name, uploaded_data)

        downloaded_data = await storage.download(
            bucket_name, res['name'], headers={'Range': range_header})
        assert expected_data == downloaded_data

        await storage.delete(bucket_name, res['name'])

async def test_upload_resumable(bucket_name, creds, project, uploaded_data,
                                expected_data, file_extension):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.{file_extension}'

    async with aiohttp.ClientSession() as session:
        storage = Storage(project, creds, session=session)
        res = await storage.upload(bucket_name, object_name, uploaded_data,
                                   force_resumable_upload=True)

        downloaded_data = await storage.download(bucket_name, res['name'])
        assert expected_data == downloaded_data

        await storage.delete(bucket_name, res['name'])

async def write_data(self, path, data):
    logging.debug('Start writing data to {path}'.format(path=path))
    real_path = self.__get_file_path(path)
    bucket_name = self.__get_bucket_name(path)

    async with Session(
            timeout=settings.GOOGLE_CLOUD_STORAGE_UPLOAD_TIMEOUT) as session:
        storage = Storage(
            service_file=settings.GOOGLE_APPLICATION_CREDENTIALS,
            session=session)
        return await storage.upload(
            bucket_name, real_path, data,
            timeout=settings.GOOGLE_CLOUD_STORAGE_UPLOAD_TIMEOUT)

async def read_data(self, path, path_to=None):
    bucket_name = self.__get_bucket_name(path)

    async with Session(
            timeout=settings.GOOGLE_CLOUD_STORAGE_UPLOAD_TIMEOUT) as session:
        storage = Storage(
            service_file=settings.GOOGLE_APPLICATION_CREDENTIALS,
            session=session)
        data = await storage.download(
            bucket_name, self.__get_file_path(path),
            timeout=settings.GOOGLE_CLOUD_STORAGE_UPLOAD_TIMEOUT)

        if path_to:
            os.makedirs(os.path.dirname(path_to), exist_ok=True)
            # Use a context manager so the file handle is closed promptly.
            with open(path_to, 'wb') as f:
                f.write(data)

        return data

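# The private helpers used by `read_data`, `write_data`, and
# `read_data_from_dir` are not shown; one plausible implementation, assuming
# paths of the form 'gs://<bucket>/<key>' -- the split logic here is a guess.
def __get_bucket_name(self, path):
    return path.replace('gs://', '').split('/', 1)[0]

def __get_file_path(self, path):
    return path.replace('gs://', '').split('/', 1)[1]
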
async def initialize(self) -> None:
    logging.vlog(1, 'Initializing CardDb.')
    if FLAGS.carddb_local_file:
        logging.info('Initializing CardDb from local file: %s',
                     FLAGS.carddb_local_file)
        with open(FLAGS.carddb_local_file, 'r') as fin:
            db_json = fin.read()
    else:
        logging.info('Initializing CardDb from cloud file: %s/%s',
                     CARDDB_BUCKET, CARDDB_DB_FILE)
        storage = Storage()
        bucket = storage.get_bucket(CARDDB_BUCKET)
        blob = await bucket.get_blob(CARDDB_DB_FILE)
        db_json = await blob.download()
        logging.info('Loaded cloud file.')
    await self._parse_db_json(db_json)
    self._is_initialized.set()

async def test_object_life_cycle(bucket_name, creds, project, uploaded_data,
                                 expected_data, file_extension):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.{file_extension}'

    async with aiohttp.ClientSession() as session:
        storage = Storage(project, creds, session=session)
        await storage.upload(bucket_name, object_name, uploaded_data)

        bucket = storage.get_bucket(bucket_name)
        blob = await bucket.get_blob(object_name)
        constructed_result = await blob.download()
        assert constructed_result == expected_data

        direct_result = await storage.download(bucket_name, object_name)
        assert direct_result == expected_data

        await storage.delete(bucket_name, object_name)

        with pytest.raises(aiohttp.client_exceptions.ClientResponseError):
            await storage.download(bucket_name, object_name)

async def doaj_trio(request):
    try:
        encoded_data = request.data
        string_data = encoded_data.decode()
        data = json.loads(string_data)
        if data["t"] == settings.token:
            async with Session() as session:
                storage = Storage(session=session)
                bucket = storage.get_bucket(bucket_name)
                blob = data["f"]
                print(blob)
                blob_object = await bucket.get_blob(blob)
                raw_data = await blob_object.download()
                journal_nlp = nlp(str(raw_data)[:100000])
                user_nlp = nlp(data["d"])
                sim = user_nlp.similarity(journal_nlp)
                return str(sim)
        else:
            return Response("Forbidden", status=403, mimetype="text/plain")
    except Exception:
        # Avoid a bare `except:` so KeyboardInterrupt/SystemExit propagate.
        return Response("Error", status=500, mimetype="text/plain")

async def test_object_life_cycle(uploaded_data, expected_data):
    bucket_name = 'talkiq-integration-test'
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.txt'

    async with aiohttp.ClientSession() as session:
        storage = Storage(PROJECT, CREDS, session=session)
        await storage.upload(bucket_name, object_name, uploaded_data)

        bucket = storage.get_bucket(bucket_name)
        blob = await bucket.get_blob(object_name)
        constructed_result = await blob.download_as_string()
        assert constructed_result == expected_data

        direct_result = await storage.download_as_string(
            bucket_name, object_name)
        assert direct_result == expected_data

        await storage.delete(bucket_name, object_name)

        with pytest.raises(aiohttp.client_exceptions.ClientResponseError):
            await storage.download_as_string(bucket_name, object_name)

async def test_gcs_signed_url(bucket_name, creds, data):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.txt'

    async with aiohttp.ClientSession() as session:
        storage = Storage(service_file=creds, session=session)
        await storage.upload(bucket_name, object_name, data,
                             force_resumable_upload=True)

        bucket = Bucket(storage, bucket_name)
        blob = await bucket.get_blob(object_name, session=session)
        iam_client = IamClient(service_file=creds, session=session)
        signed_url = await blob.get_signed_url(60, iam_client=iam_client)

        resp = await session.get(signed_url)
        resp.raise_for_status()
        downloaded_data = await resp.text()

        try:
            assert data == downloaded_data
        finally:
            await storage.delete(bucket_name, blob.name)

async def test_upload_resumable(bucket_name, creds, uploaded_data,
                                expected_data, file_extension):
    object_name = f'{uuid.uuid4().hex}/{uuid.uuid4().hex}.{file_extension}'

    async with Session() as session:
        storage = Storage(service_file=creds, session=session)
        res = await storage.upload(
            bucket_name, object_name, uploaded_data,
            force_resumable_upload=True,
            metadata={'Content-Disposition': 'inline',
                      'metadata': {'a': 1, 'b': 2}})

        downloaded_data = await storage.download(bucket_name, res['name'])
        assert expected_data == downloaded_data

        downloaded_metadata = await storage.download_metadata(
            bucket_name, res['name'])
        assert downloaded_metadata.pop('contentDisposition') == 'inline'
        assert downloaded_metadata['metadata']['a'] == '1'
        assert downloaded_metadata['metadata']['b'] == '2'

        await storage.delete(bucket_name, res['name'])

async def _download_blobs_async(blobs, flatten, sublength, path,
                                max_concurrency=10, chunk_size=20):
    # max_queue = min(max_queue, len(blobs))
    max_queue = len(blobs)
    max_concurrency = min(max_concurrency, max_queue)
    semaphore = asyncio.Semaphore(max_concurrency)

    async with aiohttp.ClientSession() as session:
        storage_client = Storage(session=session)
        progress = tqdm(total=len(blobs))
        num_chunks = int(len(blobs) / chunk_size) + 1
        for group in grouper_it(num_chunks, blobs):
            tasks = []
            for blob in group:
                tasks.append(
                    _download_blob_async(blob, semaphore, flatten, sublength,
                                         path, storage_client))
            for task in tqdm(tasks):
                await task
                progress.update(1)

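# `_download_blob_async` is referenced above but not shown; a minimal sketch,
# assuming `blobs` are google-cloud-storage blob objects and guessing at how
# `flatten` and `sublength` shape the local file name.
async def _download_blob_async(blob, semaphore, flatten, sublength, path,
                               storage_client):
    async with semaphore:  # bound the number of concurrent downloads
        data = await storage_client.download(blob.bucket.name, blob.name)
    name = blob.name.replace('/', '_') if flatten else blob.name[sublength:]
    dest = os.path.join(path, name)
    os.makedirs(os.path.dirname(dest) or '.', exist_ok=True)
    with open(dest, 'wb') as f:
        f.write(data)
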
async def test_table_load_copy(creds: str, dataset: str, project: str,
                               export_bucket_name: str) -> None:
    # pylint: disable=too-many-locals
    # N.B. this test relies on Datastore.export -- see `test_datastore_export`
    # in the `gcloud-aio-datastore` smoke tests.
    kind = 'PublicTestDatastoreExportModel'

    rand_uuid = str(uuid.uuid4())

    async with Session() as s:
        ds = Datastore(project=project, service_file=creds, session=s)
        await ds.insert(Key(project, [PathElement(kind)]),
                        properties={'rand_str': rand_uuid})

        operation = await ds.export(export_bucket_name, kinds=[kind])

        count = 0
        while (count < 10 and operation
               and operation.metadata['common']['state'] == 'PROCESSING'):
            await sleep(10)
            operation = await ds.get_datastore_operation(operation.name)
            count += 1

        assert operation.metadata['common']['state'] == 'SUCCESSFUL'
        # END: copy from `test_datastore_export`

        uuid_ = str(uuid.uuid4()).replace('-', '_')
        backup_entity_table = f'public_test_backup_entity_{uuid_}'
        copy_entity_table = f'{backup_entity_table}_copy'

        t = Table(dataset, backup_entity_table, project=project,
                  service_file=creds, session=s)
        gs_prefix = operation.metadata['outputUrlPrefix']
        gs_file = (f'{gs_prefix}/all_namespaces/kind_{kind}/'
                   f'all_namespaces_kind_{kind}.export_metadata')
        await t.insert_via_load([gs_file],
                                source_format=SourceFormat.DATASTORE_BACKUP)
        await sleep(10)

        source_table = await t.get()
        assert int(source_table['numRows']) > 0

        await t.insert_via_copy(project, dataset, copy_entity_table)
        await sleep(10)

        t1 = Table(dataset, copy_entity_table, project=project,
                   service_file=creds, session=s)
        copy_table = await t1.get()
        assert copy_table['numRows'] == source_table['numRows']

        # delete the backup and copy tables
        await t.delete()
        await t1.delete()

        # delete the export files in Google Storage
        # TODO: configure the bucket with autodeletion
        prefix_len = len(f'gs://{export_bucket_name}/')
        export_path = operation.metadata['outputUrlPrefix'][prefix_len:]
        storage = Storage(service_file=creds, session=s)
        files = await storage.list_objects(export_bucket_name,
                                           params={'prefix': export_path})
        for file in files['items']:
            await storage.delete(export_bucket_name, file['name'])

async def _delete_item(bucket, item):
    async with aiohttp.ClientSession() as session:
        logging.debug(f"deleting {item} in {bucket}")
        client = Storage(session=session)
        return await client.delete(bucket, item)

async def _get(bucket, prefix, key):
    async with aiohttp.ClientSession() as session:
        client = Storage(session=session)
        return await client.download(bucket, os.path.join(prefix, key))

async def delete_items(bucket, items):
    async with aiohttp.ClientSession() as session:
        logging.debug(f"deleting {items} in {bucket}")
        client = Storage(session=session)
        return await asyncio.gather(
            *[client.delete(bucket, item) for item in items])

def initialize_container(self):
    self.session = aiohttp.ClientSession()
    self.storage_client = Storage(session=self.session)

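# A hypothetical counterpart to `initialize_container`, releasing the session
# it opens; this method is not part of the original source.
async def close_container(self):
    await self.session.close()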