async def post_story(
        request: Request,
        story_id: str,
        body: PostStoryBody,
        configuration: Configuration = Depends(get_configuration)):
    """Rename a story by updating the title of its root document.

    Looks up the story record, loads its root document, persists the new
    title, and returns the refreshed ``StoryResp``.
    """
    headers = generate_headers_downstream(request.headers)
    stories_db = configuration.doc_db_stories
    docs_db = configuration.doc_db_documents
    owner = Configuration.default_owner

    stories = await stories_db.query(query_body=f"story_id={story_id}#1",
                                     owner=owner,
                                     headers=headers)
    story = stories['documents'][0]
    root_id = story['root_document_id']

    documents = await docs_db.query(query_body=f"document_id={root_id}#1",
                                    owner=owner,
                                    headers=headers)
    updated = {**documents['documents'][0], "title": body.title}
    await docs_db.update_document(doc=updated, owner=owner, headers=headers)

    return StoryResp(storyId=story_id,
                     rootDocumentId=root_id,
                     title=body.title,
                     authors=story['authors'])
async def get_story(request: Request,
                    story_id: str,
                    configuration: Configuration = Depends(get_configuration)):
    """Fetch a story and its (unique) root document.

    The story record and the root-document query run concurrently.  A 500
    is raised when the root document is missing or not unique.
    """
    headers = generate_headers_downstream(request.headers)
    stories_db = configuration.doc_db_stories
    docs_db = configuration.doc_db_documents

    story, roots = await asyncio.gather(
        stories_db.get_document(partition_keys={"story_id": story_id},
                                clustering_keys={},
                                owner=Configuration.default_owner,
                                headers=headers),
        docs_db.query(query_body=f"parent_document_id={story_id}#1",
                      owner=Configuration.default_owner,
                      headers=headers))

    candidates = roots['documents']
    if not candidates:
        raise HTTPException(status_code=500,
                            detail="Can not find root document of story")
    if len(candidates) > 1:
        raise HTTPException(status_code=500,
                            detail="Multiple root documents can not exist")

    root = candidates[0]
    return StoryResp(storyId=story['story_id'],
                     title=root['title'],
                     authors=story['authors'],
                     rootDocumentId=root['document_id'])
async def get_children_rec(document_id: str, start_index, chunk_size, headers,
                           doc_db_docs: DocDbClient) -> List[str]:
    """Recursively collect every descendant document of ``document_id``.

    Direct children are paged ``chunk_size`` at a time starting from
    ``start_index`` (the ``position`` column); each child is recursed into
    concurrently, and a full page triggers fetching the next page.  Returns
    the flattened list of all descendant document records.
    """
    # NOTE(review): headers are re-processed at every recursion level;
    # harmless only if generate_headers_downstream is idempotent — confirm.
    headers = generate_headers_downstream(headers)
    documents_resp = await doc_db_docs.query(
        query_body=
        f"parent_document_id={document_id},position>={start_index}#{chunk_size}",
        owner=Configuration.default_owner,
        headers=headers)
    direct_children = documents_resp["documents"]
    # Recurse into all direct children concurrently, restarting pagination
    # at position 0 for each subtree.
    indirect_children = await asyncio.gather(*[
        get_children_rec(document_id=d["document_id"],
                         start_index=0,
                         chunk_size=chunk_size,
                         headers=headers,
                         doc_db_docs=doc_db_docs) for d in direct_children
    ])
    indirect_children = itertools.chain.from_iterable(indirect_children)
    if len(direct_children) == chunk_size:
        # A full page may mean more siblings remain: fetch the next page.
        # NOTE(review): the query filters on `position` but the cursor reads
        # `order_index` and offsets by +0.5 — verify both refer to the same
        # column and that siblings are spaced by more than 0.5, otherwise
        # documents between (last, last + 0.5) would be skipped.
        children_next = await get_children_rec(
            document_id=document_id,
            start_index=direct_children[-1]['order_index'] + 0.5,
            doc_db_docs=doc_db_docs,
            chunk_size=chunk_size,
            headers=headers)
        return [*direct_children, *indirect_children, *children_next]
    return [*direct_children, *indirect_children]
async def delete_document(
        request: Request,
        document_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """Delete a document, all of its descendants, and their stored contents.

    Row deletions and content deletions for the whole subtree are issued
    concurrently; the response carries the number of deleted documents.
    """
    headers = generate_headers_downstream(request.headers)
    docs_db = configuration.doc_db_documents
    storage = configuration.storage

    children = await get_children_rec(document_id=document_id,
                                      start_index=-math.inf,
                                      chunk_size=10,
                                      headers=headers,
                                      doc_db_docs=docs_db)
    query_resp = await docs_db.query(query_body=f"document_id={document_id}#1",
                                     owner=configuration.default_owner,
                                     headers=headers)
    targets = [query_resp['documents'][0], *children]

    row_deletions = [
        docs_db.delete_document(doc=target,
                                owner=configuration.default_owner,
                                headers=headers) for target in targets
    ]
    content_deletions = [
        storage.delete(path=target['content_id'],
                       owner=configuration.default_owner,
                       headers=headers) for target in targets
    ]
    await asyncio.gather(*row_deletions, *content_deletions)
    return DeleteResp(deletedDocuments=len(children) + 1)
async def sync(request: Request,
               file: UploadFile = File(...),
               reset: bool = False,
               configuration: Configuration = Depends(get_configuration)):
    """Synchronize resources from an uploaded zip archive.

    When ``reset`` is true, the doc-db table and storage bucket are dropped
    (concurrently) and re-initialized before synchronizing.  The temporary
    extraction folder is always removed.
    """
    headers = generate_headers_downstream(request.headers)
    if reset:
        drop_table = configuration.doc_db.delete_table(headers=headers)
        drop_bucket = configuration.storage.delete_bucket(
            force_not_empty=True, headers=headers)
        await asyncio.gather(drop_table, drop_bucket)
        await init_resources(configuration)

    dir_path, zip_path, zip_dir_name = create_tmp_folder(file.filename)
    try:
        compressed_size = extract_zip_file(file.file, zip_path, dir_path)
        files_count, libraries_count, namespaces = await synchronize(
            dir_path, zip_dir_name, configuration, headers)
        return SyncResponse(filesCount=files_count,
                            librariesCount=libraries_count,
                            compressedSize=compressed_size,
                            namespaces=namespaces)
    finally:
        # Clean up the working directory whether or not the sync succeeded.
        shutil.rmtree(dir_path)
async def delete_version_generic(request: Request, namespace: str,
                                 library_name: str, version: str,
                                 configuration: Configuration):
    """Delete one published library version: its db row and stored files."""
    headers = generate_headers_downstream(request.headers)
    doc_db = configuration.doc_db
    storage = configuration.storage

    # Accept the namespace with or without a leading '@'.
    if namespace and namespace[0] == '@':
        namespace = namespace[1:]

    doc = await doc_db.get_document(
        partition_keys={"library_name": f"@{namespace}/{library_name}"},
        clustering_keys={"version_number": get_version_number_str(version)},
        owner=configuration.owner,
        headers=headers)
    await doc_db.delete_document(doc=doc,
                                 owner=Configuration.owner,
                                 headers=headers)

    # Namespaced libraries live one directory deeper than un-namespaced ones.
    if namespace != "":
        group_path = f"libraries/{namespace}/{library_name}/{version}"
    else:
        group_path = f"libraries/{library_name}/{version}"
    await storage.delete_group(group_path,
                               owner=Configuration.owner,
                               headers=headers)
    return {"deletedCount": 1}
async def upload(
        request: Request,
        file: UploadFile = File(...),
        configuration: Configuration = Depends(get_configuration)):
    """Upload a zip of projects and persist each project it contains.

    Every directory of the extracted archive holding a ``workflow.json`` is
    treated as one project; projects are persisted with at most 25 upstream
    calls in flight at a time.
    """
    dir_path, zip_path, zip_dir_name = create_tmp_folder(file.filename)
    headers = generate_headers_downstream(request.headers)
    try:
        # NOTE(review): other handlers pass `file.file` (the raw stream) to
        # extract_zip_file and take a scalar return; here the UploadFile
        # itself is passed and a 2-tuple is unpacked — confirm this is the
        # intended helper overload.
        compressed_size, _ = extract_zip_file(file, zip_path, dir_path)
        # A project folder is any directory containing a 'workflow.json'.
        projects_folder = flatten([[Path(root) for f in files if f == "workflow.json"]
                                   for root, _, files in os.walk(dir_path / zip_dir_name)])
        projects_folder = list(projects_folder)
        projects = [create_project_from_json(folder) for folder in projects_folder]
        coroutines = [update_project(project_id=pid,
                                     owner=configuration.default_owner,
                                     project=project,
                                     storage=configuration.storage,
                                     docdb=configuration.doc_db,
                                     headers=headers)
                      for pid, project in projects]
        coroutines_flat = flatten(coroutines)
        # Throttle: run the update coroutines in chunks of 25.
        for chunk in chunks(coroutines_flat, 25):
            await asyncio.gather(*chunk)
        return UploadResponse(project_ids=[pid for pid, _ in projects])
    finally:
        # Always remove the temporary extraction folder.
        shutil.rmtree(dir_path)
async def move(request: Request,
               tree_id: str,
               body: MoveBody,
               configuration: Configuration = Depends(get_configuration)):
    """Move a tree entity into another folder, re-owning crossed-group assets.

    After the tree-db move, every moved item whose ``groupId`` differs from
    the source entity's group has its backing asset transferred to the new
    group via ``regroup_asset``.  Returns the destination folder's children.
    """
    headers = generate_headers_downstream(request.headers)
    tree_db, assets_db, assets_stores = configuration.treedb_client, configuration.assets_client, \
        configuration.assets_stores()
    resp = await tree_db.get_entity(entity_id=tree_id,
                                    include_drives=False,
                                    headers=headers)
    # Group of the entity before the move; used to detect group changes.
    group_id = resp['entity']['groupId']
    body_move_tree = {
        'targetId': tree_id,
        "destinationFolderId": body.destinationFolderId
    }
    moved = await tree_db.move(body=body_move_tree, headers=headers)

    async def regroup(tree_item):
        # Resolve the asset behind the moved tree item, then transfer its
        # ownership to the item's new group.
        actual_asset = await get_asset_by_tree_id(request=request,
                                                  tree_id=tree_item['itemId'],
                                                  configuration=configuration)
        return await regroup_asset(request=request,
                                   asset=actual_asset,
                                   tree_item=tree_item,
                                   configuration=configuration)

    # Only items that landed in a different group need ownership transfer.
    assets_to_regroup = [m for m in moved['items'] if m['groupId'] != group_id]
    await asyncio.gather(
        *[regroup(tree_item) for tree_item in assets_to_regroup])
    return await get_children(request,
                              folder_id=body.destinationFolderId,
                              configuration=configuration)
async def post_data_generic(
        request: Request,
        package: str,
        name: str,
        body: JSON,
        namespace: str = None,
        configuration: Configuration = Depends(get_configuration)
):
    """Persist a JSON payload for a package's named data entry.

    Args:
        request: incoming request
        package: package name (namespace not included)
        name: name of the data entry
        body: JSON content to store
        namespace: optional package namespace
        configuration: service's configuration

    Returns:
        An empty dict ``{}``.
    """
    headers = generate_headers_downstream(request.headers)
    target_path = get_path(request=request,
                           package=package,
                           name=name,
                           namespace=namespace)
    await configuration.storage.post_json(path=target_path,
                                          json=body,
                                          owner=configuration.default_owner,
                                          headers=headers)
    return {}
async def regroup_asset(request: Request, asset: AssetResponse,
                        tree_item: Dict[str, str],
                        configuration: Configuration):
    """Transfer ``asset`` to the group of ``tree_item`` and return the item.

    The asset record and the store's raw metadata are updated concurrently,
    then an ``ItemResponse`` reflecting the new group/folder/drive is built.
    """
    headers = generate_headers_downstream(request.headers)
    new_group_id = tree_item['groupId']
    tree_db, assets_db, assets_stores = configuration.treedb_client, configuration.assets_client, \
        configuration.assets_stores()
    # From here on we change the owning group of the asset: extra care needed.
    # Pick the store handling this asset kind.  NOTE(review): `next` raises
    # StopIteration when no store matches — assumed unreachable for persisted
    # assets; confirm.
    store = next(store for store in assets_stores
                 if store.path_name == asset.kind)
    body_asset = {**asset.dict(), **{"groupId": new_group_id}}
    body_raw = AssetMeta(**body_asset)
    await asyncio.gather(
        assets_db.update_asset(asset_id=asset.assetId,
                               body=body_asset,
                               headers=headers),
        store.sync_asset_metadata(request=request,
                                  raw_id=asset.rawId,
                                  metadata=body_raw,
                                  headers=headers))
    # The moved item is never 'borrowed' in its new location.
    new_asset = ItemResponse(
        **{
            **asset.dict(),
            **{
                "groupId": new_group_id,
                "treeId": tree_item['itemId'],
                "borrowed": False,
                "folderId": tree_item['folderId'],
                "driveId": tree_item['driveId']
            }
        })
    return new_asset
async def list_deleted(
        request: Request,
        drive_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """List soft-deleted folders and items of a drive (first 100 records)."""
    headers = generate_headers_downstream(request.headers)
    doc_dbs = configuration.doc_dbs
    entities = await doc_dbs.deleted_db.query(
        query_body=f"drive_id={drive_id}#100",
        owner=configuration.public_owner,
        headers=headers)

    records = entities["documents"]
    folders = [
        FolderResponse(**{**convert_out(r), "folderId": r['deleted_id']})
        for r in records if r['kind'] == 'folder'
    ]
    items = [
        ItemResponse(**{
            **convert_out(r),
            "itemId": r['deleted_id'],
            "folderId": r['parent_folder_id']
        }) for r in records if r['kind'] == 'item'
    ]
    return ChildrenResponse(folders=folders, items=items)
async def get_children(
        request: Request,
        document_id: str,
        from_position: float = QueryParam(0, alias="from-position"),
        count: int = QueryParam(1000),
        configuration: Configuration = Depends(get_configuration)):
    """Return the direct children of a document from a starting position."""
    headers = generate_headers_downstream(request.headers)
    docs_db = configuration.doc_db_documents
    start = position_format(from_position)

    clauses = [
        WhereClause(column="parent_document_id", relation="eq",
                    term=document_id),
        WhereClause(column="position", relation="geq", term=start),
    ]
    query_body = QueryBody(max_results=count,
                           query=Query(where_clause=clauses))
    resp = await docs_db.query(query_body=query_body,
                               owner=Configuration.default_owner,
                               headers=headers)

    children = [
        GetDocumentResp(storyId=d['story_id'],
                        documentId=d['document_id'],
                        parentDocumentId=d['parent_document_id'],
                        title=d['title'],
                        position=float(d['position']),
                        contentId=d["content_id"])
        for d in resp['documents']
    ]
    return GetChildrenResp(documents=children)
async def post_document(
        request: Request,
        document_id: str,
        body: PostDocumentBody,
        configuration: Configuration = Depends(get_configuration)):
    """Update a document's title and, when provided, its stored content.

    The db update and the (optional) content write run concurrently.
    """
    content_id = document_id
    headers = generate_headers_downstream(request.headers)
    docs_db = configuration.doc_db_documents
    storage = configuration.storage

    query_resp = await docs_db.query(query_body=f"document_id={document_id}#1",
                                     owner=configuration.default_owner,
                                     headers=headers)
    updated = {**query_resp['documents'][0], "title": body.title}

    pending = [
        docs_db.update_document(doc=updated,
                                owner=Configuration.default_owner,
                                headers=headers)
    ]
    if body.content:
        pending.append(
            storage.post_object(path=content_id,
                                content=body.content,
                                content_type=Configuration.text_content_type,
                                owner=Configuration.default_owner,
                                headers=headers))
    await asyncio.gather(*pending)
    return format_document_resp(updated)
async def resolve_loading_tree(
        request: Request,
        body: LoadingGraphBody,
        configuration: Configuration = Depends(get_configuration)):
    """Delegate loading-graph resolution to the CDN service."""
    headers = generate_headers_downstream(request.headers)
    return await configuration.cdn_client.query_loading_graph(
        body=body.dict(), headers=headers)
async def get_drive(request: Request,
                    drive_id: str,
                    configuration: Configuration = Depends(get_configuration)):
    """Fetch a drive from the tree-db and wrap it in a DriveResponse."""
    headers = generate_headers_downstream(request.headers)
    raw = await configuration.treedb_client.get_drive(drive_id=drive_id,
                                                      headers=headers)
    return DriveResponse(**raw)
async def get_package_generic(
        request: Request,
        library_name: str,
        version: str,
        metadata: bool = False,
        configuration: Configuration = Depends(get_configuration)):
    """Return a package's metadata record or its original zip archive.

    Args:
        request: incoming request
        library_name: full library name (including namespace)
        version: explicit version, or 'latest' to resolve the newest one
        metadata: when True return the ``Library`` record instead of the zip
        configuration: service's configuration

    Raises:
        PackagesNotFound: when metadata is requested and the version is
            absent (upstream 404).
        HTTPException: any other upstream failure is propagated unchanged.
    """
    headers = generate_headers_downstream(request.headers)
    if version == 'latest':
        # Resolve 'latest' to the most recently published version.
        versions_resp = await list_versions(request=request,
                                            name=library_name,
                                            max_results=1,
                                            configuration=configuration)
        version = versions_resp.versions[0]
    doc_db = configuration.doc_db
    if metadata:
        try:
            d = await doc_db.get_document(
                partition_keys={"library_name": library_name},
                clustering_keys={
                    "version_number": get_version_number_str(version)
                },
                owner=configuration.owner,
                headers=headers)
            return Library(name=d["library_name"],
                           version=d["version"],
                           namespace=d["namespace"],
                           id=to_package_id(d["library_name"]),
                           type=d["type"],
                           fingerprint=d["fingerprint"])
        except HTTPException as e:
            if e.status_code == 404:
                raise PackagesNotFound(detail="Failed to retrieve a package",
                                       packages=[f"{library_name}#{version}"])
            # Bug fix: previously a non-404 HTTPException was silently
            # swallowed and the handler fell through to serving the raw
            # archive; propagate it instead.
            raise
    storage = configuration.storage
    path = Path("libraries") / library_name.strip(
        '@') / version / '__original.zip'
    content = await storage.get_bytes(path=path,
                                      owner=configuration.owner,
                                      headers=headers)
    return Response(content, media_type='multipart/form-data')
async def ensure_post_permission(request: Request, doc,
                                 configuration: Configuration):
    """Check the caller belongs to the owning group, then update the asset doc."""
    # Only the owning group may put/post.
    ensure_group_permission(request=request, group_id=doc["group_id"])
    headers = generate_headers_downstream(request.headers)
    return await configuration.doc_db_asset.update_document(
        doc, owner=configuration.public_owner, headers=headers)
async def remove_item(
        request: Request,
        item_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """Remove a tree item and reply with an empty object."""
    headers = generate_headers_downstream(request.headers)
    await configuration.treedb_client.remove_item(item_id=item_id,
                                                  headers=headers)
    return {}
async def delete_drive(
        request: Request,
        drive_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """Delete a drive and reply with an empty object."""
    headers = generate_headers_downstream(request.headers)
    await configuration.treedb_client.delete_drive(drive_id=drive_id,
                                                   headers=headers)
    return {}
async def ensure_post_permission(
        request: Request,
        docdb: DocDb,
        doc: Any,
        configuration: Configuration
):
    """Verify the caller's group owns ``doc``, then persist the update."""
    ensure_group_permission(request=request, group_id=doc["group_id"])
    downstream_headers = generate_headers_downstream(request.headers)
    return await docdb.update_document(doc,
                                       owner=configuration.public_owner,
                                       headers=downstream_headers)
async def query_flat(
        request: Request,
        body: QueryFlatBody,
        configuration: Configuration = Depends(get_configuration)):
    """Run a flat asset query and convert each hit into the response model."""
    headers = generate_headers_downstream(request.headers)
    raw = await configuration.assets_client.query(body=body.dict(),
                                                  headers=headers)
    return AssetsResponse(
        assets=[to_asset_resp(asset) for asset in raw["assets"]])
async def get_drives(
        request: Request,
        group_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """List the drives belonging to a group."""
    headers = generate_headers_downstream(request.headers)
    raw = await configuration.treedb_client.get_drives(group_id=group_id,
                                                       headers=headers)
    return DrivesResponse(
        drives=[DriveResponse(**drive) for drive in raw['drives']])
async def get_folder(
        request: Request,
        folder_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """Fetch a folder from the tree-db and convert it to the response model."""
    headers = generate_headers_downstream(request.headers)
    raw = await configuration.treedb_client.get_folder(folder_id=folder_id,
                                                       headers=headers)
    return to_folder_resp(raw)
async def delete_version_generic(request: Request, library_name: str,
                                 version: str,
                                 configuration: Configuration):
    """Delete a library version via the CDN after a write-permission check."""
    headers = generate_headers_downstream(request.headers)
    await ensure_permission('write', request, library_name, configuration)
    return await configuration.cdn_client.delete_version(
        library_name=library_name, version=version, headers=headers)
async def get(request: Request,
              asset_id: str,
              configuration: Configuration = Depends(get_configuration)):
    """Fetch an asset together with the caller's permissions, concurrently."""
    headers = generate_headers_downstream(request.headers)
    client = configuration.assets_client
    asset, permissions = await asyncio.gather(
        client.get(asset_id=asset_id, headers=headers),
        client.get_permissions(asset_id=asset_id, headers=headers))
    return to_asset_resp(asset=asset, permissions=permissions)
async def get_component(
        request: Request,
        component_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """Load a component by id from storage and the component doc-db."""
    headers = generate_headers_downstream(request.headers)
    return await retrieve_component(
        component_id=component_id,
        owner=configuration.default_owner,
        storage=configuration.storage,
        doc_db_component=configuration.doc_db_component,
        headers=headers)
async def get_items_by_related_id(
        request: Request,
        related_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """List every tree item referencing the given related id."""
    headers = generate_headers_downstream(request.headers)
    raw = await configuration.treedb_client.get_items_from_related_id(
        related_id=related_id, headers=headers)
    return ItemsResponse(items=[to_item_resp(item) for item in raw['items']])
async def publish(request: Request,
                  file: UploadFile = File(...),
                  content_encoding: str = Form('identity'),
                  configuration: Configuration = Depends(get_configuration)):
    """Publish an uploaded package archive.

    See https://www.toptal.com/python/beginners-guide-to-concurrency-and-parallelism-in-python
    Publishing should go through a queue so the cdn pods stay fully
    available to fetch resources.
    """
    headers = generate_headers_downstream(request.headers)
    return await publish_package(file.file, file.filename, content_encoding,
                                 configuration, headers)
async def get_content(
        request: Request,
        content_id: str,
        configuration: Configuration = Depends(get_configuration)):
    """Return a document's raw text content with the configured content type."""
    headers = generate_headers_downstream(request.headers)
    text = await configuration.storage.get_text(
        path=content_id, owner=Configuration.default_owner, headers=headers)
    return Response(content=text,
                    headers={"Content-Type": Configuration.text_content_type})
async def get_project(
        request: Request,
        project_id: str,
        configuration: Configuration = Depends(get_configuration)
):
    """Load a project by id from storage."""
    headers = generate_headers_downstream(request.headers)
    return await retrieve_project(project_id=project_id,
                                  owner=configuration.default_owner,
                                  storage=configuration.storage,
                                  headers=headers)