async def start_web_socket(ws: WebSocket):
    while True:
        try:
            _ = await ws.receive_text()
        except WebSocketDisconnect:
            log_info(
                f'{ws.scope["client"]} - "WebSocket {ws.scope["path"]}" [disconnected]'
            )
            break

async def catch_all_no_namespace(request: Request, rest_of_path: str):
    headers = generate_headers_downstream(request.headers)
    segments = rest_of_path.strip('/').split('/')
    # Namespaced resources start with '@'; otherwise an empty namespace is prepended.
    namespace, name, version, resource = get_info(segments) \
        if segments[0].startswith('@') \
        else get_info([""] + segments)
    log_info("forward request to CDN:", namespace=namespace, name=name,
             version=version, resource=resource)
    return await get_raw_resource(namespace, name, version, resource, headers)

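# For illustration: the two branches above normalize the path so that 'get_info'
# always receives [namespace, name, version, resource...]. 'get_info_sketch' below
# is a hypothetical stand-in for that helper, not the actual implementation.
def get_info_sketch(segments):
    namespace, name, version, *rest = segments
    return namespace.lstrip('@'), name, version, '/'.join(rest)

# Namespaced request: '@youwol/flux-view/0.1.2/dist/bundle.js'
print(get_info_sketch(['@youwol', 'flux-view', '0.1.2', 'dist', 'bundle.js']))
# Non-namespaced request gets an empty namespace prepended: 'lodash/4.17.21/lodash.min.js'
print(get_info_sketch([''] + ['lodash', '4.17.21', 'lodash.min.js']))
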
async def init_resources(config: Configuration):
    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}
    log_info("Successfully retrieved authorization for resources creation")
    await config.storage.ensure_bucket(headers=headers)
    log_info("resources initialization done")

async def init_resources(config: Configuration):
    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}
    log_info("Successfully retrieved authorization for resources creation")
    doc_db = config.data_client.docdb
    storage = config.data_client.storage
    table_ok, bucket_ok = await asyncio.gather(
        doc_db.ensure_table(headers=headers),
        storage.ensure_bucket(headers=headers))
    if not bucket_ok or not table_ok:
        raise Exception(
            f"Problem during resources initialisation: table ok? {table_ok}; bucket ok? {bucket_ok}"
        )
    log_info("resources initialization done")

async def post_metadata(
        request: Request,
        project_id: str,
        metadata_body: EditMetadata,
        configuration: Configuration = Depends(get_configuration)):
    headers = generate_headers_downstream(request.headers)
    doc_db, storage, assets_gtw = configuration.doc_db, configuration.storage, configuration.assets_gtw_client
    owner = configuration.default_owner
    req, workflow, description = await asyncio.gather(
        storage.get_json(path=f"projects/{project_id}/requirements.json", owner=owner, headers=headers),
        storage.get_json(path=f"projects/{project_id}/workflow.json", owner=owner, headers=headers),
        storage.get_json(path=f"projects/{project_id}/description.json", owner=owner, headers=headers)
    )
    log_info("Flux-Backend@Post metadata: got requirements and workflow")
    libraries = {**req['libraries'], **metadata_body.libraries}

    def get_package_id(factory_id: Union[str, Mapping[str, str]]):
        return ("@youwol/" + factory_id.split("@")[1]) if isinstance(factory_id, str) else factory_id['pack']

    used_packages = {get_package_id(m["factoryId"]) for m in workflow["modules"] + workflow["plugins"]}
    log_info("Flux-Backend@Post metadata: used_packages", used_packages=used_packages)
    body = {
        "libraries": {name: version for name, version in libraries.items() if name in used_packages},
        "using": {name: version for name, version in libraries.items()}
    }
    loading_graph = await assets_gtw.cdn_loading_graph(body=body, headers=headers)
    flux_packs = [p['name'] for p in loading_graph['lock'] if p['type'] == 'flux-pack']
    log_info("Flux-Backend@Post metadata: got loading graph", loading_graph=loading_graph)
    used_libraries = {lib["name"]: lib["version"] for lib in loading_graph["lock"]}
    requirements = Requirements(fluxComponents=[], fluxPacks=flux_packs,
                                libraries=used_libraries, loadingGraph=loading_graph)
    schema_version = description['schemaVersion'] if 'schemaVersion' in description else '0'
    coroutines = update_metadata(project_id=project_id, schema_version=schema_version,
                                 name=metadata_body.name, description=metadata_body.description,
                                 requirements=requirements, owner=owner,
                                 storage=storage, docdb=doc_db, headers=headers)
    await asyncio.gather(*coroutines)
    return {}

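# Worked example of the 'libraries' vs 'using' split sent to the loading graph:
# 'libraries' keeps only the packages actually referenced by workflow modules,
# while 'using' pins the version of every known library (sample data below).
libraries = {'@youwol/flux-view': '0.1.2', '@youwol/unused-pack': '1.0.0'}
used_packages = {'@youwol/flux-view'}
body = {
    "libraries": {name: version for name, version in libraries.items() if name in used_packages},
    "using": dict(libraries),
}
# -> {'libraries': {'@youwol/flux-view': '0.1.2'},
#     'using': {'@youwol/flux-view': '0.1.2', '@youwol/unused-pack': '1.0.0'}}
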
async def format_download_form(file_path: Path, base_path: Path, dir_path: Path, compress: bool,
                               rename: str = None) -> FormData:
    if compress and get_content_encoding(file_path) == "br":
        path_log = "/".join(file_path.parts[2:])
        start = time.time()
        if which('brotli'):
            # System 'brotli' binary writes '<file>.br' next to the original,
            # which then replaces the original file.
            log_info(f'brotlify (system) {path_log} ...')
            p = await asyncio.create_subprocess_shell(
                cmd=f'brotli {str(file_path)}',
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE)
            async for f in p.stderr:
                log_error(f.decode('utf-8'))
            await p.communicate()
            os.system(f'rm {str(file_path)}')
            os.system(f'mv {str(file_path)}.br {str(file_path)}')
        else:
            # Fall back to the pure-python binding, compressing in place.
            log_info(f'brotlify (python) {path_log}')
            compressed = brotli.compress(file_path.read_bytes())
            with file_path.open("wb") as f:
                f.write(compressed)
        log_info(f'...{path_log} => {time.time() - start} s')

    data = file_path.read_bytes()
    path_bucket = base_path / file_path.relative_to(dir_path) if not rename else base_path / rename
    return FormData(objectName=path_bucket, objectData=data, owner=Configuration.owner,
                    objectSize=len(data), content_type=get_content_type(file_path.name),
                    content_encoding=get_content_encoding(file_path.name))

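# 'get_content_encoding' is a project helper not shown here. A plausible sketch
# (an assumption, not the real implementation): derive the encoding from the
# file suffix, brotli-compressing common text assets only.
from pathlib import Path

def get_content_encoding_sketch(file_name: str) -> str:
    brotli_suffixes = {'.js', '.css', '.html', '.json', '.txt', '.svg'}
    return 'br' if Path(file_name).suffix in brotli_suffixes else 'identity'
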
async def init_resources(config: Configuration):
    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}
    log_info("Successfully retrieved authorization for resources creation")
    doc_dbs = config.doc_dbs
    log_info("Ensure items_db table")
    items_ok = await doc_dbs.items_db.ensure_table(headers=headers)
    log_info("Ensure folders_db table")
    folders_ok = await doc_dbs.folders_db.ensure_table(headers=headers)
    log_info("Ensure drives_db table")
    drives_ok = await doc_dbs.drives_db.ensure_table(headers=headers)
    log_info("Ensure deleted_db table")
    deleted_ok = await doc_dbs.deleted_db.ensure_table(headers=headers)
    if not (items_ok and folders_ok and drives_ok and deleted_ok):
        raise Exception(
            f"Problem during doc-db's table initialisation {[items_ok, folders_ok, drives_ok, deleted_ok]}")
    log_info("resources initialization done")

async def resolve_loading_tree(
        request: Request,
        body: LoadingGraphBody,
        configuration: Configuration = Depends(get_configuration)):
    doc_db = configuration.doc_db
    headers = generate_headers_downstream(request.headers)
    libraries = {name: version for name, version in body.libraries.items()}
    log_info(f"Start resolving loading graph: {libraries}")

    # Resolve 'latest' version queries first.
    latest_queries = [name for name, version in libraries.items() if version == "latest"]
    versions_resp = await asyncio.gather(*[
        list_versions(request=request, name=name, configuration=configuration)
        for name in latest_queries
    ], return_exceptions=True)
    if any(isinstance(v, Exception) for v in versions_resp):
        packages_error = [f"{name}#latest"
                          for e, name in zip(versions_resp, latest_queries)
                          if isinstance(e, Exception)]
        raise PackagesNotFound(
            detail="Failed to retrieve the latest version of package(s)",
            packages=packages_error)

    latest_versions = {name: resp.versions[0] for name, resp in zip(latest_queries, versions_resp)}
    explicit_versions = {**libraries, **latest_versions}
    log_info("Latest versions resolved",
             latest_versions=latest_versions,
             explicit_versions=explicit_versions)

    queries = [doc_db.get_document(
        partition_keys={"library_name": name},
        clustering_keys={"version_number": get_version_number_str(version)},
        owner=configuration.owner,
        headers=headers)
        for name, version in explicit_versions.items()]
    dependencies = await asyncio.gather(*queries, return_exceptions=True)
    if any(isinstance(v, Exception) for v in dependencies):
        packages_error = [f"{name}#{version}"
                          for e, (name, version) in zip(dependencies, explicit_versions.items())
                          if isinstance(e, Exception)]
        raise PackagesNotFound(
            detail="Failed to retrieve explicit version of package(s)",
            packages=packages_error)

    dependencies_dict = {d["library_name"]: d for d in dependencies}

    async def add_missing_dependencies(missing_previous_loop=None):
        """Some dependencies may be missing from the provided body: fetch them
        using the version pinned in 'body.using' when present, or their latest
        version otherwise."""
        flatten_dependencies = set(flatten([
            [p.split("#")[0] for p in package['dependencies']]
            for package in dependencies_dict.values()]))
        missing = [d for d in flatten_dependencies if d not in dependencies_dict]
        if not missing:
            return dependencies_dict
        # If a pass resolved nothing new, the remaining dependencies cannot be found.
        if missing_previous_loop and missing == missing_previous_loop:
            raise PackagesNotFound(
                detail="Indirect dependencies not found in the CDN",
                packages=missing)

        def get_dependency(dependency):
            if dependency in body.using:
                return get_query_version(configuration.doc_db, dependency, body.using[dependency], headers)
            return get_query_latest(configuration.doc_db, dependency, headers)

        versions = await asyncio.gather(
            *[get_dependency(dependency) for dependency in missing],
            return_exceptions=True)
        if any(len(v["documents"]) == 0 for v in versions):
            raise PackagesNotFound(
                detail="Failed to retrieve a version of indirect dependencies",
                packages=[f"{name}#{body.using.get(name, 'latest')}"
                          for v, name in zip(versions, missing)
                          if len(v["documents"]) == 0])

        versions = list(flatten([d['documents'] for d in versions]))
        for version in versions:
            lib_name = version["library_name"]
            dependencies_dict[lib_name] = version
        return await add_missing_dependencies(missing_previous_loop=missing)

    await add_missing_dependencies()
    items_dict = {d["library_name"]: [to_package_id(d["library_name"]), get_url(d)]
                  for d in dependencies_dict.values()}
    r = loading_graph([], dependencies_dict.values(), items_dict)
    lock = [Library(name=d["library_name"], version=d["version"], namespace=d["namespace"],
                    id=to_package_id(d["library_name"]), type=d["type"],
                    fingerprint=d["fingerprint"])
            for d in dependencies_dict.values()]
    return LoadingGraphResponseV1(graphType="sequential-v1", lock=lock, definition=r)

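# The recursion in 'add_missing_dependencies' computes a fixed point over the
# dependency closure. A minimal self-contained sketch of the same idea, with an
# in-memory registry standing in for the CDN doc-db (names are illustrative):
import asyncio

REGISTRY = {
    'app':   {'dependencies': ['lib-a', 'lib-b']},
    'lib-a': {'dependencies': ['lib-c']},
    'lib-b': {'dependencies': []},
    'lib-c': {'dependencies': []},
}

async def fetch(name: str) -> dict:
    if name not in REGISTRY:
        raise LookupError(f"{name} not found in the CDN")
    return REGISTRY[name]

async def resolve_closure(roots: list) -> dict:
    resolved = {name: await fetch(name) for name in roots}
    while True:
        wanted = {dep for pkg in resolved.values() for dep in pkg['dependencies']}
        missing = [name for name in wanted if name not in resolved]
        if not missing:
            return resolved  # fixed point reached: the closure is complete
        fetched = await asyncio.gather(*[fetch(name) for name in missing])
        resolved.update(dict(zip(missing, fetched)))

# resolves 'lib-c' transitively even though only 'app' was requested
print(asyncio.run(resolve_closure(['app'])))
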
async def init_resources(config: Configuration):
    log_info("Ensure database resources")
    headers = await config.admin_headers if config.admin_headers else {}
    log_info("Successfully retrieved authorization for resources creation")
    log_info("Ensure assets table")
    table1_ok = await config.doc_db_asset.ensure_table(headers=headers)
    if not table1_ok:
        raise Exception("Problem during docdb_asset resources initialisation")
    log_info("Ensure assets bucket")
    bucket_ok = await config.storage.ensure_bucket(headers=headers)
    if not bucket_ok:
        raise Exception("Problem during bucket initialisation")
    log_info("Ensure access policy table")
    # Await each coroutine directly: wrapping a single call in asyncio.gather
    # yields a one-element list, which is always truthy and defeats the check.
    table2_ok = await config.doc_db_access_policy.ensure_table(headers=headers)
    if not table2_ok:
        raise Exception("Problem during docdb_access_policy resources initialisation")
    log_info("Ensure access history table")
    table3_ok = await config.doc_db_access_history.ensure_table(headers=headers)
    if not table3_ok:
        raise Exception("Problem during docdb_access_history resources initialisation")
    log_info("resources initialization done")

async def publish_package(file: IO, filename: str, content_encoding, configuration, headers):
    if content_encoding not in ['identity', 'brotli']:
        raise HTTPException(status_code=422,
                            detail="Only identity and brotli encoding are accepted")
    need_compression = content_encoding == 'identity'
    dir_path = Path("./tmp_zips") / str(uuid4())
    zip_path = (dir_path / filename).with_suffix('.zip')
    os.makedirs(dir_path)
    try:
        log_info("extract .zip file...")
        compressed_size = extract_zip_file(file, zip_path, dir_path, delete_original=False)
        log_info("...zip extracted", compressed_size=compressed_size)
        package_path = next(flatten([
            [Path(root) / f for f in files if f == "package.json"]
            for root, _, files in os.walk(dir_path)]))
        package_json = json.loads(package_path.read_text())
        library_id = package_json["name"].replace("@", '')
        version = package_json["version"]
        base_path = Path('libraries') / library_id / version
        storage = configuration.storage
        paths = flatten([[Path(root) / f for f in files] for root, _, files in os.walk(dir_path)])
        paths = [p for p in paths if p != zip_path]
        form_original = await format_download_form(zip_path, base_path, package_path.parent,
                                                   need_compression, '__original.zip')
        forms = await asyncio.gather(*[
            format_download_form(path, base_path, package_path.parent, need_compression)
            for path in paths])
        forms = list(forms) + [form_original]
        # The fingerprint is the md5 checksum of the included files,
        # computed after they have (possibly) been compressed.
        os.remove(zip_path)
        md5_stamp = md5_from_folder(dir_path)
        post_requests = [storage.post_object(path=form.objectName,
                                             content=form.objectData,
                                             content_type=form.content_type,
                                             owner=Configuration.owner,
                                             headers=headers)
                         for form in forms]
        log_info(f"Clean directory {str(base_path)}")
        await storage.delete_group(prefix=base_path, owner=Configuration.owner, headers=headers)
        log_info(f"Send {len(post_requests)} files to storage")
        await asyncio.gather(*post_requests)
        record = format_doc_db_record(package_path=package_path, fingerprint=md5_stamp)
        log_info("Create docdb document", record=record)
        await configuration.doc_db.create_document(record, owner=Configuration.owner, headers=headers)
        log_info("Done", md5_stamp=md5_stamp)
        return PublishResponse(
            name=package_json["name"],
            version=version,
            compressedSize=compressed_size,
            id=to_package_id(package_json["name"]),
            fingerprint=md5_stamp,
            url=f"{to_package_id(package_json['name'])}/{record['version']}/{record['bundle']}")
    finally:
        shutil.rmtree(dir_path)

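# 'md5_from_folder' is a project helper not shown here. A plausible sketch
# (an assumption, not the real implementation): hash every file's bytes in
# sorted-path order so the fingerprint is deterministic across filesystems.
import hashlib
from pathlib import Path

def md5_from_folder_sketch(dir_path: Path) -> str:
    digest = hashlib.md5()
    for path in sorted(p for p in dir_path.rglob('*') if p.is_file()):
        digest.update(path.read_bytes())
    return digest.hexdigest()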