async def progress_message_parser(app: web.Application, data: bytes) -> None:
    # update corresponding project, node, progress value
    rabbit_message = ProgressRabbitMessage.parse_raw(data)
    try:
        project = await projects_api.update_project_node_progress(
            app,
            rabbit_message.user_id,
            f"{rabbit_message.project_id}",
            f"{rabbit_message.node_id}",
            progress=rabbit_message.progress,
        )
        if project:
            messages: List[SocketMessageDict] = [
                {
                    "event_type": SOCKET_IO_NODE_UPDATED_EVENT,
                    "data": {
                        "project_id": project["uuid"],
                        "node_id": rabbit_message.node_id,
                        "data": project["workbench"][f"{rabbit_message.node_id}"],
                    },
                }
            ]
            await send_messages(app, f"{rabbit_message.user_id}", messages)
    except ProjectNotFoundError:
        log.warning(
            "project related to received rabbitMQ progress message not found: '%s'",
            json_dumps(rabbit_message, indent=2),
        )
    except NodeNotFoundError:
        log.warning(
            "node related to received rabbitMQ progress message not found: '%s'",
            json_dumps(rabbit_message, indent=2),
        )
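# A hypothetical wiring sketch (the actual subscription code is not shown here,
# and using aio_pika is an assumption): progress messages arrive as raw bytes on
# a RabbitMQ queue and are fed to the parser above.
import aio_pika

async def subscribe_progress_queue(app: web.Application, url: str, queue_name: str) -> None:
    connection = await aio_pika.connect_robust(url)
    channel = await connection.channel()
    queue = await channel.declare_queue(queue_name, durable=True)

    async def _on_message(message: aio_pika.IncomingMessage) -> None:
        async with message.process():  # acks the message if the handler succeeds
            await progress_message_parser(app, message.body)

    await queue.consume(_on_message)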
def test_serialization_of_uuids(fake_data_dict: Dict[str, Any]):
    uuid_obj = uuid4()
    # NOTE the quotes around expected value
    assert json_dumps(uuid_obj) == f'"{uuid_obj}"'

    obj = {"ids": [uuid4() for _ in range(3)]}
    dump = json_dumps(obj)
    assert json.loads(dump) == export_uuids_to_str(obj)
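# A minimal sketch of a json_dumps that would satisfy the test above (an
# assumption for illustration; the real helper lives in the codebase's
# service library): UUIDs serialize to their quoted string form.
import json
from uuid import UUID

def _default(obj):
    if isinstance(obj, UUID):
        return str(obj)  # dumped as '"<hex-uuid>"' in the JSON output
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

def json_dumps_sketch(obj, **kwargs) -> str:
    return json.dumps(obj, default=_default, **kwargs)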
def check_if_cluster_is_able_to_run_pipeline(
    node_id: NodeID,
    scheduler_info: Dict[str, Any],
    task_resources: Dict[str, Any],
    node_image: Image,
    cluster_id: ClusterID,
):
    logger.debug("Dask scheduler info: %s", json_dumps(scheduler_info, indent=2))

    workers = scheduler_info.get("workers", {})

    def can_task_run_on_worker(
        task_resources: Dict[str, Any], worker_resources: Dict[str, Any]
    ) -> bool:
        def gen_check(
            task_resources: Dict[str, Any], worker_resources: Dict[str, Any]
        ) -> Iterable[bool]:
            for r in task_resources:
                yield worker_resources.get(r, 0) >= task_resources[r]

        return all(gen_check(task_resources, worker_resources))

    def cluster_missing_resources(
        task_resources: Dict[str, Any], cluster_resources: Dict[str, Any]
    ) -> List[str]:
        return [r for r in task_resources if r not in cluster_resources]

    cluster_resources_counter = collections.Counter()
    can_a_worker_run_task = False
    for worker in workers:
        worker_resources = workers[worker].get("resources", {})
        cluster_resources_counter.update(worker_resources)
        if can_task_run_on_worker(task_resources, worker_resources):
            can_a_worker_run_task = True
    all_available_resources_in_cluster = dict(cluster_resources_counter)

    logger.debug(
        "Dask scheduler total available resources in cluster %s: %s, task needed resources %s",
        cluster_id,
        json_dumps(all_available_resources_in_cluster, indent=2),
        json_dumps(task_resources, indent=2),
    )

    if can_a_worker_run_task:
        return

    # check if we have missing resources
    if missing_resources := cluster_missing_resources(
        task_resources, all_available_resources_in_cluster
    ):
        raise MissingComputationalResourcesError(
            node_id=node_id,
            msg=f"Service {node_image.name}:{node_image.tag} cannot be scheduled "
            f"on cluster {cluster_id}: task needs '{task_resources}', "
            f"cluster has '{all_available_resources_in_cluster}', missing: '{missing_resources}'",
        )
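# Usage sketch with a made-up scheduler snapshot (its shape mirrors the
# "workers" section reported by the dask scheduler). SimpleNamespace stands in
# for the real Image model; all values are illustrative assumptions.
from types import SimpleNamespace
from uuid import uuid4

scheduler_info = {
    "workers": {
        "tcp://worker-1:8786": {"resources": {"CPU": 4.0, "RAM": 16e9}},
        "tcp://worker-2:8786": {"resources": {"CPU": 2.0, "RAM": 8e9}},
    }
}
# Passes silently: worker-1 satisfies the request
check_if_cluster_is_able_to_run_pipeline(
    node_id=uuid4(),
    scheduler_info=scheduler_info,
    task_resources={"CPU": 1.0},
    node_image=SimpleNamespace(name="simcore/services/comp/sleeper", tag="1.0.0"),
    cluster_id=0,
)
# Raises MissingComputationalResourcesError: no worker advertises "MPI" at all
check_if_cluster_is_able_to_run_pipeline(
    node_id=uuid4(),
    scheduler_info=scheduler_info,
    task_resources={"CPU": 1.0, "MPI": 1.0},
    node_image=SimpleNamespace(name="simcore/services/comp/sleeper", tag="1.0.0"),
    cluster_id=0,
)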
def _patch(app_config: Dict) -> Dict[str, str]:
    assert isinstance(app_config, dict)

    print(" - app_config=\n", json_dumps(app_config, indent=1))
    envs = convert_to_environ_vars(app_config)

    print(" - convert_to_environ_vars(app_cfg)=\n", json_dumps(envs, indent=1))

    for env_key, env_value in envs.items():
        monkeypatch.setenv(env_key, f"{env_value}")

    return envs
async def wrapped(request: web.Request):
    try:
        resp = await handler(request)
        return resp

    except KeyError as err:
        # NOTE: handles required request.match_info[*] or request.query[*]
        logger.debug(err, exc_info=True)
        raise web.HTTPBadRequest(reason=f"Expected parameter {err}") from err

    except ValidationError as err:
        # NOTE: pydantic.validate_arguments parses and validates -> ValidationError
        logger.debug(err, exc_info=True)
        raise web.HTTPUnprocessableEntity(
            text=json_dumps({"error": err.errors()}),
            content_type="application/json",
        ) from err

    except (InvalidParameterError, NoCommitError) as err:
        raise web.HTTPUnprocessableEntity(reason=str(err)) from err

    except NotFoundError as err:
        raise web.HTTPNotFound(reason=str(err)) from err

    except ProjectNotFoundError as err:
        logger.debug(err, exc_info=True)
        raise web.HTTPNotFound(
            reason=f"Project not found {err.project_uuid} or not accessible. Skipping snapshot"
        ) from err
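# A minimal sketch (the decorator name is an assumption) of the wrapper this
# `wrapped` closure comes from: it decorates an aiohttp handler and translates
# domain exceptions into the HTTP errors shown above.
from functools import wraps

def handle_request_errors(handler):
    @wraps(handler)
    async def wrapped(request: web.Request):
        ...  # error-translation body as shown above
    return wrapped

# usage sketch:
# @handle_request_errors
# async def get_snapshot(request: web.Request) -> web.Response: ...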
def check_if_cluster_is_able_to_run_pipeline(
    node_id: NodeID,
    scheduler_info: Dict[str, Any],
    task_resources: Dict[str, Any],
    node_image: Image,
    cluster_id: ClusterID,
):
    logger.debug("Dask scheduler info: %s", json_dumps(scheduler_info, indent=2))

    workers = scheduler_info.get("workers", {})

    def can_task_run_on_worker(
        task_resources: Dict[str, Any], worker_resources: Dict[str, Any]
    ) -> bool:
        def gen_check(
            task_resources: Dict[str, Any], worker_resources: Dict[str, Any]
        ) -> Iterable[bool]:
            for name, required_value in task_resources.items():
                if required_value is None:
                    yield True
                elif worker_has := worker_resources.get(name):
                    yield worker_has >= required_value
                else:
                    yield False

        return all(gen_check(task_resources, worker_resources))
def get(cls, suffix, process=True):
    handlers = cls()
    coro = getattr(handlers, "get_" + suffix)
    loop = asyncio.get_event_loop()
    data = loop.run_until_complete(coro(None))

    return json.loads(json_dumps(data)) if process else data
async def parse_output_data( db_engine: Engine, job_id: str, data: TaskOutputData ) -> None: ( service_key, service_version, user_id, project_id, node_id, ) = parse_dask_job_id(job_id) logger.debug( "parsing output %s of dask task for %s:%s of user %s on project '%s' and node '%s'", json_dumps(data, indent=2), service_key, service_version, user_id, project_id, node_id, ) ports = await _create_node_ports( db_engine=db_engine, user_id=user_id, project_id=project_id, node_id=node_id, ) for port_key, port_value in data.items(): value_to_transfer: Optional[links.ItemValue] = None if isinstance(port_value, FileUrl): value_to_transfer = port_value.url else: value_to_transfer = port_value await (await ports.outputs)[port_key].set_value(value_to_transfer)
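# A sketch of the assumed inverse of the job-id parsing above. The exact
# encoding ("key:version:userid_X:projectid_Y:nodeid_Z") is an assumption
# chosen only so the five unpacked fields round-trip; the real format lives
# next to parse_dask_job_id in the codebase.
from uuid import UUID

def parse_dask_job_id_sketch(job_id: str):
    service_key, service_version, user_part, project_part, node_part = job_id.split(":")
    return (
        service_key,
        service_version,
        int(user_part.removeprefix("userid_")),
        UUID(project_part.removeprefix("projectid_")),
        UUID(node_part.removeprefix("nodeid_")),
    )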
def done_dask_callback(
    dask_future: distributed.Future,
    task_to_future_map: Dict[str, distributed.Future],
    user_callback: UserCompleteCB,
    main_loop: asyncio.AbstractEventLoop,
):
    # NOTE: BEWARE we are called in a separate thread!!
    job_id = dask_future.key
    event_data: Optional[TaskStateEvent] = None
    logger.debug("task '%s' completed with status %s", job_id, dask_future.status)
    try:
        if dask_future.status == "error":
            task_exception = dask_future.exception(timeout=_DASK_FUTURE_TIMEOUT_S)
            task_traceback = dask_future.traceback(timeout=_DASK_FUTURE_TIMEOUT_S)
            event_data = TaskStateEvent(
                job_id=job_id,
                state=RunningState.FAILED,
                msg=json_dumps(
                    traceback.format_exception(
                        type(task_exception), value=task_exception, tb=task_traceback
                    )
                ),
            )
        elif dask_future.cancelled():
            event_data = TaskStateEvent(job_id=job_id, state=RunningState.ABORTED)
        else:
            task_result = cast(
                TaskOutputData, dask_future.result(timeout=_DASK_FUTURE_TIMEOUT_S)
            )
            assert task_result  # nosec
            event_data = TaskStateEvent(
                job_id=job_id,
                state=RunningState.SUCCESS,
                msg=task_result.json(),
            )
    except distributed.TimeoutError:
        event_data = TaskStateEvent(
            job_id=job_id,
            state=RunningState.FAILED,
            msg=f"Timeout error getting results of '{job_id}'",
        )
        logger.error(
            "fetching result of '%s' timed-out, please check",
            job_id,
            exc_info=True,
        )
    finally:
        # remove the future from the dict to drop any handle to it, so the worker can free the memory
        task_to_future_map.pop(job_id)
        logger.debug("dispatching callback to finish task '%s'", job_id)
        assert event_data  # nosec
        try:
            asyncio.run_coroutine_threadsafe(user_callback(event_data), main_loop)
        except Exception:  # pylint: disable=broad-except
            logger.exception("Unexpected issue while transmitting state to main thread")
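# Hypothetical wiring (a sketch, not the codebase's actual call site):
# distributed.Future.add_done_callback invokes its callback with the future as
# the only argument, from a client worker thread, which is why
# done_dask_callback hops back to the event loop via run_coroutine_threadsafe.
import functools

def track_future(
    future: distributed.Future,
    task_to_future_map: Dict[str, distributed.Future],
    user_callback: UserCompleteCB,
    main_loop: asyncio.AbstractEventLoop,
) -> None:
    task_to_future_map[future.key] = future
    future.add_done_callback(
        functools.partial(
            done_dask_callback,
            task_to_future_map=task_to_future_map,
            user_callback=user_callback,
            main_loop=main_loop,
        )
    )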
def _get_environment_variables( compose_namespace: str, scheduler_data: SchedulerData, app_settings: AppSettings ) -> Dict[str, str]: registry_settings = app_settings.DIRECTOR_V2_DOCKER_REGISTRY rabbit_settings = app_settings.DIRECTOR_V2_RABBITMQ state_exclude = [] if scheduler_data.paths_mapping.state_exclude is not None: state_exclude = scheduler_data.paths_mapping.state_exclude return { "SIMCORE_HOST_NAME": scheduler_data.service_name, "DYNAMIC_SIDECAR_COMPOSE_NAMESPACE": compose_namespace, "DY_SIDECAR_PATH_INPUTS": f"{scheduler_data.paths_mapping.inputs_path}", "DY_SIDECAR_PATH_OUTPUTS": f"{scheduler_data.paths_mapping.outputs_path}", "DY_SIDECAR_STATE_PATHS": json_dumps( [f"{x}" for x in scheduler_data.paths_mapping.state_paths] ), "DY_SIDECAR_STATE_EXCLUDE": json_dumps([f"{x}" for x in state_exclude]), "DY_SIDECAR_USER_ID": f"{scheduler_data.user_id}", "DY_SIDECAR_PROJECT_ID": f"{scheduler_data.project_id}", "DY_SIDECAR_NODE_ID": f"{scheduler_data.node_uuid}", "POSTGRES_HOST": f"{app_settings.POSTGRES.POSTGRES_HOST}", "POSTGRES_ENDPOINT": f"{app_settings.POSTGRES.POSTGRES_HOST}:{app_settings.POSTGRES.POSTGRES_PORT}", "POSTGRES_PASSWORD": f"{app_settings.POSTGRES.POSTGRES_PASSWORD.get_secret_value()}", "POSTGRES_PORT": f"{app_settings.POSTGRES.POSTGRES_PORT}", "POSTGRES_USER": f"{app_settings.POSTGRES.POSTGRES_USER}", "POSTGRES_DB": f"{app_settings.POSTGRES.POSTGRES_DB}", "STORAGE_ENDPOINT": app_settings.STORAGE_ENDPOINT, "REGISTRY_AUTH": f"{registry_settings.REGISTRY_AUTH}", "REGISTRY_PATH": f"{registry_settings.REGISTRY_PATH}", "REGISTRY_URL": f"{registry_settings.REGISTRY_URL}", "REGISTRY_USER": f"{registry_settings.REGISTRY_USER}", "REGISTRY_PW": f"{registry_settings.REGISTRY_PW.get_secret_value()}", "REGISTRY_SSL": f"{registry_settings.REGISTRY_SSL}", "RABBIT_HOST": f"{rabbit_settings.RABBIT_HOST}", "RABBIT_PORT": f"{rabbit_settings.RABBIT_PORT}", "RABBIT_USER": f"{rabbit_settings.RABBIT_USER}", "RABBIT_PASSWORD": f"{rabbit_settings.RABBIT_PASSWORD.get_secret_value()}", "RABBIT_CHANNELS": json_dumps(rabbit_settings.RABBIT_CHANNELS), }
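# On the receiving (dynamic-sidecar) side, the JSON-encoded values above can be
# decoded back with plain json.loads. A sketch only; the sidecar's real
# settings class is not shown here.
import json
import os
from pathlib import Path

state_paths = [Path(p) for p in json.loads(os.environ["DY_SIDECAR_STATE_PATHS"])]
state_exclude = json.loads(os.environ["DY_SIDECAR_STATE_EXCLUDE"])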
async def _upsert_snapshot( project_checksum: str, project: Union[RowProxy, SimpleNamespace], conn: SAConnection, ): # has changes wrt previous commit assert project_checksum # nosec insert_stmt = pg_insert(projects_vc_snapshots).values( checksum=project_checksum, content={ # FIXME: empty status produces a set() that sqlalchemy cannot serialize. Quick fix "workbench": json.loads(json_dumps(project.workbench)), "ui": json.loads(json_dumps(project.ui)), }, ) upsert_snapshot = insert_stmt.on_conflict_do_update( constraint=projects_vc_snapshots.primary_key, set_=dict(content=insert_stmt.excluded.content), ) await conn.execute(upsert_snapshot)
async def send_group_messages(
    app: Application, room: str, messages: Sequence[SocketMessageDict]
) -> None:
    sio: AsyncServer = get_socket_server(app)
    send_tasks = [
        sio.emit(message["event_type"], json_dumps(message["data"]), room=room)
        for message in messages
    ]
    await logged_gather(*send_tasks, reraise=False, log=log, max_concurrency=10)
async def test_app_client_session_json_serialize( server: TestServer, fake_data_dict: Dict[str, Any] ): session = get_client_session(server.app) resp = await session.post(server.make_url("/echo"), json=fake_data_dict) assert resp.status == 200 got = await resp.json() expected = json.loads(json_dumps(fake_data_dict)) assert got == expected
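# The test above assumes an /echo route on the TestServer; a minimal sketch of
# such a handler (an assumption, the fixture that registers it is not shown):
async def echo(request: web.Request) -> web.Response:
    got = await request.json()
    return web.json_response(data=got)

# e.g. app.router.add_post("/echo", echo) in the server fixture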
async def postgres_cleanup_ctx(app: web.Application) -> AsyncIterator[None]:
    settings = get_plugin_settings(app)
    aiopg_engine = await _ensure_pg_ready(settings)
    app[APP_DB_ENGINE_KEY] = aiopg_engine

    log.info("pg engine created %s", json_dumps(get_engine_state(app), indent=1))

    yield  # -------------------

    if aiopg_engine is not app.get(APP_DB_ENGINE_KEY):
        log.critical("app does not hold right db engine. Somebody has changed it??")

    await close_engine(aiopg_engine)

    log.debug(
        "pg engine %s after shutdown (closed=%s): %s",
        aiopg_engine.dsn,
        aiopg_engine.closed,
        json_dumps(get_engine_state(app), indent=1),
    )
async def assemble_statics_json(app: web.Application):
    # NOTE: in devel mode, the folder might be under construction
    # (qx-compile takes time), therefore we create statics.json
    # on_startup instead of upon setup

    # Adds general server settings
    app_settings = app[APP_SETTINGS_KEY]
    info: Dict = app_settings.to_client_statics()

    # Adds specifics to front-end app
    frontend_settings: FrontEndAppSettings = app_settings.WEBSERVER_FRONTEND
    info.update(frontend_settings.to_statics())

    # cache computed statics.json
    app[APP_FRONTEND_CACHED_STATICS_JSON_KEY] = json_dumps(info)
def envelope_json_response(
    obj: Any, status_cls: Type[HTTPException] = web.HTTPOk
) -> web.Response:
    # TODO: replace all envelope functionality from packages/service-library/src/servicelib/aiohttp/rest_responses.py
    # TODO: remove the enveloping middleware at packages/service-library/src/servicelib/aiohttp/rest_middlewares.py
    #       (envelope_middleware_factory) and use this function instead
    # TODO: review error_middleware_factory
    if issubclass(status_cls, HTTPError):
        enveloped = Envelope[Any](error=obj)
    else:
        enveloped = Envelope[Any](data=obj)

    return web.Response(
        text=json_dumps(enveloped.dict(**RESPONSE_MODEL_POLICY)),
        content_type="application/json",
        status=status_cls.status_code,
    )
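# Usage sketch inside handlers: the object lands under "data" for success
# status classes and under "error" for HTTPError subclasses (handler names and
# payloads below are illustrative assumptions).
async def get_thing(request: web.Request) -> web.Response:
    return envelope_json_response({"name": "foo"})  # 200, {"data": {...}}

async def failing(request: web.Request) -> web.Response:
    return envelope_json_response(
        {"message": "cannot process"}, web.HTTPUnprocessableEntity
    )  # 422, {"error": {...}}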
async def update_project_node_outputs(
    app: web.Application,
    user_id: int,
    project_id: str,
    node_id: str,
    new_outputs: Optional[Dict],
    new_run_hash: Optional[str],
) -> Tuple[Dict, List[str]]:
    """
    Updates outputs of a given node in a project with 'data'
    """
    log.debug(
        "updating node %s outputs in project %s for user %s with %s: run_hash [%s]",
        node_id,
        project_id,
        user_id,
        json_dumps(new_outputs),
        new_run_hash,
    )
    new_outputs = new_outputs or {}

    partial_workbench_data = {
        node_id: {"outputs": new_outputs, "runHash": new_run_hash},
    }

    db = app[APP_PROJECT_DBAPI]
    updated_project, changed_entries = await db.patch_user_project_workbench(
        partial_workbench_data=partial_workbench_data,
        user_id=user_id,
        project_uuid=project_id,
    )
    log.debug(
        "patched project %s, following entries changed: %s",
        project_id,
        pformat(changed_entries),
    )
    updated_project = await add_project_states_for_user(
        user_id=user_id, project=updated_project, is_template=False, app=app
    )

    # changed entries come in the form of {node_uuid: {outputs: {changed_key1: value1, changed_key2: value2}}}
    # we do want only the key names
    changed_keys = list(changed_entries.get(node_id, {}).get("outputs", {}).keys())
    return updated_project, changed_keys
async def check_invitation(invitation: Optional[str], db: AsyncpgStorage, cfg: LoginOptions):
    confirmation = None
    if invitation:
        confirmation = await validate_confirmation_code(invitation, db, cfg)

    if confirmation:
        # FIXME: check if action=invitation??
        log.info(
            "Invitation code used. Deleting %s",
            json_dumps(get_confirmation_info(cfg, confirmation), indent=1),
        )
        await db.delete_confirmation(confirmation)
    else:
        raise web.HTTPForbidden(
            reason=(
                "Invalid invitation code. "
                "Your invitation was already used or might have expired. "
                "Please contact our support team to get a new one."
            )
        )
async def setup_director(app: FastAPI) -> None:
    if settings := app.state.settings.CATALOG_DIRECTOR:
        # init client-api
        logger.debug("Setup director at %s...", settings.base_url)
        director_client = DirectorApi(base_url=settings.base_url, app=app)

        # check that the director is accessible
        async for attempt in AsyncRetrying(**director_startup_retry_policy):
            with attempt:
                if not await director_client.is_responsive():
                    raise ValueError("Director-v0 is not responsive")

                logger.info(
                    "Connection to director-v0 succeeded [%s]",
                    json_dumps(attempt.retry_state.retry_object.statistics),
                )

        app.state.director_api = director_client
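# A plausible shape (an assumption; the real policy is defined elsewhere in the
# module) for director_startup_retry_policy, matching how it is unpacked into
# AsyncRetrying above:
import logging
from tenacity import before_sleep_log, stop_after_delay, wait_fixed

director_startup_retry_policy = dict(
    wait=wait_fixed(2),
    stop=stop_after_delay(60),
    before_sleep=before_sleep_log(logger, logging.WARNING),
    reraise=True,
)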
async def send_messages(
    app: Application, user_id: str, messages: Sequence[SocketMessageDict]
) -> None:
    sio: AsyncServer = get_socket_server(app)

    socket_ids: List[str] = []
    with managed_resource(user_id, None, app) as rt:
        socket_ids = await rt.find_socket_ids()

    send_tasks = deque()
    for sid in socket_ids:
        for message in messages:
            send_tasks.append(
                sio.emit(message["event_type"], json_dumps(message["data"]), room=sid)
            )
    await logged_gather(*send_tasks, reraise=False, log=log, max_concurrency=10)
async def patch_user_project_workbench(
    self,
    partial_workbench_data: Dict[str, Any],
    user_id: int,
    project_uuid: str,
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """patches an EXISTING project from a user
    new_project_data only contains the entries to modify
    """
    log.info("Patching project %s for user %s", project_uuid, user_id)
    async with self.engine.acquire() as conn:
        async with conn.begin() as _transaction:
            current_project: Dict = await self._get_project(
                conn,
                user_id,
                project_uuid,
                exclude_foreign=["tags"],
                include_templates=False,
                for_update=True,
            )
            user_groups: List[RowProxy] = await self.__load_user_groups(conn, user_id)
            _check_project_permissions(current_project, user_id, user_groups, "write")

            def _patch_workbench(
                project: Dict[str, Any],
                new_partial_workbench_data: Dict[str, Any],
            ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
                """patch the project workbench with the values in new_data and returns the changed project and changed values"""
                changed_entries = {}
                for node_key, new_node_data in new_partial_workbench_data.items():
                    current_node_data = project.get("workbench", {}).get(node_key)

                    if current_node_data is None:
                        log.debug("node %s is missing from project, no patch", node_key)
                        raise NodeNotFoundError(project_uuid, node_key)
                    # find changed keys
                    changed_entries.update(
                        {
                            node_key: _find_changed_dict_keys(
                                current_node_data,
                                new_node_data,
                                look_for_removed_keys=False,
                            )
                        }
                    )
                    # patch
                    current_node_data.update(new_node_data)
                return (project, changed_entries)

            new_project_data, changed_entries = _patch_workbench(
                current_project, partial_workbench_data
            )

            # update timestamps
            new_project_data["lastChangeDate"] = now_str()

            log.debug(
                "DB updating with new_project_data=%s",
                json_dumps(dict(new_project_data)),
            )
            result = await conn.execute(
                # pylint: disable=no-value-for-parameter
                projects.update()
                .values(**_convert_to_db_names(new_project_data))
                .where(projects.c.id == current_project[projects.c.id.key])
                .returning(literal_column("*"))
            )
            project: RowProxy = await result.fetchone()
            log.debug(
                "DB updated returned row project=%s",
                json_dumps(dict(project.items())),
            )
            user_email = await self._get_user_email(conn, project.prj_owner)

            tags = await self._get_tags_by_project(
                conn, project_id=project[projects.c.id]
            )
            return (
                _convert_to_schema_names(project, user_email, tags=tags),
                changed_entries,
            )
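# A minimal sketch (an assumption, the real helper is defined elsewhere) of
# what _find_changed_dict_keys could look like, as used above: it returns the
# subset of `new` entries that differ from `current`, recursing into nested
# dicts, and optionally reports keys removed from `new`.
from typing import Any, Dict

def _find_changed_dict_keys_sketch(
    current: Dict[str, Any], new: Dict[str, Any], *, look_for_removed_keys: bool
) -> Dict[str, Any]:
    changed: Dict[str, Any] = {}
    for key, new_value in new.items():
        if key not in current:
            changed[key] = new_value
        elif current[key] != new_value:
            if isinstance(current[key], dict) and isinstance(new_value, dict):
                changed[key] = _find_changed_dict_keys_sketch(
                    current[key], new_value, look_for_removed_keys=look_for_removed_keys
                )
            else:
                changed[key] = new_value
    if look_for_removed_keys:
        for key in current:
            if key not in new:
                changed[key] = current[key]
    return changed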
def get_dynamic_sidecar_spec(
    scheduler_data: SchedulerData,
    dynamic_sidecar_settings: DynamicSidecarSettings,
    dynamic_sidecar_network_id: str,
    swarm_network_id: str,
    settings: SimcoreServiceSettingsLabel,
    app_settings: AppSettings,
) -> Dict[str, Any]:
    """
    The dynamic-sidecar is responsible for managing the lifecycle
    of the dynamic service. The director-v2 directly coordinates with
    the dynamic-sidecar for this purpose.
    """
    compose_namespace = get_compose_namespace(scheduler_data.node_uuid)

    mounts = [
        # docker socket needed to use the docker api
        {
            "Source": "/var/run/docker.sock",
            "Target": "/var/run/docker.sock",
            "Type": "bind",
        }
    ]

    # Docker does not allow mounting of subfolders from volumes as the following:
    #   `volume_name/inputs:/target_folder/inputs`
    #   `volume_name/outputs:/target_folder/outputs`
    #   `volume_name/path/to/state/01:/target_folder/path_to_state_01`
    #
    # Two separate volumes are required to achieve the following on the spawned
    # dynamic-sidecar containers:
    #   `volume_name_path_to_inputs:/target_folder/path/to/inputs`
    #   `volume_name_path_to_outputs:/target_folder/path/to/outputs`
    #   `volume_name_path_to_state_01:/target_folder/path/to/state/01`
    for path_to_mount in [
        scheduler_data.paths_mapping.inputs_path,
        scheduler_data.paths_mapping.outputs_path,
    ]:
        mounts.append(
            DynamicSidecarVolumesPathsResolver.mount_entry(
                compose_namespace=compose_namespace,
                path=path_to_mount,
                node_uuid=scheduler_data.node_uuid,
            )
        )

    # state paths now get mounted via different driver and are synced to s3 automatically
    for path_to_mount in scheduler_data.paths_mapping.state_paths:
        # for now only enable this with dev features enabled
        if app_settings.DIRECTOR_V2_DEV_FEATURES_ENABLED:
            mounts.append(
                DynamicSidecarVolumesPathsResolver.mount_r_clone(
                    compose_namespace=compose_namespace,
                    path=path_to_mount,
                    project_id=scheduler_data.project_id,
                    node_uuid=scheduler_data.node_uuid,
                    r_clone_settings=dynamic_sidecar_settings.DYNAMIC_SIDECAR_R_CLONE_SETTINGS,
                )
            )
        else:
            mounts.append(
                DynamicSidecarVolumesPathsResolver.mount_entry(
                    compose_namespace=compose_namespace,
                    path=path_to_mount,
                    node_uuid=scheduler_data.node_uuid,
                )
            )

    endpoint_spec = {}

    if dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV is not None:
        dynamic_sidecar_path = dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV
        mounts.append(
            {
                "Source": str(dynamic_sidecar_path),
                "Target": "/devel/services/dynamic-sidecar",
                "Type": "bind",
            }
        )
        packages_path = (
            dynamic_sidecar_settings.DYNAMIC_SIDECAR_MOUNT_PATH_DEV
            / ".."
            / ".."
/ "packages" ) mounts.append( { "Source": str(packages_path), "Target": "/devel/packages", "Type": "bind", } ) # expose this service on an empty port if dynamic_sidecar_settings.DYNAMIC_SIDECAR_EXPOSE_PORT: endpoint_spec["Ports"] = [ { "Protocol": "tcp", "TargetPort": dynamic_sidecar_settings.DYNAMIC_SIDECAR_PORT, } ] create_service_params = { "endpoint_spec": endpoint_spec, "labels": { # TODO: let's use a pydantic model with descriptions "io.simcore.zone": scheduler_data.simcore_traefik_zone, "port": f"{dynamic_sidecar_settings.DYNAMIC_SIDECAR_PORT}", "study_id": f"{scheduler_data.project_id}", "traefik.docker.network": scheduler_data.dynamic_sidecar_network_name, # also used for scheduling "traefik.enable": "true", f"traefik.http.routers.{scheduler_data.service_name}.entrypoints": "http", f"traefik.http.routers.{scheduler_data.service_name}.priority": "10", f"traefik.http.routers.{scheduler_data.service_name}.rule": "PathPrefix(`/`)", f"traefik.http.services.{scheduler_data.service_name}.loadbalancer.server.port": f"{dynamic_sidecar_settings.DYNAMIC_SIDECAR_PORT}", "type": ServiceType.MAIN.value, # required to be listed as an interactive service and be properly cleaned up "user_id": f"{scheduler_data.user_id}", # the following are used for scheduling "uuid": f"{scheduler_data.node_uuid}", # also needed for removal when project is closed "swarm_stack_name": dynamic_sidecar_settings.SWARM_STACK_NAME, "service_key": scheduler_data.key, "service_tag": scheduler_data.version, "paths_mapping": scheduler_data.paths_mapping.json(), "compose_spec": json_dumps(scheduler_data.compose_spec), "container_http_entry": scheduler_data.container_http_entry, "restart_policy": scheduler_data.restart_policy, }, "name": scheduler_data.service_name, "networks": [swarm_network_id, dynamic_sidecar_network_id], "task_template": { "ContainerSpec": { "Env": _get_environment_variables( compose_namespace, scheduler_data, app_settings ), "Hosts": [], "Image": dynamic_sidecar_settings.DYNAMIC_SIDECAR_IMAGE, "Init": True, "Labels": {}, "Mounts": mounts, }, "Placement": {"Constraints": []}, "RestartPolicy": { "Condition": "on-failure", "Delay": 5000000, "MaxAttempts": 2, }, # this will get overwritten "Resources": { "Limits": {"NanoCPUs": 2 * pow(10, 9), "MemoryBytes": 1 * pow(1024, 3)}, "Reservations": { "NanoCPUs": 1 * pow(10, 8), "MemoryBytes": 500 * pow(1024, 2), }, }, }, } inject_settings_to_create_service_params( labels_service_settings=settings, create_service_params=create_service_params, ) return create_service_params
async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
    logger.debug(
        "Getting docker compose spec for service %s", scheduler_data.service_name
    )

    dynamic_sidecar_client = get_dynamic_sidecar_client(app)
    dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

    # Starts dynamic SIDECAR -------------------------------------
    # creates a docker compose spec given the service key and tag
    compose_spec = assemble_spec(
        app=app,
        service_key=scheduler_data.key,
        service_tag=scheduler_data.version,
        paths_mapping=scheduler_data.paths_mapping,
        compose_spec=scheduler_data.compose_spec,
        container_http_entry=scheduler_data.container_http_entry,
        dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name,
    )

    await dynamic_sidecar_client.start_service_creation(
        dynamic_sidecar_endpoint, compose_spec
    )

    # Starts PROXY -----------------------------------------------
    # The entrypoint container name has now been computed;
    # continue starting the proxy

    # check values have been set by previous step
    if (
        scheduler_data.dynamic_sidecar.dynamic_sidecar_id is None
        or scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id is None
        or scheduler_data.dynamic_sidecar.swarm_network_id is None
        or scheduler_data.dynamic_sidecar.swarm_network_name is None
    ):
        raise ValueError(
            (
                "Expected a value for all the following values: "
                f"{scheduler_data.dynamic_sidecar.dynamic_sidecar_id=} "
                f"{scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id=} "
                f"{scheduler_data.dynamic_sidecar.swarm_network_id=} "
                f"{scheduler_data.dynamic_sidecar.swarm_network_name=}"
            )
        )

    dynamic_sidecar_settings: DynamicSidecarSettings = (
        app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
    )

    async for attempt in AsyncRetrying(
        stop=stop_after_delay(
            dynamic_sidecar_settings.DYNAMIC_SIDECAR_WAIT_FOR_CONTAINERS_TO_START
        ),
        wait=wait_fixed(1),
        retry_error_cls=EntrypointContainerNotFoundError,
        before_sleep=before_sleep_log(logger, logging.WARNING),
    ):
        with attempt:
            if scheduler_data.dynamic_sidecar.service_removal_state.was_removed:
                # the service was removed while waiting for the operation to finish
                logger.warning(
                    "Stopping `get_entrypoint_container_name` operation. "
                    "Will not try to start the service."
                )
                return

            entrypoint_container = await dynamic_sidecar_client.get_entrypoint_container_name(
                dynamic_sidecar_endpoint=dynamic_sidecar_endpoint,
                dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name,
            )
            logger.info("Fetched container entrypoint name %s", entrypoint_container)

    dynamic_sidecar_node_id = await get_node_id_from_task_for_service(
        scheduler_data.dynamic_sidecar.dynamic_sidecar_id, dynamic_sidecar_settings
    )

    dynamic_sidecar_proxy_create_service_params = get_dynamic_proxy_spec(
        scheduler_data=scheduler_data,
        dynamic_sidecar_settings=dynamic_sidecar_settings,
        dynamic_sidecar_network_id=scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id,
        swarm_network_id=scheduler_data.dynamic_sidecar.swarm_network_id,
        swarm_network_name=scheduler_data.dynamic_sidecar.swarm_network_name,
        dynamic_sidecar_node_id=dynamic_sidecar_node_id,
        entrypoint_container_name=entrypoint_container,
        service_port=scheduler_data.service_port,
    )
    logger.debug(
        "dynamic-sidecar-proxy create_service_params %s",
        json_dumps(dynamic_sidecar_proxy_create_service_params),
    )

    # no need for the id any longer
    await create_service_and_get_id(dynamic_sidecar_proxy_create_service_params)
    scheduler_data.dynamic_sidecar.were_services_created = True

    scheduler_data.dynamic_sidecar.was_compose_spec_submitted = True
async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
    dynamic_sidecar_settings: DynamicSidecarSettings = (
        app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
    )
    # the dynamic-sidecar should merge all the settings, especially:
    # resources and placement derived from all the images in
    # the provided docker-compose spec;
    # it also encodes the env vars to target the proper container
    director_v0_client: DirectorV0Client = _get_director_v0_client(app)
    # fetching project from DB and fetching user settings
    projects_repository = _fetch_repo_outside_of_request(app, ProjectsRepository)
    project: ProjectAtDB = await projects_repository.get_project(
        project_id=scheduler_data.project_id
    )

    node_uuid_str = str(scheduler_data.node_uuid)
    node: Optional[Node] = project.workbench.get(node_uuid_str)
    boot_options = (
        node.boot_options
        if node is not None and node.boot_options is not None
        else {}
    )
    logger.info("%s", f"{boot_options=}")

    settings: SimcoreServiceSettingsLabel = await merge_settings_before_use(
        director_v0_client=director_v0_client,
        service_key=scheduler_data.key,
        service_tag=scheduler_data.version,
        service_user_selection_boot_options=boot_options,
    )

    # this configuration should guarantee a 245-address network
    network_config = {
        "Name": scheduler_data.dynamic_sidecar_network_name,
        "Driver": "overlay",
        "Labels": {
            "io.simcore.zone": f"{dynamic_sidecar_settings.TRAEFIK_SIMCORE_ZONE}",
            "com.simcore.description": f"interactive for node: {scheduler_data.node_uuid}",
            "uuid": f"{scheduler_data.node_uuid}",  # needed for removal when project is closed
        },
        "Attachable": True,
        "Internal": False,
    }
    dynamic_sidecar_network_id = await create_network(network_config)

    # attach the service to the swarm network dedicated to services
    swarm_network: Dict[str, Any] = await get_swarm_network(dynamic_sidecar_settings)
    swarm_network_id: str = swarm_network["Id"]
    swarm_network_name: str = swarm_network["Name"]

    # start dynamic-sidecar and run the proxy on the same node
    dynamic_sidecar_create_service_params = get_dynamic_sidecar_spec(
        scheduler_data=scheduler_data,
        dynamic_sidecar_settings=dynamic_sidecar_settings,
        dynamic_sidecar_network_id=dynamic_sidecar_network_id,
        swarm_network_id=swarm_network_id,
        settings=settings,
        app_settings=app.state.settings,
    )
    logger.debug(
        "dynamic-sidecar create_service_params %s",
        json_dumps(dynamic_sidecar_create_service_params),
    )

    dynamic_sidecar_id = await create_service_and_get_id(
        dynamic_sidecar_create_service_params
    )

    # update service_port and assign it to the status
    # needed by the CreateUserServices action
    scheduler_data.service_port = extract_service_port_from_compose_start_spec(
        dynamic_sidecar_create_service_params
    )

    # finally mark services created
    scheduler_data.dynamic_sidecar.dynamic_sidecar_id = dynamic_sidecar_id
    scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id = (
        dynamic_sidecar_network_id
    )
    scheduler_data.dynamic_sidecar.swarm_network_id = swarm_network_id
    scheduler_data.dynamic_sidecar.swarm_network_name = swarm_network_name
    scheduler_data.dynamic_sidecar.was_dynamic_sidecar_started = True
        except (
            ComputationalBackendNotConnectedError,
            ComputationalSchedulerChangedError,
        ):
            # cleanup and re-raise
            if dask_client := self._cluster_to_client_map.pop(cluster.id, None):
                await dask_client.delete()
            raise
        except Exception as exc:
            # cleanup and re-raise
            if dask_client := self._cluster_to_client_map.pop(cluster.id, None):
                await dask_client.delete()
            logger.error(
                "could not create/access dask computational cluster %s",
                json_dumps(cluster),
            )
            raise DaskClientAcquisisitonError(cluster=cluster, error=exc) from exc


def setup(app: FastAPI, settings: DaskSchedulerSettings) -> None:
    async def on_startup() -> None:
        app.state.dask_clients_pool = await DaskClientsPool.create(
            app=app, settings=settings
        )

    async def on_shutdown() -> None:
        if app.state.dask_clients_pool:
            await app.state.dask_clients_pool.delete()

    app.add_event_handler("startup", on_startup)
    app.add_event_handler("shutdown", on_shutdown)
    def cluster_missing_resources(
        task_resources: Dict[str, Any], cluster_resources: Dict[str, Any]
    ) -> List[str]:
        return [r for r in task_resources if r not in cluster_resources]

    cluster_resources_counter = collections.Counter()
    can_a_worker_run_task = False
    for worker in workers:
        worker_resources = workers[worker].get("resources", {})
        cluster_resources_counter.update(worker_resources)
        if can_task_run_on_worker(task_resources, worker_resources):
            can_a_worker_run_task = True
    all_available_resources_in_cluster = dict(cluster_resources_counter)

    logger.debug(
        "Dask scheduler total available resources in cluster %s: %s, task needed resources %s",
        cluster_id,
        json_dumps(all_available_resources_in_cluster, indent=2),
        json_dumps(task_resources, indent=2),
    )

    if can_a_worker_run_task:
        return

    # check if we have missing resources
    if missing_resources := cluster_missing_resources(
        task_resources, all_available_resources_in_cluster
    ):
        raise MissingComputationalResourcesError(
            node_id=node_id,
            msg=f"Service {node_image.name}:{node_image.tag} cannot be scheduled "
            f"on cluster {cluster_id}: task needs '{task_resources}', "
            f"cluster has '{all_available_resources_in_cluster}', missing: '{missing_resources}'",
        )
async def replace_user_project(
    self,
    new_project_data: Dict[str, Any],
    user_id: int,
    project_uuid: str,
    include_templates: Optional[bool] = False,
) -> Dict[str, Any]:
    """replaces a project from a user
    this method completely replaces a user project with new_project_data, only
    keeping the old entries from the project workbench if they exist in the
    new project workbench.
    """
    log.info("Updating project %s for user %s", project_uuid, user_id)

    async with self.engine.acquire() as conn:
        async with conn.begin() as _transaction:
            current_project: Dict = await self._get_project(
                conn,
                user_id,
                project_uuid,
                exclude_foreign=["tags"],
                include_templates=include_templates,
                for_update=True,
            )
            user_groups: List[RowProxy] = await self.__load_user_groups(conn, user_id)
            _check_project_permissions(current_project, user_id, user_groups, "write")
            # uuid can ONLY be set upon creation
            if current_project["uuid"] != new_project_data["uuid"]:
                raise ProjectInvalidRightsError(user_id, new_project_data["uuid"])
            # ensure the prj owner is always in the access rights
            owner_primary_gid = await self._get_user_primary_group_gid(
                conn, current_project[projects.c.prj_owner.key]
            )
            new_project_data.setdefault("accessRights", {}).update(
                _create_project_access_rights(
                    owner_primary_gid, ProjectAccessRights.OWNER
                )
            )

            # update the workbench
            def _update_workbench(
                old_project: Dict[str, Any], new_project: Dict[str, Any]
            ) -> None:
                # any non set entry in the new workbench is taken from the old one if available
                old_workbench = old_project["workbench"]
                new_workbench = new_project["workbench"]
                for node_key, node in new_workbench.items():
                    old_node = old_workbench.get(node_key)
                    if not old_node:
                        continue
                    for prop in old_node:
                        # check if the key is missing in the new node
                        if prop not in node:
                            # use the old value
                            node[prop] = old_node[prop]

            _update_workbench(current_project, new_project_data)
            # update timestamps
            new_project_data["lastChangeDate"] = now_str()

            # now update it
            log.debug(
                "DB updating with new_project_data=%s", json_dumps(new_project_data)
            )
            result = await conn.execute(
                # pylint: disable=no-value-for-parameter
                projects.update()
                .values(**_convert_to_db_names(new_project_data))
                .where(projects.c.id == current_project[projects.c.id.key])
                .returning(literal_column("*"))
            )
            project: RowProxy = await result.fetchone()
            log.debug(
                "DB updated returned row project=%s",
                json_dumps(dict(project.items())),
            )
            user_email = await self._get_user_email(conn, project.prj_owner)

            tags = await self._get_tags_by_project(
                conn, project_id=project[projects.c.id]
            )
            return _convert_to_schema_names(project, user_email, tags=tags)
    update={
        # TODO: HACK to overcome export from None -> string
        # SOLUTION 1: thumbnail should not be required (check with team!)
        # SOLUTION 2: make thumbnail nullable
        "thumbnail": faker.image_url(),
    }
)
assert new_project.workbench is not None
assert new_project.workbench

node = new_project.workbench["fc9208d9-1a0a-430c-9951-9feaf1de3368"]
assert node.inputs
node.inputs["linspace_stop"] = 4

resp = await client.put(
    f"/v0/projects/{project_uuid}",
    data=json_dumps(new_project.dict(**REQUEST_MODEL_POLICY)),
)
assert resp.status == HTTPStatus.OK, await resp.text()

# RUN them again ---------------------------------------------------------------------------

resp = await client.post(
    f"/v0/computation/pipeline/{project_uuid}:start",
    json=RUN_PROJECT.request_payload,
)
data, _ = await assert_status(resp, web.HTTPCreated)
assert project_uuid == data["pipeline_id"]
ref_ids = data["ref_ids"]
assert len(ref_ids) == 4

# GET iterations -----------------------------------------------------------------
# check iters 1, 2 and 3 share working copies
def test_serialization_of_nested_dicts(fake_data_dict: Dict[str, Any]):
    obj = {"data": fake_data_dict, "ids": [uuid4() for _ in range(3)]}

    dump = json_dumps(obj)
    assert json.loads(dump) == export_uuids_to_str(obj)
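# A minimal sketch (an assumption for illustration) of the export_uuids_to_str
# helper the tests rely on: it walks a nested structure and replaces every
# UUID instance with str(uuid), leaving everything else untouched.
from typing import Any
from uuid import UUID

def export_uuids_to_str_sketch(obj: Any) -> Any:
    if isinstance(obj, UUID):
        return str(obj)
    if isinstance(obj, dict):
        return {k: export_uuids_to_str_sketch(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [export_uuids_to_str_sketch(v) for v in obj]
    return obj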