Example #1
async def test_local_dask_gateway_server(
        loop: AbstractEventLoop, local_dask_gateway_server: DaskGatewayServer):
    async with Gateway(
            local_dask_gateway_server.address,
            local_dask_gateway_server.proxy_address,
            asynchronous=True,
            auth=auth.BasicAuth("pytest_user",
                                local_dask_gateway_server.password),
    ) as gateway:
        print(f"--> {gateway=} created")
        cluster_options = await gateway.cluster_options()
        gateway_versions = await gateway.get_versions()
        clusters_list = await gateway.list_clusters()
        print(f"--> {gateway_versions=}, {cluster_options=}, {clusters_list=}")
        for option in cluster_options.items():
            print(f"--> {option=}")

        async with gateway.new_cluster() as cluster:
            assert cluster
            print(
                f"--> created new cluster {cluster=}, {cluster.scheduler_info=}"
            )
            NUM_WORKERS = 10
            await cluster.scale(NUM_WORKERS)
            print(f"--> scaling cluster {cluster=} to {NUM_WORKERS} workers")
            async for attempt in AsyncRetrying(reraise=True,
                                               wait=wait_fixed(0.24),
                                               stop=stop_after_delay(30)):
                with attempt:
                    print(
                        f"cluster {cluster=} now has {len(cluster.scheduler_info.get('workers', []))} workers"
                    )
                    assert len(cluster.scheduler_info.get("workers", {})) == NUM_WORKERS

            async with cluster.get_client() as client:
                print(f"--> created new client {client=}, submitting a job")
                res = await client.submit(lambda x: x + 1, 1)  # type: ignore
                assert res == 2

            print(f"--> scaling cluster {cluster=} back to 0")
            await cluster.scale(0)

            async for attempt in AsyncRetrying(reraise=True,
                                               wait=wait_fixed(0.24),
                                               stop=stop_after_delay(30)):
                with attempt:
                    print(
                        f"cluster {cluster=} now has {len(cluster.scheduler_info.get('workers', []))} workers"
                    )
                    assert len(cluster.scheduler_info.get("workers", {})) == 0
Example #2
async def wait_till_service_healthy(service_name: str, endpoint: URL):

    log.info(
        "Connecting to %s",
        f"{service_name=} at {endpoint=}",
    )
    async for attempt in AsyncRetrying(
            # randomizing healthchecks sampling helps parallel execution
            wait=wait_random(1, 2),
            # sets the timeout for a service to become healthy
            stop=stop_after_delay(2 * MINUTE),
            before_sleep=before_sleep_log(log, logging.WARNING),
            reraise=True,
    ):
        with attempt:
            async with aiohttp.ClientSession(
                    timeout=_ONE_SEC_TIMEOUT) as session:
                async with session.get(endpoint) as response:
                    # NOTE: Health-check endpoint requires only a status code 200
                    # (see e.g. services/web/server/docker/healthcheck.py)
                    # regardless of the payload content
                    assert (
                        response.status == 200
                    ), f"Connection to {service_name=} at {endpoint=} failed with {response=}"

            log.info(
                "Connection to %s succeeded [%s]",
                f"{service_name=} at {endpoint=}",
                json.dumps(attempt.retry_state.retry_object.statistics),
            )
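
A hedged usage sketch of the helper above: callers would await it once per service before running the tests. The service names and ports below are illustrative assumptions, not taken from the snippet.

import asyncio
from yarl import URL  # assuming URL here is yarl's, as is usual with aiohttp

async def wait_for_core_services() -> None:
    # poll each health endpoint until it answers 200, or give up after 2 minutes
    await asyncio.gather(
        wait_till_service_healthy("webserver", URL("http://127.0.0.1:9081/v0/")),
        wait_till_service_healthy("storage", URL("http://127.0.0.1:8080/v0/")),
    )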
Example #3
def _wait_for_deploy_lock(
    gcs_path: str, deploy_state: DeployState, timeout: timedelta
) -> DeployLockResult:
    """Tries to continually acquire the lock until the timeout."""

    def should_retry(result: DeployLockResult) -> bool:
        # We retry in the failed_to_get_lock_and_lock_state case. That's
        # assuming that the previous deploy finished and released the lock in
        # between when we first tried to check it.
        should_retry = result in (
            DeployLockResult.this_deploy_is_newer,
            DeployLockResult.failed_to_get_lock_and_lock_state,
        )
        print(f"should retry? condition: {result}, {should_retry=}")
        return should_retry

    # deploys take a while, so we don't retry that aggressively
    @tenacity.retry(
        retry=retry_if_result(should_retry),
        wait=wait_random_exponential(min=5, max=15),
        stop=stop_after_delay(timeout.total_seconds()),
    )
    def _wait_for_deploy_lock() -> DeployLockResult:
        return _acquire_deploy_lock_once(gcs_path, deploy_state)

    try:
        return _wait_for_deploy_lock()
    except tenacity.RetryError as e:
        print(f"Timed out after {timeout} waiting for deploy lock ({e}).")
        return DeployLockResult.timed_out
Example #4
def test_product_frontend_app_served(
    simcore_stack_deployed_services: List[Service],
    traefik_service: URL,
    test_url: str,
    expected_in_content: str,
):
    # NOTE: it takes a bit of time until traefik sets up the correct proxy and
    # the webserver takes time to start
    # TODO: determine wait times with pre-calibration step
    @tenacity.retry(
        wait=wait_fixed(5),
        stop=stop_after_delay(1 * MINUTE),
    )
    def request_test_url():
        resp = requests.get(test_url)
        assert (
            resp.ok
        ), f"Failed request {resp.url} with {resp.status_code}: {resp.reason}"
        return resp

    resp = request_test_url()

    # TODO: search osparc-simcore commit id e.g. 'osparc-simcore v817d82e'
    assert resp.ok
    assert "text/html" in resp.headers["Content-Type"]
    assert expected_in_content in resp.text, "Expected boot not found in response"
Example #5
async def test_publish_event(dask_client: distributed.Client):
    dask_pub = distributed.Pub("some_topic")
    dask_sub = distributed.Sub("some_topic")
    async for attempt in AsyncRetrying(
            reraise=True,
            retry=retry_if_exception_type(AssertionError),
            wait=wait_fixed(0.01),
            stop=stop_after_delay(60),
    ):
        with attempt:
            print(
                f"waiting for subscribers... attempt={attempt.retry_state.attempt_number}"
            )
            assert dask_pub.subscribers
            print("we do have subscribers!")

    event_to_publish = TaskLogEvent(job_id="some_fake_job_id", log="the log")
    publish_event(dask_pub=dask_pub, event=event_to_publish)
    # NOTE: this test runs a sync dask client,
    # and the CI sometimes has difficulties running this in a reasonable time,
    # hence the long timeout
    message = dask_sub.get(timeout=1)
    assert message is not None
    received_task_log_event = TaskLogEvent.parse_raw(message)  # type: ignore
    assert received_task_log_event == event_to_publish
Example #6
async def _assert_and_wait_for_pipeline_state(
    client: TestClient,
    project_id: str,
    expected_state: RunningState,
    expected_api_response: ExpectedResponse,
):
    url_project_state = client.app.router["state_project"].url_for(
        project_id=project_id)
    assert url_project_state == URL(f"/{API_VTAG}/projects/{project_id}/state")
    async for attempt in AsyncRetrying(
            reraise=True,
            stop=stop_after_delay(120),
            wait=wait_fixed(5),
            retry=retry_if_exception_type(AssertionError),
    ):
        with attempt:
            print(
                f"--> waiting for pipeline to complete with {expected_state=} attempt {attempt.retry_state.attempt_number}..."
            )
            resp = await client.get(f"{url_project_state}")
            data, error = await assert_status(resp, expected_api_response.ok)
            assert "state" in data
            assert "value" in data["state"]
            received_study_state = RunningState(data["state"]["value"])
            print(f"<-- received pipeline state: {received_study_state=}")
            assert received_study_state == expected_state
            print(
                f"--> pipeline completed with state {received_study_state=}! "
                f"That's great: {json_dumps(attempt.retry_state.retry_object.statistics)}",
            )
Example #7
async def test_websocket_disconnected_remove_or_maintain_files_based_on_role(
    client,
    logged_user,
    empty_user_project,
    mocked_director_v2_api,
    create_dynamic_service_mock,
    client_session_id_factory: Callable[[], str],
    socketio_client_factory: Callable,
    # asyncpg_storage_system_mock,
    storage_subsystem_mock,  # when guest user logs out garbage is collected
    expect_call: bool,
    exp_save_state: bool,
):
    set_service_deletion_delay(SERVICE_DELETION_DELAY, client.server.app)
    # login - logged_user fixture
    # create empty study - empty_user_project fixture
    # create dynamic service - create_dynamic_service_mock fixture
    service = await create_dynamic_service_mock(logged_user["id"],
                                                empty_user_project["uuid"])
    # create websocket
    client_session_id1 = client_session_id_factory()
    sio: socketio.AsyncClient = await socketio_client_factory(
        client_session_id1)
    # open project in client 1
    await open_project(client, empty_user_project["uuid"], client_session_id1)
    # logout
    logout_url = client.app.router["auth_logout"].url_for()
    r = await client.post(logout_url,
                          json={"client_session_id": client_session_id1})
    assert r.url_obj.path == logout_url.path
    await assert_status(r, web.HTTPOk)

    # ensure sufficient time is wasted here
    await asyncio.sleep(SERVICE_DELETION_DELAY + 1)
    await garbage_collector.collect_garbage(client.app)

    # assert dynamic service is removed
    calls = [
        call(
            app=client.server.app,
            save_state=exp_save_state,
            service_uuid=service["service_uuid"],
        )
    ]
    mocked_director_v2_api["director_v2_core.stop_service"].assert_has_calls(
        calls)

    # this call is done async, so wait a bit here to ensure it is correctly done
    async for attempt in AsyncRetrying(reraise=True,
                                       stop=stop_after_delay(10)):
        with attempt:
            if expect_call:
                # make sure `delete_project` is called
                storage_subsystem_mock[1].assert_called_once()
                # make sure `delete_user` is called
                # asyncpg_storage_system_mock.assert_called_once()
            else:
                # make sure `delete_project` not called
                storage_subsystem_mock[1].assert_not_called()
Example #8
    def __init__(self, logger: Optional[logging.Logger] = None):
        logger = logger or log

        self.kwargs = dict(
            wait=wait_fixed(2),
            stop=stop_after_delay(3 * _MINUTE),
            before_sleep=before_sleep_log(logger, logging.WARNING),
            reraise=True,
        )
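
The kwargs dict built above maps one-to-one onto tenacity's constructor, so instances of this policy class can be unpacked directly into Retrying or AsyncRetrying. A minimal, hedged sketch of that consumption pattern (the policy instance and the probed callable are assumptions):

from tenacity import Retrying

def wait_until_responsive(policy, ping) -> None:
    """`policy` is an instance of the class above; `ping` raises while the service is not ready."""
    for attempt in Retrying(**policy.kwargs):
        with attempt:
            ping()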
Example #9
async def _wait_for_call(mocked_fct):
    async for attempt in AsyncRetrying(
            stop=stop_after_delay(10),
            wait=wait_random(0, 1),
            retry=retry_if_exception_type(AssertionError),
            reraise=True,
    ):
        with attempt:
            print(f"waiting for call in mocked fct {mocked_fct}, "
                  f"Attempt={attempt.retry_state.attempt_number}")
            mocked_fct.assert_called_once()
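
A hedged usage sketch: the test triggers the code under test and then lets the helper poll the mock for up to ~10 seconds. The scheduled callback below stands in for whatever would normally invoke the mock asynchronously.

import asyncio
from unittest import mock

async def test_callback_fires() -> None:
    mocked_fct = mock.Mock()
    # stand-in for the code under test, which would schedule the call asynchronously
    asyncio.get_running_loop().call_later(0.5, mocked_fct)
    await _wait_for_call(mocked_fct)  # retries assert_called_once() for up to 10s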
Example #10
async def assert_and_wait_for_pipeline_status(
    client: httpx.AsyncClient,
    url: AnyHttpUrl,
    user_id: UserID,
    project_uuid: UUID,
    wait_for_states: Optional[List[RunningState]] = None,
) -> ComputationTaskGet:
    if not wait_for_states:
        wait_for_states = [
            RunningState.SUCCESS,
            RunningState.FAILED,
            RunningState.ABORTED,
        ]
    MAX_TIMEOUT_S = 5 * MINUTE

    async def check_pipeline_state() -> ComputationTaskGet:
        response = await client.get(url, params={"user_id": user_id})
        assert (
            response.status_code == status.HTTP_202_ACCEPTED
        ), f"response code is {response.status_code}, error: {response.text}"
        task_out = ComputationTaskGet.parse_obj(response.json())
        assert task_out.id == project_uuid
        assert task_out.url == f"{client.base_url}/v2/computations/{project_uuid}"
        print(
            f"Pipeline '{project_uuid=}' current task out is '{task_out=}'",
        )
        assert wait_for_states
        assert (
            task_out.state in wait_for_states
        ), f"current task state is '{task_out.state}', not in any of {wait_for_states}"
        return task_out

    start = time.monotonic()
    async for attempt in AsyncRetrying(
        stop=stop_after_delay(MAX_TIMEOUT_S),
        wait=wait_fixed(2),
        retry=retry_if_exception_type(AssertionError),
        reraise=True,
    ):
        elapsed_s = time.monotonic() - start
        with attempt:
            print(
                f"Waiting for pipeline '{project_uuid=}' state to be one of: {wait_for_states=}, attempt={attempt.retry_state.attempt_number}, time={elapsed_s}s"
            )
            task_out = await check_pipeline_state()
            print(
                f"Pipeline '{project_uuid=}' state succesfuly became '{task_out.state}'\n{json.dumps(attempt.retry_state.retry_object.statistics, indent=2)}, time={elapsed_s}s"
            )

            return task_out

    # this is only to satisfy pylance
    raise AssertionError("No computation task generated!")
Example #11
def simcore_stack_deployed_services(
    docker_registry: UrlStr,
    core_stack_namespace: str,
    ops_stack_namespace: str,
    core_stack_compose_specs: ComposeSpec,
    docker_client: DockerClient,
) -> List[Service]:

    # NOTE: the goal here is NOT to test time-to-deploy but
    # rather guaranteeing that the framework is fully deployed before starting
    # tests. Obviously, if the framework is in a broken state,
    # the fixture will fail
    try:
        for attempt in Retrying(
                wait=wait_fixed(5),
                stop=stop_after_delay(4 * _MINUTE),
                before_sleep=before_sleep_log(log, logging.INFO),
                reraise=True,
        ):
            with attempt:
                for service in docker_client.services.list():
                    assert_service_is_running(service)

    finally:
        for stack_namespace in (core_stack_namespace, ops_stack_namespace):
            subprocess.run(f"docker stack ps {stack_namespace}",
                           shell=True,
                           check=False)

        # logs table like
        #  ID                  NAME                  IMAGE                                      NODE                DESIRED STATE       CURRENT STATE                ERROR
        # xbrhmaygtb76        simcore_sidecar.1     itisfoundation/sidecar:latest              crespo-wkstn        Running             Running 53 seconds ago
        # zde7p8qdwk4j        simcore_rabbit.1      itisfoundation/rabbitmq:3.8.0-management   crespo-wkstn        Running             Running 59 seconds ago
        # f2gxmhwq7hhk        simcore_postgres.1    postgres:10.10                             crespo-wkstn        Running             Running about a minute ago
        # 1lh2hulxmc4q        simcore_director.1    itisfoundation/director:latest             crespo-wkstn        Running             Running 34 seconds ago
        # ...

    # TODO: find a more reliable way to list services in a stack
    core_stack_services: List[Service] = [
        service for service in docker_client.services.list(
            filters={
                "label": f"com.docker.stack.namespace={core_stack_namespace}"
            })
    ]  # type: ignore

    assert (core_stack_services
            ), f"Expected some services in core stack '{core_stack_namespace}'"

    assert len(core_stack_compose_specs["services"].keys()) == len(
        core_stack_services)

    return core_stack_services
Example #12
def wait_for_status(
    method: Callable[..., T],
    validate: Optional[Callable[[T], bool]] = None,
    fixed_wait_time: float = 5,
    timeout: float = 300,
    **method_kwargs: Any,
) -> T:
    """Tries to run *method* (and run also a validation of its output) until no AssertionError is raised.

    Arguments are described below. More keyword arguments can be given for `method`.

    Args:
        method (Callable): An unreliable method (or a status query). The method will be executed while: \
            (it raises an AssertionError or the `validate` function outputs `False`) and none of the ending \
            conditions is satisfied (see the timing arguments below)

        validate (Optional[Callable], optional): A callable that validates the output of *method*. \
            It must receive the output of `method` as argument and return `True` if it is ok and `False` if it is \
            invalid. Defaults to None, meaning no validation will be executed.

        fixed_wait_time (float, optional): Time (in seconds) to wait between attempts. Defaults to 5.

        timeout (float, optional): Time (in seconds) after which no more attempts are made. Defaults to 300 (5 minutes).

    Returns:
        [Any]: The method's output
    """
    @retry(
        wait=wait_fixed(fixed_wait_time),
        stop=stop_after_delay(timeout),
        retry=retry_if_exception_type(AssertionError),
    )
    def _wait_for_status(
        method: Callable[..., T],
        validate: Optional[Callable[[T], bool]] = None,
        **method_kwargs: Any,
    ) -> T:
        """Runs the method and apply validation."""
        try:
            result: T = method(**method_kwargs)
            if validate is not None:
                assert validate(
                    result), f'Validation failed. Result is {result}'
        except Exception as ex:
            if not isinstance(ex, AssertionError):
                # NOTE: `result` is not bound here if `method` itself raised
                print(f'An unexpected error was detected: {ex!r}')
            raise
        return result

    return _wait_for_status(method=method, validate=validate, **method_kwargs)
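
A hedged usage sketch of wait_for_status: poll a flaky status query until it reports a terminal value. The query function and the "done" literal are assumptions for illustration only.

def get_job_status(job_id: str) -> str:
    """Hypothetical status query; may raise transiently or return an intermediate state."""
    ...

final_status = wait_for_status(
    get_job_status,
    validate=lambda status: status == "done",  # retried while this returns False
    fixed_wait_time=2,                         # seconds between attempts
    timeout=60,                                # stop retrying after one minute
    job_id="1234",                             # forwarded to get_job_status via **method_kwargs
)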
Example #13
 async def _check_all_services_are_running():
     async for attempt in AsyncRetrying(
             wait=wait_fixed(5),
             stop=stop_after_delay(8 * MINUTE),
             before_sleep=before_sleep_log(log, logging.INFO),
             reraise=True,
     ):
         with attempt:
             await asyncio.gather(*[
                 asyncio.get_event_loop().run_in_executor(
                     None, assert_service_is_running, service)
                 for service in docker_client.services.list()
             ])
Example #14
def wemo_on():
    @tenacity.retry(wait=wait_fixed(10) + wait_random(-2, 2),
                    stop=stop_after_delay(60 * 60),
                    before_sleep=before_sleep_log(_LOGGER, logging.INFO))
    def discover_and_on():
        address = settings.wemo_address
        port = pywemo.ouimeaux_device.probe_wemo(address)
        url = 'http://%s:%i/setup.xml' % (address, port)
        device = pywemo.discovery.device_from_description(url, None)
        device.on()
        _LOGGER.info("Called on on %s", device)

    discover_and_on()
    return "ok"
Example #15
async def test_interactive_services_removed_after_logout(
    client: TestClient,
    logged_user: Dict[str, Any],
    empty_user_project: Dict[str, Any],
    mocked_director_v2_api: Dict[str, mock.MagicMock],
    create_dynamic_service_mock,
    client_session_id_factory: Callable[[], str],
    socketio_client_factory: Callable,
    storage_subsystem_mock:
    MockedStorageSubsystem,  # when guest user logs out garbage is collected
    director_v2_service_mock: aioresponses,
    exp_save_state: bool,
):
    set_service_deletion_delay(SERVICE_DELETION_DELAY, client.server.app)
    # login - logged_user fixture
    # create empty study - empty_user_project fixture
    # create dynamic service - create_dynamic_service_mock fixture
    service = await create_dynamic_service_mock(logged_user["id"],
                                                empty_user_project["uuid"])
    # create websocket
    client_session_id1 = client_session_id_factory()
    sio = await socketio_client_factory(client_session_id1)
    # open project in client 1
    await open_project(client, empty_user_project["uuid"], client_session_id1)
    # logout
    logout_url = client.app.router["auth_logout"].url_for()
    r = await client.post(f"{logout_url}",
                          json={"client_session_id": client_session_id1})
    assert r.url_obj.path == logout_url.path
    await assert_status(r, web.HTTPOk)

    # check result performed by background task
    await asyncio.sleep(SERVICE_DELETION_DELAY + 1)
    await garbage_collector.collect_garbage(client.app)

    # assert dynamic service is removed (this is done in a fire/forget way, so give a bit of leeway)
    async for attempt in AsyncRetrying(reraise=True,
                                       stop=stop_after_delay(10),
                                       wait=wait_fixed(1)):
        with attempt:
            mocked_director_v2_api[
                "director_v2_core.stop_service"].assert_awaited_with(
                    app=client.server.app,
                    service_uuid=service["service_uuid"],
                    save_state=exp_save_state,
                )
Example #16
async def _assert_and_wait_for_comp_task_states_to_be_transmitted_in_projects(
    project_id: str,
    postgres_session: sa.orm.session.Session,
):

    async for attempt in AsyncRetrying(
            reraise=True,
            stop=stop_after_delay(120),
            wait=wait_fixed(5),
            retry=retry_if_exception_type(AssertionError),
    ):
        with attempt:
            print(
                f"--> waiting for pipeline results to move to projects table, attempt {attempt.retry_state.attempt_number}..."
            )
            comp_tasks_in_db: Dict[NodeIdStr,
                                   Any] = _get_computational_tasks_from_db(
                                       project_id, postgres_session)
            workbench_in_db: Dict[NodeIdStr,
                                  Any] = _get_project_workbench_from_db(
                                      project_id, postgres_session)
            for node_id, node_values in comp_tasks_in_db.items():
                assert (
                    node_id in workbench_in_db
                ), f"node {node_id=} is missing from workbench {json_dumps(workbench_in_db, indent=2)}"

                node_in_project_table = workbench_in_db[node_id]

                # if this one is in, the other should also be but let's check it carefully
                assert node_values.run_hash
                assert "runHash" in node_in_project_table
                assert node_values.run_hash == node_in_project_table["runHash"]

                assert node_values.state
                assert "state" in node_in_project_table
                assert "currentStatus" in node_in_project_table["state"]
                # NOTE: beware that the comp_tasks has StateType and Workbench has RunningState (sic)
                assert (DB_TO_RUNNING_STATE[node_values.state].value ==
                        node_in_project_table["state"]["currentStatus"])
            print(
                "--> tasks were properly transferred! "
                f"That's great: {json_dumps(attempt.retry_state.retry_object.statistics)}",
            )
Example #17
    async def _create_client(address: str) -> aioredis.Redis:
        client: Optional[aioredis.Redis] = None

        async for attempt in AsyncRetrying(
                stop=stop_after_delay(1 * _MINUTE),
                wait=wait_fixed(_WAIT_SECS),
                before_sleep=before_sleep_log(log, logging.WARNING),
                reraise=True,
        ):
            with attempt:
                client = await aioredis.create_redis_pool(address,
                                                          encoding="utf-8")
                log.info(
                    "Connection to %s succeeded with %s [%s]",
                    f"redis at {address=}",
                    f"{client=}",
                    json.dumps(attempt.retry_state.retry_object.statistics),
                )
        assert client  # nosec
        return client
Example #18
async def _assert_wait_for_task_status(
    job_id: str,
    dask_client: DaskClient,
    expected_status: RunningState,
    timeout: Optional[int] = None,
):
    async for attempt in AsyncRetrying(
        reraise=True,
        stop=stop_after_delay(timeout or _ALLOW_TIME_FOR_GATEWAY_TO_CREATE_WORKERS),
        wait=wait_fixed(1),
    ):
        with attempt:
            print(
                f"waiting for task to be {expected_status=}, "
                f"Attempt={attempt.retry_state.attempt_number}"
            )
            current_task_status = await dask_client.get_task_status(job_id)
            assert isinstance(current_task_status, RunningState)
            print(f"{current_task_status=} vs {expected_status=}")
            assert current_task_status == expected_status
Example #19
async def test_listen_comp_tasks_task(
    mock_project_subsystem: Dict,
    comp_task_listening_task: None,
    client,
    update_values: Dict[str, Any],
    expected_calls: List[str],
    task_class: NodeClass,
):
    db_engine: aiopg.sa.Engine = client.app[APP_DB_ENGINE_KEY]
    async with db_engine.acquire() as conn:
        # let's put some stuff in there now
        result = await conn.execute(
            comp_tasks.insert()
            .values(outputs=json.dumps({}), node_class=task_class)
            .returning(literal_column("*"))
        )
        row: RowProxy = await result.fetchone()
        task = dict(row)

        # let's update some values
        await conn.execute(
            comp_tasks.update()
            .values(**update_values)
            .where(comp_tasks.c.task_id == task["task_id"])
        )

        # tests whether listener gets hooked calls executed
        for call_name, mocked_call in mock_project_subsystem.items():
            if call_name in expected_calls:
                async for attempt in AsyncRetrying(
                    wait=wait_fixed(1),
                    stop=stop_after_delay(10),
                    retry=retry_if_exception_type(AssertionError),
                    before_sleep=before_sleep_log(logger, logging.INFO),
                    reraise=True,
                ):
                    with attempt:
                        mocked_call.assert_awaited()

            else:
                mocked_call.assert_not_called()
Example #20
def docker_swarm(docker_client: docker.client.DockerClient,
                 keep_docker_up: Iterator[bool]) -> Iterator[None]:
    """inits docker swarm"""

    for attempt in Retrying(wait=wait_fixed(2),
                            stop=stop_after_delay(15),
                            reraise=True):
        with attempt:
            if not _in_docker_swarm(docker_client):
                docker_client.swarm.init(advertise_addr=get_localhost_ip())
            # if still not in swarm, raise an error to try and initialize again
            _in_docker_swarm(docker_client, raise_error=True)

    assert _in_docker_swarm(docker_client) is True

    yield

    if not keep_docker_up:
        assert docker_client.swarm.leave(force=True)

    assert _in_docker_swarm(docker_client) is keep_docker_up
Example #21
 async def create_client(url) -> aioredis.Redis:
     # create redis client
     client: Optional[aioredis.Redis] = None
     async for attempt in AsyncRetrying(
             stop=stop_after_delay(1 * _MINUTE),
             wait=wait_fixed(_WAIT_SECS),
             before_sleep=before_sleep_log(log, logging.WARNING),
             reraise=True,
     ):
         with attempt:
             client = await aioredis.create_redis_pool(url,
                                                       encoding="utf-8")
             if not client:
                 raise ValueError(
                     f"Expected aioredis client instance, got {client}")
             log.info(
                 "Connection to %s succeeded [%s]",
                 f"redis at {endpoint=}",
                 json.dumps(attempt.retry_state.retry_object.statistics),
             )
     assert client  # nosec
     return client
Example #22
async def test_creating_new_project_from_template_and_disconnecting_does_not_create_project(
    client: TestClient,
    logged_user: Dict[str, Any],
    primary_group: Dict[str, str],
    standard_groups: List[Dict[str, str]],
    template_project: Dict[str, Any],
    expected: ExpectedResponse,
    catalog_subsystem_mock: Callable,
    slow_storage_subsystem_mock: MockedStorageSubsystem,
    project_db_cleaner: None,
):
    catalog_subsystem_mock([template_project])
    # create a project from another and disconnect while doing this by timing out
    # POST /v0/projects
    create_url = client.app.router["create_projects"].url_for()
    assert str(create_url) == f"{API_PREFIX}/projects"
    create_url = create_url.with_query(from_template=template_project["uuid"])
    with pytest.raises(asyncio.TimeoutError):
        await client.post(f"{create_url}", json={}, timeout=5)

    # let's check that there are no new project created, after timing out
    list_url = client.app.router["list_projects"].url_for()
    assert str(list_url) == API_PREFIX + "/projects"
    list_url = list_url.with_query(type="user")
    resp = await client.get(f"{list_url}")
    data, *_ = await assert_status(
        resp,
        expected.ok,
    )
    assert not data

    # NOTE: even though the client request timed out above, the creation code keeps
    # running in the server, which is why we need to retry here
    async for attempt in AsyncRetrying(
        reraise=True, stop=stop_after_delay(20), wait=wait_fixed(1)
    ):
        with attempt:
            slow_storage_subsystem_mock.delete_project.assert_called_once()
Example #23
def simcore_docker_stack_and_registry_ready(
    event_loop: asyncio.AbstractEventLoop,
    docker_registry: UrlStr,
    docker_stack: Dict,
    simcore_services_ready: None,
) -> Dict:
    # At this point `simcore_services_ready` waited until all services
    # are running. Let's make one more check on the web-api
    for attempt in Retrying(
        wait=wait_fixed(1),
        stop=stop_after_delay(0.5 * _MINUTE),
        reraise=True,
        before_sleep=before_sleep_log(log, logging.INFO),
    ):
        with attempt:
            resp = httpx.get("http://127.0.0.1:9081/v0/")
            resp.raise_for_status()
            log.info(
                "Connection to osparc-simcore web API succeeded [%s]",
                json.dumps(attempt.retry_state.retry_object.statistics),
            )

    return docker_stack
Example #24
def retry(stop_max_delay=None, **kwargs):
    k = {"wait": _default_wait, "retry": lambda x: False}
    if stop_max_delay not in (True, False, None):
        k['stop'] = stop.stop_after_delay(stop_max_delay)
    return tenacity.retry(**k)
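
Two things are worth noting about this shim: extra **kwargs are accepted but not forwarded to tenacity.retry, and because the retry predicate is always False, an ordinary exception propagates immediately; only an explicit tenacity.TryAgain requests another attempt, bounded by stop_after_delay. A hedged usage sketch (the client object is made up):

import tenacity

@retry(stop_max_delay=30)
def wait_until_ready(client):
    # ordinary exceptions propagate immediately; raising TryAgain explicitly asks
    # tenacity for another attempt, for up to 30 seconds
    if not client.ping():  # hypothetical readiness probe
        raise tenacity.TryAgain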
Example #25
from threading import Event

import tempfile
import unittest
from gpiozero import Button, RotaryEncoder
from tenacity import retry
from tenacity.stop import stop_after_delay
from typing import Callable

from timemachine import config, controls, GD

stop_event = Event()
knob_event = Event()


@retry(stop=stop_after_delay(10))
def retry_call(callable: Callable, *args, **kwargs):
    """Retry a call."""
    return callable(*args, **kwargs)


def stop_button(button):
    print("Stop button pressed")
    stop_event.set()


def twist_knob(knob: RotaryEncoder, label):
    print(f"Knob {label} steps={knob.steps} value={knob.value}")
    knob_event.set()
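
retry_call above simply re-invokes an arbitrary callable for up to 10 seconds, which suits gpiozero devices that can fail transiently on first construction. A hedged sketch of how the pieces might be wired together (the GPIO pin numbers are illustrative assumptions):

button = retry_call(Button, 21, hold_time=1)            # constructor retried on failure
knob = retry_call(RotaryEncoder, 16, 22, max_steps=0)   # constructor retried on failure

button.when_pressed = stop_button
knob.when_rotated = lambda: twist_knob(knob, "volume")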

Example #26
# TESTS ------------------------------------------------------------------------------------
#
#   publisher ---> (rabbitMQ)  ---> webserver --- (socketio) ---> front-end pages
#
# - logs, instrumentation and progress are sent as rabbitMQ messages
# - the webserver consumes these messages and forwards them to the front-end, broadcasting them via socketio
# - all front-end instances connected to these channels will get notified when new messages are directed
#   to them
#

POLLING_TIME = 0.2
TIMEOUT_S = 5
RETRY_POLICY = dict(
    wait=wait_fixed(POLLING_TIME),
    stop=stop_after_delay(TIMEOUT_S),
    before_sleep=before_sleep_log(logger, log_level=logging.WARNING),
    reraise=True,
)
NUMBER_OF_MESSAGES = 1
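# Illustrative (assumed) consumption of RETRY_POLICY further down in the test:
# poll until the expected number of socketio messages reached the front-end mock,
# e.g.
#
#     async for attempt in AsyncRetrying(**RETRY_POLICY):
#         with attempt:
#             assert handler_mock.call_count == NUMBER_OF_MESSAGES
#
# (handler_mock is a hypothetical name, not defined in this snippet)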
USER_ROLES = [
    UserRole.GUEST,
    UserRole.USER,
    UserRole.TESTER,
]


@pytest.mark.parametrize("user_role", USER_ROLES)
async def test_publish_to_other_user(
    other_user_id: int,
    other_project_id: UUIDStr,
Example #27
@dataclass  # assumed: the field(default_factory=...) defaults below require a dataclass
class DataStorageManager:  # pylint: disable=too-many-public-methods
    """Data storage manager

    The dsm has access to the database for all meta data and to the actual backend. For now this
    is simcore's S3 [minio] and the datcore storage facilities.

    For all data that is in-house (simcore.s3, ...) we keep a synchronized database with meta information
    for the physical files.

    For physical changes on S3, that might be time-consuming, the db keeps a state (delete and upload mostly)

    The dsm provides the following additional functionalities:

    - listing of folders for a given user, optionally filtered using a regular expression and optionally
      sorted by one of the meta data keys

    - upload/download of files

        client -> S3 : presigned upload link
        S3 -> client : presigned download link
        datcore -> client: presigned download link
        S3 -> datcore: local copy and then upload via their api

    minio/S3 and postgres can talk nicely with each other via Notifications using rabbitMQ which we already have.
    See:

        https://blog.minio.io/part-5-5-publish-minio-events-via-postgresql-50f6cc7a7346
        https://docs.minio.io/docs/minio-bucket-notification-guide.html
    """

    # TODO: perhaps can be used a cache? add a lifetime?

    s3_client: MinioClientWrapper
    engine: Engine
    loop: object
    pool: ThreadPoolExecutor
    simcore_bucket_name: str
    has_project_db: bool
    session: AioSession = field(default_factory=aiobotocore.get_session)
    datcore_tokens: Dict[str, DatCoreApiToken] = field(default_factory=dict)
    app: Optional[web.Application] = None

    def _create_aiobotocore_client_context(self) -> ClientCreatorContext:
        assert hasattr(self.session, "create_client")  # nosec
        # pylint: disable=no-member

        # SEE API in https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html
        # SEE https://aiobotocore.readthedocs.io/en/latest/index.html
        return self.session.create_client(
            "s3",
            endpoint_url=self.s3_client.endpoint_url,
            aws_access_key_id=self.s3_client.access_key,
            aws_secret_access_key=self.s3_client.secret_key,
        )

    def _get_datcore_tokens(
            self, user_id: str) -> Tuple[Optional[str], Optional[str]]:
        # pylint: disable=no-member
        token = self.datcore_tokens.get(user_id, DatCoreApiToken())
        return token.to_tuple()

    async def locations(self, user_id: str):
        locs = []
        simcore_s3 = {"name": SIMCORE_S3_STR, "id": SIMCORE_S3_ID}
        locs.append(simcore_s3)

        api_token, api_secret = self._get_datcore_tokens(user_id)

        if api_token and api_secret and self.app:
            if await datcore_adapter.check_user_can_connect(
                    self.app, api_token, api_secret):
                datcore = {"name": DATCORE_STR, "id": DATCORE_ID}
                locs.append(datcore)

        return locs

    @classmethod
    def location_from_id(cls, location_id: str):
        return get_location_from_id(location_id)

    # LIST/GET ---------------------------

    # pylint: disable=too-many-arguments
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    async def list_files(self,
                         user_id: str,
                         location: str,
                         uuid_filter: str = "",
                         regex: str = "") -> List[FileMetaDataEx]:
        """Returns a list of file paths

        - Works for simcore.s3 and datcore
        - Can filter on uuid: useful to filter on project_id/node_id
        - Can filter upon regular expression (for now only on key: value pairs of the FileMetaData)
        """
        data = deque()
        if location == SIMCORE_S3_STR:
            accesible_projects_ids = []
            async with self.engine.acquire() as conn, conn.begin():
                accesible_projects_ids = await get_readable_project_ids(
                    conn, int(user_id))
                where_statement = (
                    file_meta_data.c.user_id == user_id
                ) | file_meta_data.c.project_id.in_(accesible_projects_ids)
                if uuid_filter:
                    where_statement &= file_meta_data.c.file_uuid.ilike(
                        f"%{uuid_filter}%")
                query = sa.select([file_meta_data]).where(where_statement)

                async for row in conn.execute(query):
                    dex = to_meta_data_extended(row)
                    if not is_file_entry_valid(dex.fmd):
                        # NOTE: the file is not updated with the information from S3 backend.
                        # 1. Either the file exists, but was never updated in the database
                        # 2. Or the file does not exist or was never completed, and the file_meta_data entry is old and faulty
                        # we need to update from S3 here since the database is not up-to-date
                        dex = await self.try_update_database_from_storage(
                            dex.fmd.file_uuid,
                            dex.fmd.bucket_name,
                            dex.fmd.object_name,
                        )
                    if dex:
                        data.append(dex)

            if self.has_project_db:
                uuid_name_dict = {}
                # now parse the project to search for node/project names
                try:
                    async with self.engine.acquire() as conn, conn.begin():
                        query = sa.select([projects]).where(
                            projects.c.uuid.in_(accesible_projects_ids))

                        async for row in conn.execute(query):
                            proj_data = dict(row.items())

                            uuid_name_dict[
                                proj_data["uuid"]] = proj_data["name"]
                            wb = proj_data["workbench"]
                            for node in wb.keys():
                                uuid_name_dict[node] = wb[node]["label"]
                except DBAPIError as _err:
                    logger.exception(
                        "Error querying database for project names")

                if not uuid_name_dict:
                    # there seems to be no project whatsoever for user_id
                    return []

                # only keep files from non-deleted project
                clean_data = deque()
                for dx in data:
                    d = dx.fmd
                    if d.project_id not in uuid_name_dict:
                        continue
                    #
                    # FIXME: artificially fills ['project_name', 'node_name', 'file_id', 'raw_file_path', 'display_file_path']
                    #        with information from the projects table!

                    d.project_name = uuid_name_dict[d.project_id]
                    if d.node_id in uuid_name_dict:
                        d.node_name = uuid_name_dict[d.node_id]

                    d.raw_file_path = str(
                        Path(d.project_id) / Path(d.node_id) /
                        Path(d.file_name))
                    d.display_file_path = d.raw_file_path
                    d.file_id = d.file_uuid
                    if d.node_name and d.project_name:
                        d.display_file_path = str(
                            Path(d.project_name) / Path(d.node_name) /
                            Path(d.file_name))
                        # the data was already synced to the postgres metadata table at this point
                        clean_data.append(dx)

                data = clean_data

        elif location == DATCORE_STR:
            api_token, api_secret = self._get_datcore_tokens(user_id)
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            return await datcore_adapter.list_all_datasets_files_metadatas(
                self.app, api_token, api_secret)

        if uuid_filter:
            # TODO: incorporate this in db query!
            _query = re.compile(uuid_filter, re.IGNORECASE)
            filtered_data = deque()
            for dx in data:
                d = dx.fmd
                if _query.search(d.file_uuid):
                    filtered_data.append(dx)

            return list(filtered_data)

        if regex:
            _query = re.compile(regex, re.IGNORECASE)
            filtered_data = deque()
            for dx in data:
                d = dx.fmd
                _vars = vars(d)
                for v in _vars.keys():
                    if _query.search(v) or _query.search(str(_vars[v])):
                        filtered_data.append(dx)
                        break
            return list(filtered_data)

        return list(data)

    async def list_files_dataset(
            self, user_id: str, location: str, dataset_id: str
    ) -> Union[List[FileMetaData], List[FileMetaDataEx]]:
        # this is a cheap shot, needs fixing once storage/db is in sync
        data = []
        if location == SIMCORE_S3_STR:
            data: List[FileMetaDataEx] = await self.list_files(
                user_id, location, uuid_filter=dataset_id + "/")

        elif location == DATCORE_STR:
            api_token, api_secret = self._get_datcore_tokens(user_id)
            # lists all the files inside the dataset
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            return await datcore_adapter.list_all_files_metadatas_in_dataset(
                self.app, api_token, api_secret, dataset_id)

        return data

    async def list_datasets(self, user_id: str,
                            location: str) -> List[DatasetMetaData]:
        """Returns a list of top level datasets

        Works for simcore.s3 and datcore

        """
        data = []

        if location == SIMCORE_S3_STR:
            if self.has_project_db:
                try:
                    async with self.engine.acquire() as conn, conn.begin():
                        readable_projects_ids = await get_readable_project_ids(
                            conn, int(user_id))
                        has_read_access = projects.c.uuid.in_(
                            readable_projects_ids)

                        # FIXME: this DOES NOT read from file-metadata table!!!
                        query = sa.select([projects.c.uuid, projects.c.name
                                           ]).where(has_read_access)
                        async for row in conn.execute(query):
                            dmd = DatasetMetaData(
                                dataset_id=row.uuid,
                                display_name=row.name,
                            )
                            data.append(dmd)
                except DBAPIError as _err:
                    logger.exception(
                        "Error querying database for project names")

        elif location == DATCORE_STR:
            api_token, api_secret = self._get_datcore_tokens(user_id)
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            return await datcore_adapter.list_datasets(self.app, api_token,
                                                       api_secret)

        return data

    async def list_file(self, user_id: str, location: str,
                        file_uuid: str) -> Optional[FileMetaDataEx]:

        if location == SIMCORE_S3_STR:

            async with self.engine.acquire() as conn, conn.begin():
                can: Optional[AccessRights] = await get_file_access_rights(
                    conn, int(user_id), file_uuid)
                if can.read:
                    query = sa.select([
                        file_meta_data
                    ]).where(file_meta_data.c.file_uuid == file_uuid)
                    result = await conn.execute(query)
                    row = await result.first()
                    if not row:
                        return None
                    file_metadata = to_meta_data_extended(row)
                    if is_file_entry_valid(file_metadata.fmd):
                        return file_metadata
                    # we need to update from S3 here since the database is not up-to-date
                    file_metadata = await self.try_update_database_from_storage(
                        file_metadata.fmd.file_uuid,
                        file_metadata.fmd.bucket_name,
                        file_metadata.fmd.object_name,
                    )
                    return file_metadata
                # FIXME: returns None in both cases: file does not exist or use has no access
                logger.debug("User %s cannot read file %s", user_id, file_uuid)
                return None

        elif location == DATCORE_STR:
            # FIXME: review return inconsistencies
            # api_token, api_secret = self._get_datcore_tokens(user_id)
            import warnings

            warnings.warn("NOT IMPLEMENTED!!!")
            return None

    # UPLOAD/DOWNLOAD LINKS ---------------------------

    async def upload_file_to_datcore(self, _user_id: str,
                                     _local_file_path: str,
                                     _destination_id: str):
        import warnings

        warnings.warn(f"NOT IMPLEMENTED!!! in {self.__class__}")
        # uploads a locally available file to dat core given the storage path, optionally attached some meta data
        # api_token, api_secret = self._get_datcore_tokens(user_id)
        # await dcw.upload_file_to_id(destination_id, local_file_path)

    async def try_update_database_from_storage(
        self,
        file_uuid: str,
        bucket_name: str,
        object_name: str,
        silence_exception: bool = False,
    ) -> Optional[FileMetaDataEx]:
        try:
            async with self._create_aiobotocore_client_context(
            ) as aioboto_client:
                result = await aioboto_client.head_object(Bucket=bucket_name,
                                                          Key=object_name
                                                          )  # type: ignore

                file_size = result["ContentLength"]  # type: ignore
                last_modified = result["LastModified"]  # type: ignore
                entity_tag = result["ETag"].strip('"')  # type: ignore

                async with self.engine.acquire() as conn:
                    result: ResultProxy = await conn.execute(
                        file_meta_data.update().where(
                            file_meta_data.c.file_uuid == file_uuid).values(
                                file_size=file_size,
                                last_modified=last_modified,
                                entity_tag=entity_tag,
                            ).returning(literal_column("*")))
                    if not result:
                        return None
                    row: Optional[RowProxy] = await result.first()
                    if not row:
                        return None

                    return to_meta_data_extended(row)
        except botocore.exceptions.ClientError:
            if silence_exception:
                logger.debug("Error happened while trying to access %s",
                             file_uuid)
            else:
                logger.warning("Error happened while trying to access %s",
                               file_uuid,
                               exc_info=True)
            # the file is not existing or some error happened
            return None

    @retry(
        stop=stop_after_delay(1 * _HOUR),
        wait=wait_exponential(multiplier=0.1, exp_base=1.2, max=30),
        retry=(retry_if_exception_type()
               | retry_if_result(lambda result: result is None)),
        before_sleep=before_sleep_log(logger, logging.INFO),
    )
    async def auto_update_database_from_storage_task(self, file_uuid: str,
                                                     bucket_name: str,
                                                     object_name: str):
        return await self.try_update_database_from_storage(
            file_uuid, bucket_name, object_name, silence_exception=True)

    async def upload_link(self, user_id: str, file_uuid: str):
        """
        Creates pre-signed upload link and updates metadata table when
        link is used and upload is successfully completed

        SEE _metadata_file_updater
        """

        async with self.engine.acquire() as conn:
            can: Optional[AccessRights] = await get_file_access_rights(
                conn, int(user_id), file_uuid)
            if not can.write:
                logger.debug("User %s was not allowed to upload file %s",
                             user_id, file_uuid)
                raise web.HTTPForbidden(
                    reason=
                    f"User does not have enough access rights to upload file {file_uuid}"
                )

        @retry(**postgres_service_retry_policy_kwargs)
        async def _init_metadata() -> Tuple[int, str]:
            async with self.engine.acquire() as conn:
                fmd = FileMetaData()
                fmd.simcore_from_uuid(file_uuid, self.simcore_bucket_name)
                fmd.user_id = user_id  # NOTE: takes ownership of uploaded data

                # if file already exists, we might want to update a time-stamp

                # upsert file_meta_data
                insert_stmt = pg_insert(file_meta_data).values(**vars(fmd))
                do_nothing_stmt = insert_stmt.on_conflict_do_nothing(
                    index_elements=["file_uuid"])
                await conn.execute(do_nothing_stmt)

                return fmd.file_size, fmd.last_modified

        await _init_metadata()

        bucket_name = self.simcore_bucket_name
        object_name = file_uuid

        # a parallel task is started which will update the metadata of the uploaded file
        # once the update has finished.
        fire_and_forget_task(
            self.auto_update_database_from_storage_task(
                file_uuid=file_uuid,
                bucket_name=bucket_name,
                object_name=object_name,
            ))
        return self.s3_client.create_presigned_put_url(bucket_name,
                                                       object_name)

    async def download_link_s3(self, file_uuid: str, user_id: int) -> str:

        # access layer
        async with self.engine.acquire() as conn:
            can: Optional[AccessRights] = await get_file_access_rights(
                conn, int(user_id), file_uuid)
            if not can.read:
                # NOTE: this is tricky. A user with read access can download any data!
                # If write permission would be required, then shared projects as views cannot
                # recover data in nodes (e.g. jupyter cannot pull work data)
                #
                logger.debug("User %s was not allowed to download file %s",
                             user_id, file_uuid)
                raise web.HTTPForbidden(
                    reason=
                    f"User does not have enough rights to download {file_uuid}"
                )

        bucket_name = self.simcore_bucket_name
        async with self.engine.acquire() as conn:
            stmt = sa.select([file_meta_data.c.object_name
                              ]).where(file_meta_data.c.file_uuid == file_uuid)
            object_name: Optional[str] = await conn.scalar(stmt)

            if object_name is None:
                raise web.HTTPNotFound(
                    reason=f"File '{file_uuid}' does not exists in storage.")

        link = self.s3_client.create_presigned_get_url(bucket_name,
                                                       object_name)
        return link

    async def download_link_datcore(self, user_id: str, file_id: str) -> URL:
        api_token, api_secret = self._get_datcore_tokens(user_id)
        assert self.app  # nosec
        assert api_secret  # nosec
        assert api_token  # nosec
        return await datcore_adapter.get_file_download_presigned_link(
            self.app, api_token, api_secret, file_id)

    # COPY -----------------------------

    async def copy_file_s3_s3(self, user_id: str, dest_uuid: str,
                              source_uuid: str):
        # FIXME: operation MUST be atomic

        # source is s3, location is s3
        to_bucket_name = self.simcore_bucket_name
        to_object_name = dest_uuid
        from_bucket = self.simcore_bucket_name
        from_object_name = source_uuid
        # FIXME: This is not async!
        self.s3_client.copy_object(to_bucket_name, to_object_name, from_bucket,
                                   from_object_name)

        # update db
        async with self.engine.acquire() as conn:
            fmd = FileMetaData()
            fmd.simcore_from_uuid(dest_uuid, self.simcore_bucket_name)
            fmd.user_id = user_id
            ins = file_meta_data.insert().values(**vars(fmd))
            await conn.execute(ins)

    async def copy_file_s3_datcore(self, user_id: str, dest_uuid: str,
                                   source_uuid: str):
        session = get_client_session(self.app)

        # source is s3, get link and copy to datcore
        bucket_name = self.simcore_bucket_name
        object_name = source_uuid
        filename = source_uuid.split("/")[-1]

        s3_dowload_link = self.s3_client.create_presigned_get_url(
            bucket_name, object_name)

        with tempfile.TemporaryDirectory() as tmpdir:
            # FIXME: connect download and upload streams
            local_file_path = os.path.join(tmpdir, filename)

            # Downloads S3 -> local
            await download_to_file_or_raise(session, s3_dowload_link,
                                            local_file_path)

            # Uploads local -> DATCore
            await self.upload_file_to_datcore(
                _user_id=user_id,
                _local_file_path=local_file_path,
                _destination_id=dest_uuid,
            )

    async def copy_file_datcore_s3(
        self,
        user_id: str,
        dest_uuid: str,
        source_uuid: str,
        filename_missing: bool = False,
    ):
        session = get_client_session(self.app)

        # 2 steps: Get download link for local copy, the upload link to s3
        # TODO: This should be a redirect stream!
        dc_link, filename = await self.download_link_datcore(
            user_id=user_id, file_id=source_uuid)
        if filename_missing:
            dest_uuid = str(Path(dest_uuid) / filename)

        s3_upload_link = await self.upload_link(user_id, dest_uuid)

        with tempfile.TemporaryDirectory() as tmpdir:
            # FIXME: connect download and upload streams

            local_file_path = os.path.join(tmpdir, filename)

            # Downloads DATCore -> local
            await download_to_file_or_raise(session, dc_link, local_file_path)

            # Uploads local -> S3
            s3_upload_link = URL(s3_upload_link)
            async with session.put(
                    s3_upload_link,
                    data=Path(local_file_path).open("rb"),
                    raise_for_status=True,
            ) as resp:
                logger.debug(
                    "Uploaded local -> SIMCore %s . Status %s",
                    s3_upload_link,
                    resp.status,
                )

        return dest_uuid

    async def copy_file(
        self,
        user_id: str,
        dest_location: str,
        dest_uuid: str,
        source_location: str,
        source_uuid: str,
    ):
        if source_location == SIMCORE_S3_STR:
            if dest_location == DATCORE_STR:
                await self.copy_file_s3_datcore(user_id, dest_uuid,
                                                source_uuid)
            elif dest_location == SIMCORE_S3_STR:
                await self.copy_file_s3_s3(user_id, dest_uuid, source_uuid)
        elif source_location == DATCORE_STR:
            if dest_location == DATCORE_STR:
                raise NotImplementedError(
                    "copying files from datcore to datcore is not implemented")
            if dest_location == SIMCORE_S3_STR:
                await self.copy_file_datcore_s3(user_id, dest_uuid,
                                                source_uuid)

    async def deep_copy_project_simcore_s3(
        self,
        user_id: str,
        source_project: Dict[str, Any],
        destination_project: Dict[str, Any],
        node_mapping: Dict[str, str],
    ):
        """Parses a given source project and copies all related files to the destination project

        Since all files are organized as

            project_id/node_id/filename or links to datcore

        this function creates a new folder structure

            project_id/node_id/filename

        and copies all files to the corresponding places.

        Additionally, all external files from datcore are being copied and the paths in the destination
        project are adapted accordingly

        Lastly, the meta data db is kept in sync
        """
        source_folder = source_project["uuid"]
        dest_folder = destination_project["uuid"]

        # access layer
        async with self.engine.acquire() as conn, conn.begin():
            source_access_rights = await get_project_access_rights(
                conn, int(user_id), project_id=source_folder)
            dest_access_rights = await get_project_access_rights(
                conn, int(user_id), project_id=dest_folder)
        if not source_access_rights.read:
            logger.debug(
                "User %s was not allowed to read from project %s",
                user_id,
                source_folder,
            )
            raise web.HTTPForbidden(
                reason=
                f"User does not have enough access rights to read from project '{source_folder}'"
            )

        if not dest_access_rights.write:
            logger.debug(
                "User %s was not allowed to write to project %s",
                user_id,
                dest_folder,
            )
            raise web.HTTPForbidden(
                reason=
                f"User does not have enough access rights to write to project '{dest_folder}'"
            )

        # build up naming map based on labels
        uuid_name_dict = {}
        uuid_name_dict[dest_folder] = destination_project["name"]
        for src_node_id, src_node in source_project["workbench"].items():
            new_node_id = node_mapping.get(src_node_id)
            if new_node_id is not None:
                uuid_name_dict[new_node_id] = src_node["label"]

        async with self._create_aiobotocore_client_context() as aioboto_client:

            logger.debug(
                "Listing all items under  %s:%s/",
                self.simcore_bucket_name,
                source_folder,
            )

            # Step 1: list all objects for this project, derive their destination object names,
            # and copy them over; collect some names along the way
            # Note: the trailing / at the end of the Prefix is VERY important, it makes the listing several orders of magnitude faster
            response = await aioboto_client.list_objects_v2(
                Bucket=self.simcore_bucket_name, Prefix=f"{source_folder}/")

            contents: List = response.get("Contents", [])
            logger.debug(
                "Listed  %s items under %s:%s/",
                len(contents),
                self.simcore_bucket_name,
                source_folder,
            )

            for item in contents:
                source_object_name = item["Key"]
                source_object_parts = Path(source_object_name).parts

                if len(source_object_parts) != 3:
                    # This may happen once we have shared/home folders
                    # FIXME: this might cause problems
                    logger.info(
                        "Skipping copy of '%s'. Expected three parts path!",
                        source_object_name,
                    )
                    continue

                old_node_id = source_object_parts[1]
                new_node_id = node_mapping.get(old_node_id)
                if new_node_id is not None:
                    old_filename = source_object_parts[2]
                    dest_object_name = str(
                        Path(dest_folder) / new_node_id / old_filename)

                    copy_kwargs = dict(
                        CopySource={
                            "Bucket": self.simcore_bucket_name,
                            "Key": source_object_name,
                        },
                        Bucket=self.simcore_bucket_name,
                        Key=dest_object_name,
                    )
                    logger.debug("Copying %s ...", copy_kwargs)

                    # FIXME: for objects larger than 5GB, the multipart upload (Upload Part - Copy) API must be used
                    # SEE https://botocore.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.copy_object
                    await aioboto_client.copy_object(**copy_kwargs)

        # Step 2: List all references in outputs that point to datcore and copy over
        for node_id, node in destination_project["workbench"].items():
            outputs: Dict = node.get("outputs", {})
            for _, output in outputs.items():
                source = output["path"]

                if output.get("store") == DATCORE_ID:
                    destination_folder = str(Path(dest_folder) / node_id)
                    logger.info("Copying %s to %s", source, destination_folder)

                    destination = await self.copy_file_datcore_s3(
                        user_id=user_id,
                        dest_uuid=destination_folder,
                        source_uuid=source,
                        filename_missing=True,
                    )
                    assert destination.startswith(destination_folder)  # nosec

                    output["store"] = SIMCORE_S3_ID
                    output["path"] = destination

                elif output.get("store") == SIMCORE_S3_ID:
                    destination = str(
                        Path(dest_folder) / node_id / Path(source).name)
                    output["store"] = SIMCORE_S3_ID
                    output["path"] = destination

        fmds = []
        async with self._create_aiobotocore_client_context() as aioboto_client:

            # Step 3: list the copied files first to create their fmds
            # Note: the trailing / at the end of the Prefix is VERY important, it makes the listing several orders of magnitude faster
            response = await aioboto_client.list_objects_v2(
                Bucket=self.simcore_bucket_name, Prefix=f"{dest_folder}/")

            if "Contents" in response:
                for item in response["Contents"]:
                    fmd = FileMetaData()
                    fmd.simcore_from_uuid(item["Key"],
                                          self.simcore_bucket_name)
                    fmd.project_name = uuid_name_dict.get(
                        dest_folder, "Untitled")
                    fmd.node_name = uuid_name_dict.get(fmd.node_id, "Untitled")
                    fmd.raw_file_path = fmd.file_uuid
                    fmd.display_file_path = str(
                        Path(fmd.project_name) / fmd.node_name / fmd.file_name)
                    fmd.user_id = user_id
                    fmd.file_size = item["Size"]
                    fmd.last_modified = str(item["LastModified"])
                    fmds.append(fmd)

        # Step 4: sync db
        async with self.engine.acquire() as conn, conn.begin():
            # TODO: upsert ALL rows in one statement
            for fmd in fmds:
                query = sa.select([
                    file_meta_data
                ]).where(file_meta_data.c.file_uuid == fmd.file_uuid)
                # if the file already exists, replace it: delete the old row, then insert the new one
                rows = await conn.execute(query)
                exists = await rows.scalar()
                if exists:
                    delete_me = file_meta_data.delete().where(
                        file_meta_data.c.file_uuid == fmd.file_uuid)
                    await conn.execute(delete_me)
                ins = file_meta_data.insert().values(**vars(fmd))
                await conn.execute(ins)

    # DELETE -------------------------------------

    async def delete_file(self, user_id: str, location: str, file_uuid: str):
        """Deletes a file given its fmd and location

        Additionally requires a user_id for 3rd party auth

        For internal storage, the db state should be updated upon completion via
        a notification mechanism

        For simcore.s3 we can use the file_name
        For datcore we need the full path
        """
        if location == SIMCORE_S3_STR:
            # FIXME: operation MUST be atomic, transaction??

            to_delete = []
            async with self.engine.acquire() as conn, conn.begin():
                can: Optional[AccessRights] = await get_file_access_rights(
                    conn, int(user_id), file_uuid)
                if not can.delete:
                    logger.debug(
                        "User %s was not allowed to delete file %s",
                        user_id,
                        file_uuid,
                    )
                    raise web.HTTPForbidden(
                        reason=
                        f"User '{user_id}' does not have enough access rights to delete file {file_uuid}"
                    )

                query = sa.select([
                    file_meta_data.c.bucket_name, file_meta_data.c.object_name
                ]).where(file_meta_data.c.file_uuid == file_uuid)

                async for row in conn.execute(query):
                    if self.s3_client.remove_objects(row.bucket_name,
                                                     [row.object_name]):
                        to_delete.append(file_uuid)

                await conn.execute(file_meta_data.delete().where(
                    file_meta_data.c.file_uuid.in_(to_delete)))

        elif location == DATCORE_STR:
            # FIXME: review return inconsistencies
            api_token, api_secret = self._get_datcore_tokens(user_id)
            assert self.app  # nosec
            assert api_secret  # nosec
            assert api_token  # nosec
            await datcore_adapter.delete_file(self.app, api_token, api_secret,
                                              file_uuid)

    async def delete_project_simcore_s3(
            self,
            user_id: str,
            project_id: str,
            node_id: Optional[str] = None) -> Optional[web.Response]:
        """Deletes all files from a given node in a project in simcore.s3 and updated db accordingly.
        If node_id is not given, then all the project files db entries are deleted.
        """

        # FIXME: operation MUST be atomic. Mark for deletion and remove from db when deletion fully confirmed
        async with self.engine.acquire() as conn, conn.begin():
            # access layer
            can: Optional[AccessRights] = await get_project_access_rights(
                conn, int(user_id), project_id)
            if not can.delete:
                logger.debug(
                    "User %s was not allowed to delete project %s",
                    user_id,
                    project_id,
                )
                raise web.HTTPForbidden(
                    reason=f"User does not have delete access for {project_id}"
                )

            delete_me = file_meta_data.delete().where(
                file_meta_data.c.project_id == project_id, )
            if node_id:
                delete_me = delete_me.where(
                    file_meta_data.c.node_id == node_id)
            await conn.execute(delete_me)

        async with self._create_aiobotocore_client_context() as aioboto_client:
            # Note: the trailing / at the end of the Prefix is VERY important, it makes the listing several orders of magnitude faster
            response = await aioboto_client.list_objects_v2(
                Bucket=self.simcore_bucket_name,
                Prefix=f"{project_id}/{node_id}/"
                if node_id else f"{project_id}/",
            )

            objects_to_delete = []
            for f in response.get("Contents", []):
                objects_to_delete.append({"Key": f["Key"]})

            if objects_to_delete:
                response = await aioboto_client.delete_objects(
                    Bucket=self.simcore_bucket_name,
                    Delete={"Objects": objects_to_delete},
                )
                return response

    # SEARCH -------------------------------------

    async def search_files_starting_with(self, user_id: int,
                                         prefix: str) -> List[FileMetaDataEx]:
        # Avoids using list_files since it accounts for projects/nodes
        # Storage should know NOTHING about those concepts
        files_meta = deque()

        async with self.engine.acquire() as conn, conn.begin():
            # access layer
            can_read_projects_ids = await get_readable_project_ids(
                conn, int(user_id))
            has_read_access = (
                file_meta_data.c.user_id == str(user_id)
            ) | file_meta_data.c.project_id.in_(can_read_projects_ids)

            stmt = sa.select([file_meta_data]).where(
                file_meta_data.c.file_uuid.startswith(prefix)
                & has_read_access)

            async for row in conn.execute(stmt):
                meta_extended = to_meta_data_extended(row)
                files_meta.append(meta_extended)

        return list(files_meta)

    async def create_soft_link(self, user_id: int, target_uuid: str,
                               link_uuid: str) -> FileMetaDataEx:

        # validate link_uuid
        async with self.engine.acquire() as conn:
            # TODO: select exists(select 1 from file_meta_data where file_uuid=12)
            found = await conn.scalar(
                sa.select([file_meta_data.c.file_uuid
                           ]).where(file_meta_data.c.file_uuid == link_uuid))
            if found:
                raise ValueError(
                    f"Invalid link {link_uuid}. Link already exists")

        # validate target_uuid
        target = await self.list_file(str(user_id), SIMCORE_S3_STR,
                                      target_uuid)
        if not target:
            raise ValueError(
                f"Invalid target '{target_uuid}'. File does not exists for this user"
            )

        # duplicate target and change the following columns:
        target.fmd.file_uuid = link_uuid
        target.fmd.file_id = link_uuid  # NOTE: api-server relies on this id
        target.fmd.is_soft_link = True

        async with self.engine.acquire() as conn:
            stmt = (file_meta_data.insert().values(
                **attr.asdict(target.fmd)).returning(literal_column("*")))

            result = await conn.execute(stmt)
            link = to_meta_data_extended(await result.first())
            return link

    async def synchronise_meta_data_table(self, location: str,
                                          dry_run: bool) -> Dict[str, Any]:

        PRUNE_CHUNK_SIZE = 20

        removed: List[str] = []
        to_remove: List[str] = []

        async def _prune_db_table(conn):
            if not dry_run:
                await conn.execute(file_meta_data.delete().where(
                    file_meta_data.c.object_name.in_(to_remove)))
            logger.info(
                "%s %s orphan items",
                "Would have deleted" if dry_run else "Deleted",
                len(to_remove),
            )
            removed.extend(to_remove)
            to_remove.clear()

        # ----------

        assert (
            location == SIMCORE_S3_STR
        ), "Only with s3, no other sync implemented"  # nosec

        if location == SIMCORE_S3_STR:

            # NOTE: only valid for simcore, since datcore data is not in the database table
            # let's get all the files in the table
            logger.warning(
                "synchronisation of database/s3 storage started, this will take some time..."
            )

            async with self.engine.acquire(
            ) as conn, self._create_aiobotocore_client_context(
            ) as aioboto_client:

                number_of_rows_in_db = (await conn.scalar(
                    sa.select([sa.func.count()]).select_from(file_meta_data))
                                        or 0)
                logger.warning(
                    "Total number of entries to check %d",
                    number_of_rows_in_db,
                )

                assert isinstance(aioboto_client, AioBaseClient)  # nosec

                async for row in conn.execute(
                        sa.select([file_meta_data.c.object_name])):
                    s3_key = row.object_name  # type: ignore

                    # now check if the file exists in S3
                    # SEE https://www.peterbe.com/plog/fastest-way-to-find-out-if-a-file-exists-in-s3
                    response = await aioboto_client.list_objects_v2(
                        Bucket=self.simcore_bucket_name, Prefix=s3_key)
                    if response.get("KeyCount", 0) == 0:
                        # this file does not exist in S3
                        to_remove.append(s3_key)

                    if len(to_remove) >= PRUNE_CHUNK_SIZE:
                        await _prune_db_table(conn)

                if to_remove:
                    await _prune_db_table(conn)

                assert len(to_remove) == 0  # nosec
                assert len(removed) <= number_of_rows_in_db  # nosec

                logger.info(
                    "%s %d entries ",
                    "Would delete" if dry_run else "Deleting",
                    len(removed),
                )

        return {"removed": removed}
Example #28
0
async def assert_service_is_running(
        service_id: str,
        docker,
        *,
        max_running_delay=1 * MINUTE
) -> Tuple[List[TaskDict], TenacityStatsDict]:
    MAX_WAIT = 5
    assert max_running_delay > 3 * MAX_WAIT

    #
    # The retry policy constrains the time a service takes
    # from being deployed by the swarm
    # until it is running (i.e. started and healthy)
    #
    retry_policy = dict(
        # wait_random instead of wait_fixed helps parallel execution in asyncio.gather
        wait=wait_random(1, MAX_WAIT),
        stop=stop_after_delay(max_running_delay),
        before_sleep=before_sleep_log(log, logging.INFO),
        reraise=True,
    )

    async for attempt in AsyncRetrying(**retry_policy):
        with attempt:

            # service
            service: ServiceDict = await docker.services.inspect(service_id)

            assert service_id == service["ID"]

            service_name = service["Spec"]["Name"]
            num_replicas = int(
                get_from_dict(service,
                              "Spec.Mode.Replicated.Replicas",
                              default=1))

            # tasks in a service
            tasks: List[TaskDict] = await docker.tasks.list(
                filters={"service": service_name})

            tasks_current_state = [task["Status"]["State"] for task in tasks]
            num_running = sum(current == "running"
                              for current in tasks_current_state)

            # assert condition
            is_running: bool = num_replicas == num_running

            error_msg = ""
            if not is_running:
                # lazily compose the error msg
                logs_lines = await docker.services.logs(
                    service_id,
                    follow=False,
                    timestamps=True,
                    tail=50,  # SEE *_docker_logs artifacts for details
                )
                log_str = " ".join(logs_lines)
                tasks_json = json.dumps(
                    [
                        copy_from_dict(
                            task,
                            include={
                                "ID":...,
                                "CreatedAt":...,
                                "UpdatedAt":...,
                                "Spec": {
                                    "ContainerSpec": {"Image"}
                                },
                                "Status": {"Timestamp", "State"},
                                "DesiredState":...,
                            },
                        ) for task in tasks
                    ],
                    indent=1,
                )
                error_msg = (
                    f"{service_name=} has {tasks_current_state=}, but expected at least {num_replicas=} running. "
                    f"Details:\n"
                    f"tasks={tasks_json}\n"
                    f"logs={log_str}\n")

            assert is_running, error_msg

            log.info(
                "Connection to %s succeded [%s]",
                service_name,
                json.dumps(attempt.retry_state.retry_object.statistics),
            )

            return tasks, attempt.retry_state.retry_object.statistics
    assert False  # never reached
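
The helper above relies on tenacity's retry-until-assertion idiom: an AssertionError raised inside the "with attempt:" block marks the attempt as failed and schedules another retry until the stop condition is reached, and reraise=True surfaces the last assertion error instead of a RetryError. A stripped-down sketch of the same loop, with a hypothetical check_ready coroutine standing in for the docker inspection:

import asyncio
import logging

from tenacity import AsyncRetrying, before_sleep_log, stop_after_delay, wait_random

log = logging.getLogger(__name__)


async def check_ready() -> bool:
    # e.g. inspect a service and return True once all replicas are running
    return True


async def wait_until_ready(max_delay: float = 60) -> dict:
    async for attempt in AsyncRetrying(
            wait=wait_random(1, 5),
            stop=stop_after_delay(max_delay),
            before_sleep=before_sleep_log(log, logging.INFO),
            reraise=True,
    ):
        with attempt:
            assert await check_ready(), "not ready yet"  # AssertionError -> retry
            return attempt.retry_state.retry_object.statistics
    raise RuntimeError("unreachable")  # the loop either returns or reraises


asyncio.run(wait_until_ready())
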
Example #29
0
    async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
        logger.debug(
            "Getting docker compose spec for service %s", scheduler_data.service_name
        )

        dynamic_sidecar_client = get_dynamic_sidecar_client(app)
        dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

        # Starts dynamic SIDECAR -------------------------------------
        # creates a docker compose spec given the service key and tag
        compose_spec = assemble_spec(
            app=app,
            service_key=scheduler_data.key,
            service_tag=scheduler_data.version,
            paths_mapping=scheduler_data.paths_mapping,
            compose_spec=scheduler_data.compose_spec,
            container_http_entry=scheduler_data.container_http_entry,
            dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name,
        )

        await dynamic_sidecar_client.start_service_creation(
            dynamic_sidecar_endpoint, compose_spec
        )

        # Starts PROXY -----------------------------------------------
        # The entrypoint container name has now been computed,
        # continue by starting the proxy

        # check values have been set by previous step
        if (
            scheduler_data.dynamic_sidecar.dynamic_sidecar_id is None
            or scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id is None
            or scheduler_data.dynamic_sidecar.swarm_network_id is None
            or scheduler_data.dynamic_sidecar.swarm_network_name is None
        ):
            raise ValueError(
                (
                    "Expected a value for all the following values: "
                    f"{scheduler_data.dynamic_sidecar.dynamic_sidecar_id=} "
                    f"{scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id=} "
                    f"{scheduler_data.dynamic_sidecar.swarm_network_id=} "
                    f"{scheduler_data.dynamic_sidecar.swarm_network_name=}"
                )
            )

        dynamic_sidecar_settings: DynamicSidecarSettings = (
            app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
        )

        async for attempt in AsyncRetrying(
            stop=stop_after_delay(
                dynamic_sidecar_settings.DYNAMIC_SIDECAR_WAIT_FOR_CONTAINERS_TO_START
            ),
            wait=wait_fixed(1),
            retry_error_cls=EntrypointContainerNotFoundError,
            before_sleep=before_sleep_log(logger, logging.WARNING),
        ):
            with attempt:
                if scheduler_data.dynamic_sidecar.service_removal_state.was_removed:
                    # the service was removed while waiting for the operation to finish
                    logger.warning(
                        "Stopping `get_entrypoint_container_name` operation. "
                        "Will no try to start the service."
                    )
                    return

                entrypoint_container = await dynamic_sidecar_client.get_entrypoint_container_name(
                    dynamic_sidecar_endpoint=dynamic_sidecar_endpoint,
                    dynamic_sidecar_network_name=scheduler_data.dynamic_sidecar_network_name,
                )
                logger.info(
                    "Fetched container entrypoint name %s", entrypoint_container
                )

        dynamic_sidecar_node_id = await get_node_id_from_task_for_service(
            scheduler_data.dynamic_sidecar.dynamic_sidecar_id, dynamic_sidecar_settings
        )

        dynamic_sidecar_proxy_create_service_params = get_dynamic_proxy_spec(
            scheduler_data=scheduler_data,
            dynamic_sidecar_settings=dynamic_sidecar_settings,
            dynamic_sidecar_network_id=scheduler_data.dynamic_sidecar.dynamic_sidecar_network_id,
            swarm_network_id=scheduler_data.dynamic_sidecar.swarm_network_id,
            swarm_network_name=scheduler_data.dynamic_sidecar.swarm_network_name,
            dynamic_sidecar_node_id=dynamic_sidecar_node_id,
            entrypoint_container_name=entrypoint_container,
            service_port=scheduler_data.service_port,
        )

        logger.debug(
            "dynamic-sidecar-proxy create_service_params %s",
            json_dumps(dynamic_sidecar_proxy_create_service_params),
        )

        # no need for the id any longer
        await create_service_and_get_id(dynamic_sidecar_proxy_create_service_params)
        scheduler_data.dynamic_sidecar.were_services_created = True

        scheduler_data.dynamic_sidecar.was_compose_spec_submitted = True
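
A related tenacity idiom is to keep retrying only while a specific exception type is raised. A minimal sketch; NotFoundYet and fetch_entrypoint below are hypothetical stand-ins, not the director-v2 client API:

import asyncio

from tenacity import AsyncRetrying, retry_if_exception_type, stop_after_delay, wait_fixed


class NotFoundYet(Exception):
    """Raised while the entrypoint container is not yet listed."""


async def fetch_entrypoint() -> str:
    # pretend the container eventually shows up; raise NotFoundYet until then
    return "entrypoint-container"


async def wait_for_entrypoint() -> str:
    async for attempt in AsyncRetrying(
            retry=retry_if_exception_type(NotFoundYet),  # any other exception propagates at once
            wait=wait_fixed(1),
            stop=stop_after_delay(30),
            reraise=True,
    ):
        with attempt:
            return await fetch_entrypoint()
    raise RuntimeError("unreachable")


print(asyncio.run(wait_for_entrypoint()))
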
Example #30
0
async def test_dask_sub_handlers(
    dask_client: DaskClient,
    user_id: UserID,
    project_id: ProjectID,
    cluster_id: ClusterID,
    cpu_image: ImageParams,
    mocked_node_ports: None,
    mocked_user_completed_cb: mock.AsyncMock,
    fake_task_handlers: TaskHandlers,
):
    dask_client.register_handlers(fake_task_handlers)
    _DASK_START_EVENT = "start"

    def fake_remote_fct(
        docker_auth: DockerBasicAuth,
        service_key: str,
        service_version: str,
        input_data: TaskInputData,
        output_data_keys: TaskOutputDataSchema,
        log_file_url: AnyUrl,
        command: List[str],
    ) -> TaskOutputData:

        state_pub = distributed.Pub(TaskStateEvent.topic_name())
        progress_pub = distributed.Pub(TaskProgressEvent.topic_name())
        logs_pub = distributed.Pub(TaskLogEvent.topic_name())
        state_pub.put("my name is state")
        progress_pub.put("my name is progress")
        logs_pub.put("my name is logs")
        # tell the client we are done
        published_event = Event(name=_DASK_START_EVENT)
        published_event.set()

        return TaskOutputData.parse_obj({"some_output_key": 123})

    # run the computation
    node_id_to_job_ids = await dask_client.send_computation_tasks(
        user_id=user_id,
        project_id=project_id,
        cluster_id=cluster_id,
        tasks=cpu_image.fake_tasks,
        callback=mocked_user_completed_cb,
        remote_fct=fake_remote_fct,
    )
    assert node_id_to_job_ids
    assert len(node_id_to_job_ids) == 1
    node_id, job_id = node_id_to_job_ids[0]
    assert node_id in cpu_image.fake_tasks
    computation_future = distributed.Future(job_id)
    print("--> waiting for job to finish...")
    await distributed.wait(computation_future, timeout=_ALLOW_TIME_FOR_GATEWAY_TO_CREATE_WORKERS)  # type: ignore
    assert computation_future.done()
    print("job finished, now checking that we received the publications...")

    async for attempt in AsyncRetrying(
        reraise=True,
        wait=wait_fixed(1),
        stop=stop_after_delay(5),
    ):
        with attempt:
            print(
                f"waiting for call in mocked fct {fake_task_handlers}, "
                f"Attempt={attempt.retry_state.attempt_number}"
            )
            # we should have received data in our TaskHandlers
            fake_task_handlers.task_change_handler.assert_called_with(
                "my name is state"
            )
            fake_task_handlers.task_progress_handler.assert_called_with(
                "my name is progress"
            )
            fake_task_handlers.task_log_handler.assert_called_with("my name is logs")
    await _assert_wait_for_cb_call(mocked_user_completed_cb)
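
For context, the fake remote function above talks back to the client through dask's Pub/Sub mechanism. A minimal, standalone sketch of that pattern; the topic name and payload below are made up and independent of the simcore task events:

import distributed


def task_that_publishes() -> int:
    pub = distributed.Pub("my-topic")
    pub.put("hello from the worker")
    return 42


if __name__ == "__main__":
    client = distributed.Client(processes=False)
    sub = distributed.Sub("my-topic")  # subscribe before submitting the task
    future = client.submit(task_that_publishes)
    assert future.result() == 42
    print(sub.get(timeout=10))  # -> "hello from the worker"
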
Example #31
0
    async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
        # invoke container cleanup at this point
        dynamic_sidecar_client: DynamicSidecarClient = get_dynamic_sidecar_client(app)

        try:
            await dynamic_sidecar_client.begin_service_destruction(
                dynamic_sidecar_endpoint=scheduler_data.dynamic_sidecar.endpoint
            )
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(
                "Could not contact dynamic-sidecar to begin destruction of %s\n%s",
                scheduler_data.service_name,
                str(e),
            )

        app_settings: AppSettings = app.state.settings
        dynamic_sidecar_settings: DynamicSidecarSettings = (
            app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
        )

        if scheduler_data.dynamic_sidecar.service_removal_state.can_save:
            dynamic_sidecar_client = get_dynamic_sidecar_client(app)
            dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

            logger.info(
                "Calling into dynamic-sidecar to save state and pushing data to nodeports"
            )
            try:
                tasks = [
                    dynamic_sidecar_client.service_push_output_ports(
                        dynamic_sidecar_endpoint,
                    )
                ]
                # when dev features are enabled, state is no longer uploaded via nodeports;
                # S3 is used to store the state paths instead
                if not app_settings.DIRECTOR_V2_DEV_FEATURES_ENABLED:
                    tasks.append(
                        dynamic_sidecar_client.service_save_state(
                            dynamic_sidecar_endpoint,
                        )
                    )
                await logged_gather(*tasks)
                logger.info("Ports data pushed by dynamic-sidecar")
            except Exception as e:  # pylint: disable=broad-except
                logger.warning(
                    (
                        "Could not contact dynamic-sidecar to save service "
                        "state and upload outputs %s\n%s"
                    ),
                    scheduler_data.service_name,
                    str(e),
                )

        # remove the 2 services
        await remove_dynamic_sidecar_stack(
            node_uuid=scheduler_data.node_uuid,
            dynamic_sidecar_settings=dynamic_sidecar_settings,
        )
        # remove network
        await remove_dynamic_sidecar_network(
            scheduler_data.dynamic_sidecar_network_name
        )

        # remove created inputs and outputs volumes
        async for attempt in AsyncRetrying(
            wait=wait_exponential(min=1),
            stop=stop_after_delay(20),
            retry_error_cls=GenericDockerError,
        ):
            with attempt:
                logger.info(
                    "Trying to remove volumes for %s", scheduler_data.service_name
                )
                await remove_dynamic_sidecar_volumes(scheduler_data.node_uuid)

        logger.debug(
            "Removed dynamic-sidecar created services for '%s'",
            scheduler_data.service_name,
        )

        await app.state.dynamic_sidecar_scheduler.finish_service_removal(
            scheduler_data.node_uuid
        )

        scheduler_data.dynamic_sidecar.service_removal_state.mark_removed()