Пример #1
0
async def reenter_txn(pool: SAEngine, conn: SAConnection):
    """Yield a connection whose work runs inside a transaction scope.

    If ``conn`` is ``None``, a connection is acquired from ``pool`` and
    ``begin()`` is entered on it; both scopes are exited once the generator
    is resumed past the ``yield``.  Otherwise the given connection is reused
    and the ``yield`` happens inside ``conn.begin_nested()``.

    NOTE(review): as written this is a plain async generator; presumably it
    is wrapped by an async-context-manager decorator that is not visible
    here -- confirm against the full file.
    """
    if conn is not None:
        # The caller already holds a connection: nest inside it.
        async with conn.begin_nested():
            yield conn
        return

    # No connection was supplied: acquire one and open a transaction on it.
    async with pool.acquire() as fresh_conn:
        async with fresh_conn.begin():
            yield fresh_conn
Пример #2
0
    async def _schedule_multi_node_session(
        self,
        sched_ctx: SchedulingContext,
        scheduler: AbstractScheduler,
        agent_db_conn: SAConnection,
        kernel_db_conn: SAConnection,
        sgroup_name: str,
        candidate_agents: Sequence[AgentContext],
        sess_ctx: PendingSession,
        check_results: List[Tuple[str, Union[Exception, PredicateResult]]],
    ) -> Tuple[PendingSession, List[KernelAgentBinding]]:
        """Reserve an agent for each kernel of the session, one kernel at a time.

        For every kernel in ``sess_ctx.kernels`` the scheduler is asked for an
        agent, the reservation is made through ``_reserve_agent`` and the
        candidate agent list is re-read from the database.  When all kernels
        have a binding, their rows are updated to ``KernelStatus.PREPARING``.

        Returns:
            ``(sess_ctx, bindings)`` where ``bindings`` holds one
            ``KernelAgentBinding`` per kernel.

        Raises:
            InstanceNotAvailable: the scheduler returned ``None`` for a kernel.
                The failing kernel row is updated before re-raising.
            Exception: any other error during allocation is logged, recorded
                on the failing kernel row, and re-raised unchanged.
        """
        fmt_prefix = _log_fmt.get()
        fmt_args = _log_args.get()
        extra_conds = None
        bindings: List[KernelAgentBinding] = []

        async def _record_failure(failed_kernel, build_values):
            # Runs the failure callbacks and then updates the failed kernel's
            # row, both inside one transaction on the kernel connection.
            # ``build_values`` is called only after the callbacks have run, so
            # the column values are computed at the same point as before.
            async with kernel_db_conn.begin():
                await _invoke_failure_callbacks(
                    kernel_db_conn, sched_ctx, sess_ctx, check_results,
                )
                stmt = (
                    kernels.update()
                    .values(build_values())
                    .where(kernels.c.id == failed_kernel.kernel_id)
                )
                await kernel_db_conn.execute(stmt)

        # Any exception escaping this outer transaction (including a scheduling
        # failure of a single kernel) rolls it back, so occupied_slots are
        # restored when the session could only be partially scheduled.
        async with agent_db_conn.begin(isolation_level="REPEATABLE READ"):
            for kernel in sess_ctx.kernels:
                try:
                    agent_id = scheduler.assign_agent_for_kernel(candidate_agents, kernel)
                    if agent_id is None:
                        raise InstanceNotAvailable
                    async with agent_db_conn.begin_nested():
                        alloc_ctx = await _reserve_agent(
                            sched_ctx,
                            agent_db_conn,
                            sgroup_name,
                            agent_id,
                            kernel.requested_slots,
                            extra_conds=extra_conds,
                        )
                        # Re-read the candidates so the next kernel is assigned
                        # against the list as it stands after this reservation.
                        candidate_agents = await _list_agents_by_sgroup(
                            agent_db_conn, sgroup_name,
                        )
                except InstanceNotAvailable:
                    log.debug(fmt_prefix + 'no-available-instances', *fmt_args)
                    await _record_failure(kernel, lambda: {
                        'status_info': "no-available-instances",
                        'status_data': sql_json_increment(
                            kernels.c.status_data,
                            ('scheduler', 'retries'),
                            parent_updates={
                                'last_try': datetime.now(tzutc()).isoformat(),
                            },
                        ),
                    })
                    raise
                except Exception as exc:
                    log.exception(
                        fmt_prefix + 'unexpected-error, during agent allocation',
                        *fmt_args,
                    )
                    await _record_failure(kernel, lambda: {
                        'status_info': "scheduler-error",
                        'status_data': convert_to_status_data(exc),
                    })
                    raise
                else:
                    bindings.append(KernelAgentBinding(kernel, alloc_ctx))

        # Move on to PREPARING only when every kernel got a binding.
        if len(bindings) != len(sess_ctx.kernels):
            return (sess_ctx, bindings)

        async with kernel_db_conn.begin():
            for bound in bindings:
                stmt = (
                    kernels.update()
                    .values({
                        'agent': bound.agent_alloc_ctx.agent_id,
                        'agent_addr': bound.agent_alloc_ctx.agent_addr,
                        'scaling_group': sgroup_name,
                        'status': KernelStatus.PREPARING,
                        'status_info': 'scheduled',
                        'status_data': {},
                        'status_changed': datetime.now(tzutc()),
                    })
                    .where(kernels.c.id == bound.kernel.kernel_id)
                )
                await kernel_db_conn.execute(stmt)

        return (sess_ctx, bindings)