async def log_cleanup_task(app: web.Application, interval):
    """
    Delete error-log rows older than the configured retention period.

    The retention period is read from the etcd key
    ``config/logs/error/retention`` and validated with ``tx.TimeDuration``.
    When the key is missing, or its value is rejected by the validator,
    a retention of 90 days is used instead.

    The select and delete statements run inside one database transaction
    while the ``gateway.logs`` lock obtained from ``app['log_cleanup_lock']``
    is held.  When acquiring the lock fails with ``aioredlock.LockError``,
    nothing is deleted and the failure is only logged at debug level.

    :param app: The application object providing ``dbpool``,
        ``config_server`` and ``log_cleanup_lock``.
    :param interval: Not used by this function.
        NOTE(review): presumably required by the timer that invokes this
        task -- confirm against the caller.
    """
    dbpool = app['dbpool']
    etcd = app['config_server'].etcd
    raw_lifetime = await etcd.get('config/logs/error/retention')
    if raw_lifetime is None:
        raw_lifetime = '90d'
    checker = tx.TimeDuration()
    try:
        lifetime = checker.check(raw_lifetime)
    except ValueError:
        lifetime = dt.timedelta(days=90)
        # The two literals are concatenated by the parser, so the first one
        # must end with a space to keep the words apart.
        log.info('Retention value specified in etcd not recognized by '
                 'trafaret validator, falling back to 90 days')
    boundary = datetime.now() - lifetime
    try:
        lock = await app['log_cleanup_lock'].lock('gateway.logs')
        async with lock:
            async with dbpool.acquire() as conn, conn.begin():
                query = (
                    sa.select([error_logs.c.id])
                    .select_from(error_logs)
                    .where(error_logs.c.created_at < boundary)
                )
                result = await conn.execute(query)
                log_ids = [row['id'] async for row in result]
                if log_ids:
                    log.info('Cleaning up {} log{}', len(log_ids),
                             's' if len(log_ids) > 1 else '')
                    query = error_logs.delete().where(error_logs.c.id.in_(log_ids))
                    result = await conn.execute(query)
                    # Sanity check: a mismatch raises inside the transaction
                    # context, before it is left normally.
                    assert result.rowcount == len(log_ids)
    except aioredlock.LockError:
        log.debug('log_cleanup_task(): temporary locking failure; will be retried.')
async def stream_conn_tracker_gc(app: web.Application) -> None:
    """
    Repeatedly purge stale entries from the per-session connection trackers.

    Every pass re-reads ``config/idle/app-streaming-packet-timeout`` from
    etcd (default ``5m``).  Then, while holding ``app['conn_tracker_lock']``,
    it visits every session ID in ``app['active_session_ids']`` and removes
    the members of the Redis sorted set
    ``session.{session_id}.active_app_connections`` whose score is not
    greater than "current Redis time minus the timeout".

    When a session's set goes from non-empty to empty during a pass, every
    checker in ``app['idle_checkers']`` is notified with
    ``AppStreamingStatus.NO_ACTIVE_CONNECTIONS``.

    The loop sleeps 10 seconds between passes and ends silently when the
    task is cancelled.
    """
    live_redis: aioredis.Redis = app['redis_live']
    config: SharedConfig = app['shared_config']
    try:
        while True:
            duration_checker = tx.TimeDuration()
            raw_timeout = await config.etcd.get(
                'config/idle/app-streaming-packet-timeout', '5m')
            packet_timeout: timedelta = duration_checker.check(raw_timeout)
            async with app['conn_tracker_lock']:
                now = await live_redis.time()
                for session_id in app['active_session_ids'].keys():
                    tracker_key = f"session.{session_id}.active_app_connections"
                    count_before = await live_redis.zcount(tracker_key)
                    removed_count = await live_redis.zremrangebyscore(
                        tracker_key,
                        float('-inf'),
                        now - packet_timeout.total_seconds(),
                    )
                    count_after = await live_redis.zcount(tracker_key)
                    log.debug(f"conn_tracker: gc {session_id} "
                              f"removed/remaining = {removed_count}/{count_after}")
                    # Only the transition "had connections -> has none"
                    # is reported to the idle checkers.
                    if count_before <= 0 or count_after != 0:
                        continue
                    for checker in app['idle_checkers']:
                        await checker.update_app_streaming_status(
                            session_id,
                            AppStreamingStatus.NO_ACTIVE_CONNECTIONS,
                        )
            await asyncio.sleep(10)
    except asyncio.CancelledError:
        pass
class TimeoutIdleChecker(BaseIdleChecker):
    """
    Checks the idleness of a session by the elapsed time since last used.
    The usage means processing of any computation requests, such as
    query/batch-mode code execution and having active service-port connections.

    The last-access time of a session is stored in the Redis key
    ``session.{session_id}.last_access``.  The value ``"0"`` is written
    there while the timeout is disabled for that session.
    """

    name: ClassVar[str] = "timeout"

    _config_iv = t.Dict({
        t.Key('threshold', default="10m"): tx.TimeDuration(),
    }).allow_extra('*')

    # Default idle timeout; set by populate_config().
    idle_timeout: timedelta
    # Keypair resource policies looked up during one idle-check run,
    # keyed by access key.
    _policy_cache: ContextVar[Dict[AccessKey, Optional[Mapping[str, Any]]]]

    async def __ainit__(self) -> None:
        """
        Create the policy cache variable and subscribe to session and
        execution events.
        """
        await super().__ainit__()
        self._policy_cache = ContextVar('_policy_cache')
        dispatcher = self._event_dispatcher
        self._evh_session_started = dispatcher.consume(
            "session_started", None, self._session_started_cb)
        self._evh_execution_started = dispatcher.consume(
            "execution_started", None, self._execution_started_cb)
        self._evh_execution_finished = dispatcher.consume(
            "execution_finished", None, self._execution_exited_cb)
        self._evh_execution_timeout = dispatcher.consume(
            "execution_timeout", None, self._execution_exited_cb)
        self._evh_execution_cancelled = dispatcher.consume(
            "execution_cancelled", None, self._execution_exited_cb)

    async def aclose(self) -> None:
        """
        Unsubscribe every event handler registered in ``__ainit__`` and
        then close the base checker.
        """
        dispatcher = self._event_dispatcher
        dispatcher.unconsume("session_started", self._evh_session_started)
        dispatcher.unconsume("execution_started", self._evh_execution_started)
        dispatcher.unconsume("execution_finished", self._evh_execution_finished)
        dispatcher.unconsume("execution_timeout", self._evh_execution_timeout)
        dispatcher.unconsume("execution_cancelled", self._evh_execution_cancelled)
        await super().aclose()

    async def populate_config(self, raw_config: Mapping[str, Any]) -> None:
        """
        Validate *raw_config* and take its ``threshold`` entry (default
        ``10m``) as the default idle timeout.
        """
        config = self._config_iv.check(raw_config)
        self.idle_timeout = config['threshold']
        log.info(
            'TimeoutIdleChecker: default idle_timeout = {0:,} seconds',
            self.idle_timeout.total_seconds(),
        )

    async def update_app_streaming_status(
        self,
        session_id: SessionId,
        status: AppStreamingStatus,
    ) -> None:
        """
        Disable the timeout while the session has active app connections
        and restart it when the last connection is gone.  Any other status
        value is ignored.
        """
        if status == AppStreamingStatus.HAS_ACTIVE_CONNECTIONS:
            await self._disable_timeout(session_id)
        elif status == AppStreamingStatus.NO_ACTIVE_CONNECTIONS:
            await self._update_timeout(session_id)

    async def _disable_timeout(self, session_id: SessionId) -> None:
        """
        Overwrite the session's last-access record with ``"0"``, but only
        if the record already exists.
        """
        log.debug(f"TimeoutIdleChecker._disable_timeout({session_id})")
        await self._redis.set(f"session.{session_id}.last_access", "0",
                              exist=self._redis.SET_IF_EXIST)

    async def _update_timeout(self, session_id: SessionId) -> None:
        """
        Store the current Redis time as the session's last-access record.

        The record expires after twice the default idle timeout, but never
        sooner than one day (86400 seconds).
        """
        log.debug(f"TimeoutIdleChecker._update_timeout({session_id})")
        now = await self._redis.time()
        # total_seconds() returns a float, but the expiry has to be a whole
        # number of seconds; convert before max() so that the result is an
        # int even when the doubled timeout exceeds one day.
        expire_secs = max(86400, int(self.idle_timeout.total_seconds() * 2))
        await self._redis.set(
            f"session.{session_id}.last_access",
            f"{now:.06f}",
            expire=expire_secs,
        )

    async def _session_started_cb(
        self,
        context: Any,
        agent_id: AgentId,
        event_name: str,
        session_id: SessionId,
        creation_id: str,
    ) -> None:
        """Start the idle timer when a session starts."""
        await self._update_timeout(session_id)

    async def _execution_started_cb(
        self,
        context: Any,
        agent_id: AgentId,
        event_name: str,
        session_id: SessionId,
    ) -> None:
        """Suspend the idle timer when an execution starts."""
        await self._disable_timeout(session_id)

    async def _execution_exited_cb(
        self,
        context: Any,
        agent_id: AgentId,
        event_name: str,
        session_id: SessionId,
    ) -> None:
        """
        Restart the idle timer when an execution is finished, timed out,
        or cancelled.
        """
        await self._update_timeout(session_id)

    async def _do_idle_check(self, context: Any, agent_id: AgentId, event_name: str) -> None:
        """
        Run the base idle check with a fresh, empty policy cache and
        restore the previous cache state afterwards.
        """
        cache_token = self._policy_cache.set(dict())
        try:
            return await super()._do_idle_check(context, agent_id, event_name)
        finally:
            self._policy_cache.reset(cache_token)

    async def check_session(self, session: RowProxy, dbconn: SAConnection) -> bool:
        """
        Decide whether *session* is still within its idle timeout.

        :param session: A session row; its ``id`` and ``access_key``
            columns are read.
        :param dbconn: A database connection used to look up the keypair
            resource policy when it is not in the per-run cache.
        :returns: ``False`` only when the time since the recorded last
            access exceeds the effective idle timeout.  ``True`` when the
            session has active app connections, has no last-access record
            or a ``"0"`` one, has an infinite timeout, or is still within
            the timeout.
        """
        session_id = session['id']
        active_streams = await self._redis.zcount(
            f"session.{session_id}.active_app_connections")
        if active_streams is not None and active_streams > 0:
            return True
        now = await self._redis.time()
        raw_last_access = await self._redis.get(
            f"session.{session_id}.last_access")
        if raw_last_access is None or raw_last_access == "0":
            return True
        last_access = float(raw_last_access)
        # serves as the default fallback if keypair resource policy's
        # idle_timeout is "undefined"
        idle_timeout = self.idle_timeout.total_seconds()
        policy_cache = self._policy_cache.get()
        policy = policy_cache.get(session['access_key'], None)
        if policy is None:
            query = (
                sa.select([keypair_resource_policies])
                .select_from(
                    sa.join(
                        keypairs, keypair_resource_policies,
                        (keypair_resource_policies.c.name == keypairs.c.resource_policy),
                    )
                )
                .where(keypairs.c.access_key == session['access_key'])
            )
            result = await dbconn.execute(query)
            policy = await result.first()
            assert policy is not None
            policy_cache[session['access_key']] = policy
        # setting idle_timeout:
        # - zero/inf means "infinite"
        # - negative means "undefined"
        if policy['idle_timeout'] >= 0:
            idle_timeout = float(policy['idle_timeout'])
        if (
            (idle_timeout <= 0)
            or (math.isinf(idle_timeout) and idle_timeout > 0)
            or (now - last_access <= idle_timeout)
        ):
            return True
        return False