def periodic_sync_routers_task(self, context): self.process_services_sync(context) LOG.debug("Starting periodic_sync_routers_task - fullsync:%s", self.fullsync) if not self.fullsync: return # self.fullsync is True at this point. If an exception -- caught or # uncaught -- prevents setting it to False below then the next call # to periodic_sync_routers_task will re-enter this code and try again. # Capture a picture of namespaces *before* fetching the full list from # the database. This is important to correctly identify stale ones. namespaces = set() if self._clean_stale_namespaces: namespaces = self._list_namespaces() prev_router_ids = set(self.router_info) timestamp = timeutils.utcnow() try: if self.conf.use_namespaces: routers = self.plugin_rpc.get_routers(context) else: routers = self.plugin_rpc.get_routers(context, [self.conf.router_id]) except oslo_messaging.MessagingException: LOG.exception(_LE("Failed synchronizing routers due to RPC error")) else: LOG.debug('Processing :%r', routers) for r in routers: update = queue.RouterUpdate(r['id'], queue.PRIORITY_SYNC_ROUTERS_TASK, router=r, timestamp=timestamp) self._queue.add(update) self.fullsync = False LOG.debug("periodic_sync_routers_task successfully completed") # Resync is not necessary for the cleanup of stale namespaces curr_router_ids = set([r['id'] for r in routers]) # Two kinds of stale routers: Routers for which info is cached in # self.router_info and the others. First, handle the former. for router_id in prev_router_ids - curr_router_ids: update = queue.RouterUpdate(router_id, queue.PRIORITY_SYNC_ROUTERS_TASK, timestamp=timestamp, action=queue.DELETE_ROUTER) self._queue.add(update) # Next, one effort to clean out namespaces for which we don't have # a record. (i.e. _clean_stale_namespaces=False after one pass) if self._clean_stale_namespaces: ids_to_keep = curr_router_ids | prev_router_ids self._cleanup_namespaces(namespaces, ids_to_keep)
def test_updates(self): master = l3_queue.ExclusiveRouterProcessor(FAKE_ID) not_master = l3_queue.ExclusiveRouterProcessor(FAKE_ID) master.queue_update(l3_queue.RouterUpdate(FAKE_ID, 0)) not_master.queue_update(l3_queue.RouterUpdate(FAKE_ID, 0)) for update in not_master.updates(): raise Exception("Only the master should process a router") self.assertEqual(2, len([i for i in master.updates()]))
def create_pd_router_update(self): router_id = None update = queue.RouterUpdate(router_id, queue.PRIORITY_PD_UPDATE, timestamp=timeutils.utcnow(), action=queue.PD_UPDATE) self._queue.add(update)
def router_removed_from_agent(self, context, payload): LOG.debug('Got router removed from agent :%r', payload) router_id = payload['router_id'] update = queue.RouterUpdate(router_id, queue.PRIORITY_RPC, action=queue.DELETE_ROUTER) self._queue.add(update)
def router_deleted(self, context, router_id): """Deal with router deletion RPC message.""" LOG.debug('Got router deleted notification for %s', router_id) update = queue.RouterUpdate(router_id, queue.PRIORITY_RPC, action=queue.DELETE_ROUTER) self._queue.add(update)
def network_update(self, context, **kwargs): network_id = kwargs['network']['id'] for ri in self.router_info.values(): ports = itertools.chain(ri.internal_ports, [ri.ex_gw_port]) port_belongs = lambda p: p['network_id'] == network_id if any(port_belongs(p) for p in ports): update = queue.RouterUpdate( ri.router_id, queue.PRIORITY_SYNC_ROUTERS_TASK) self._resync_router(update)
def routers_updated(self, context, routers): """Deal with routers modification and creation RPC message.""" LOG.debug('Got routers updated notification :%s', routers) if routers: # This is needed for backward compatibility if isinstance(routers[0], dict): routers = [router['id'] for router in routers] for id in routers: update = queue.RouterUpdate(id, queue.PRIORITY_RPC) self._queue.add(update)
def test_hit_retry_limit(self): tries = 1 queue = l3_queue.RouterProcessingQueue() update = l3_queue.RouterUpdate(FAKE_ID, l3_queue.PRIORITY_RPC, tries=tries) queue.add(update) self.assertFalse(update.hit_retry_limit()) queue.add(update) self.assertTrue(update.hit_retry_limit())
def fetch_and_sync_all_routers(self, context, ns_manager): prev_router_ids = set(self.router_info) timestamp = timeutils.utcnow() try: if self.conf.use_namespaces: routers = self.plugin_rpc.get_routers(context) else: routers = self.plugin_rpc.get_routers(context, [self.conf.router_id]) except oslo_messaging.MessagingException: LOG.exception(_LE("Failed synchronizing routers due to RPC error")) raise n_exc.AbortSyncRouters() else: LOG.debug('Processing :%r', routers) for r in routers: ns_manager.keep_router(r['id']) if r.get('distributed'): # need to keep fip namespaces as well ext_net_id = (r['external_gateway_info'] or {}).get('network_id') if ext_net_id: ns_manager.keep_ext_net(ext_net_id) update = queue.RouterUpdate(r['id'], queue.PRIORITY_SYNC_ROUTERS_TASK, router=r, timestamp=timestamp) self._queue.add(update) self.fullsync = False LOG.debug("periodic_sync_routers_task successfully completed") curr_router_ids = set([r['id'] for r in routers]) # Delete routers that have disappeared since the last sync for router_id in prev_router_ids - curr_router_ids: ns_manager.keep_router(router_id) update = queue.RouterUpdate(router_id, queue.PRIORITY_SYNC_ROUTERS_TASK, timestamp=timestamp, action=queue.DELETE_ROUTER) self._queue.add(update)
def fetch_and_sync_all_routers(self, context, ns_manager): prev_router_ids = set(self.router_info) curr_router_ids = set() timestamp = timeutils.utcnow() router_ids = [] chunk = [] is_snat_agent = (self.conf.agent_mode == lib_const.L3_AGENT_MODE_DVR_SNAT) try: router_ids = self.plugin_rpc.get_router_ids(context) # We set HA network port status to DOWN to let l2 agent update it # to ACTIVE after wiring. This allows us to spawn keepalived only # when l2 agent finished wiring the port. self.plugin_rpc.update_all_ha_network_port_statuses(context) # fetch routers by chunks to reduce the load on server and to # start router processing earlier for i in range(0, len(router_ids), self.sync_routers_chunk_size): chunk = router_ids[i:i + self.sync_routers_chunk_size] routers = self.plugin_rpc.get_routers(context, chunk) LOG.debug('Processing :%r', routers) for r in routers: curr_router_ids.add(r['id']) ns_manager.keep_router(r['id']) if r.get('distributed'): # need to keep fip namespaces as well ext_net_id = (r['external_gateway_info'] or {}).get( 'network_id') if ext_net_id: ns_manager.keep_ext_net(ext_net_id) elif is_snat_agent and not r.get('ha'): ns_manager.ensure_snat_cleanup(r['id']) update = queue.RouterUpdate( r['id'], queue.PRIORITY_SYNC_ROUTERS_TASK, router=r, timestamp=timestamp) self._queue.add(update) except oslo_messaging.MessagingTimeout: if self.sync_routers_chunk_size > SYNC_ROUTERS_MIN_CHUNK_SIZE: self.sync_routers_chunk_size = max( self.sync_routers_chunk_size / 2, SYNC_ROUTERS_MIN_CHUNK_SIZE) LOG.error('Server failed to return info for routers in ' 'required time, decreasing chunk size to: %s', self.sync_routers_chunk_size) else: LOG.error('Server failed to return info for routers in ' 'required time even with min chunk size: %s. ' 'It might be under very high load or ' 'just inoperable', self.sync_routers_chunk_size) raise except oslo_messaging.MessagingException: failed_routers = chunk or router_ids LOG.exception("Failed synchronizing routers '%s' " "due to RPC error", failed_routers) raise n_exc.AbortSyncRouters() self.fullsync = False LOG.debug("periodic_sync_routers_task successfully completed") # adjust chunk size after successful sync if self.sync_routers_chunk_size < SYNC_ROUTERS_MAX_CHUNK_SIZE: self.sync_routers_chunk_size = min( self.sync_routers_chunk_size + SYNC_ROUTERS_MIN_CHUNK_SIZE, SYNC_ROUTERS_MAX_CHUNK_SIZE) # Delete routers that have disappeared since the last sync for router_id in prev_router_ids - curr_router_ids: ns_manager.keep_router(router_id) update = queue.RouterUpdate(router_id, queue.PRIORITY_SYNC_ROUTERS_TASK, timestamp=timestamp, action=queue.DELETE_ROUTER) self._queue.add(update)
def fetch_and_sync_all_routers(self, context, ns_manager): prev_router_ids = set(self.router_info) curr_router_ids = set() timestamp = timeutils.utcnow() try: router_ids = ([self.conf.router_id] if self.conf.router_id else self.plugin_rpc.get_router_ids(context)) # fetch routers by chunks to reduce the load on server and to # start router processing earlier for i in range(0, len(router_ids), self.sync_routers_chunk_size): routers = self.plugin_rpc.get_routers( context, router_ids[i:i + self.sync_routers_chunk_size]) LOG.debug('Processing :%r', routers) for r in routers: curr_router_ids.add(r['id']) ns_manager.keep_router(r['id']) if r.get('distributed'): # need to keep fip namespaces as well ext_net_id = (r['external_gateway_info'] or {}).get( 'network_id') if ext_net_id: ns_manager.keep_ext_net(ext_net_id) update = queue.RouterUpdate( r['id'], queue.PRIORITY_SYNC_ROUTERS_TASK, router=r, timestamp=timestamp) self._queue.add(update) except oslo_messaging.MessagingTimeout: if self.sync_routers_chunk_size > SYNC_ROUTERS_MIN_CHUNK_SIZE: self.sync_routers_chunk_size = max( self.sync_routers_chunk_size / 2, SYNC_ROUTERS_MIN_CHUNK_SIZE) LOG.error(_LE('Server failed to return info for routers in ' 'required time, decreasing chunk size to: %s'), self.sync_routers_chunk_size) else: LOG.error(_LE('Server failed to return info for routers in ' 'required time even with min chunk size: %s. ' 'It might be under very high load or ' 'just inoperable'), self.sync_routers_chunk_size) raise except oslo_messaging.MessagingException: LOG.exception(_LE("Failed synchronizing routers due to RPC error")) raise n_exc.AbortSyncRouters() self.fullsync = False LOG.debug("periodic_sync_routers_task successfully completed") # adjust chunk size after successful sync if self.sync_routers_chunk_size < SYNC_ROUTERS_MAX_CHUNK_SIZE: self.sync_routers_chunk_size = min( self.sync_routers_chunk_size + SYNC_ROUTERS_MIN_CHUNK_SIZE, SYNC_ROUTERS_MAX_CHUNK_SIZE) # Delete routers that have disappeared since the last sync for router_id in prev_router_ids - curr_router_ids: ns_manager.keep_router(router_id) update = queue.RouterUpdate(router_id, queue.PRIORITY_SYNC_ROUTERS_TASK, timestamp=timestamp, action=queue.DELETE_ROUTER) self._queue.add(update)