def aim(ctx, config_file, fmt, debug):
    """Group for AIM cli."""
    if debug:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)
    if ctx.obj is None:
        ctx.obj = {}
    args = []
    if config_file:
        for file in config_file:
            args += ['--config-file', file]
    config.CONF(project='aim', args=args)
    if not config.CONF.config_file:
        raise exc.UsageError(
            "Unable to find configuration file via the default "
            "search paths (~/.aim/, ~/, /etc/aim/, /etc/) and "
            "the '--config-file' option %s!" % config_file)
    ctx.obj['conf'] = config.CONF
    ctx.obj['fmt'] = DEFAULT_FORMAT
    if fmt in AVAILABLE_FORMATS:
        global curr_format
        curr_format = fmt
        ctx.obj['fmt'] = fmt
    api.get_store()
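# Example invocation of the group above (a sketch, not project docs: it
# assumes the usual click option wiring for --config-file/--debug, and the
# config path is illustrative; `manager` is the subcommand group defined
# further below):
#
#   aim --config-file /etc/aim/aim.cfg --debug manager ...
#
# Multiple --config-file options are accumulated into `args` and handed to
# oslo.config via config.CONF(project='aim', args=args).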
def _heartbeat_loop(self):
    start_time = time.time()
    aim_ctx = context.AimContext(store=api.get_store())
    self._send_heartbeat(aim_ctx)
    utils.wait_for_next_cycle(start_time, self.report_interval, LOG,
                              readable_caller='AID-HB',
                              notify_exceeding_timeout=False)
def hashtree(ctx):
    aim_ctx = context.AimContext(store=api.get_store(expire_on_commit=True))
    tree_mgr = tree_manager.HashTreeManager()
    manager = aim_manager.AimManager()
    ctx.obj['manager'] = manager
    ctx.obj['tree_mgr'] = tree_mgr
    ctx.obj['aim_ctx'] = aim_ctx
def k8swatcher(ctx):
    store = api.get_store(expire_on_commit=True)
    if not isinstance(store, aim_store.K8sStore):
        msg = ('Incorrect AIM store type. Expected %s, '
               'found %s' % (aim_store.K8sStore.__name__,
                             type(store).__name__))
        raise click.UsageError(msg)
def __init__(self, conf):
    self.run_daemon_loop = True
    self.host = conf.aim.aim_service_identifier

    aim_ctx = context.AimContext(store=api.get_store())
    # This config manager is shared between multiple threads. Therefore
    # all DB activity through this config manager will use the same
    # DB session which can result in conflicts.
    # TODO(amitbose) Fix ConfigManager to not use cached AimContext
    self.conf_manager = aim_cfg.ConfigManager(aim_ctx, self.host)
    self.k8s_watcher = None
    self.single_aid = False
    if conf.aim.aim_store == 'k8s':
        self.single_aid = True
        self.k8s_watcher = k8s_watcher.K8sWatcher()
        self.k8s_watcher.run()

    self.multiverse = []
    # Define multiverse pairs; the first position is the desired state
    self.multiverse += [
        # Configuration Universe (AIM to ACI)
        {DESIRED: aim_universe.AimDbUniverse().initialize(
            self.conf_manager, self.multiverse),
         CURRENT: aci_universe.AciUniverse().initialize(
             self.conf_manager, self.multiverse)},
        # Operational Universe (ACI to AIM)
        {DESIRED: aci_universe.AciOperationalUniverse().initialize(
            self.conf_manager, self.multiverse),
         CURRENT: aim_universe.AimDbOperationalUniverse().initialize(
             self.conf_manager, self.multiverse)},
        # Monitored Universe (ACI to AIM)
        {DESIRED: aci_universe.AciMonitoredUniverse().initialize(
            self.conf_manager, self.multiverse),
         CURRENT: aim_universe.AimDbMonitoredUniverse().initialize(
             self.conf_manager, self.multiverse)},
    ]
    # Operational Universes: ACI operational info will be synchronized
    # into AIM's.
    self.manager = aim_manager.AimManager()
    self.tree_manager = tree_manager.HashTreeManager()
    self.agent_id = 'aid-%s' % self.host
    self.agent = resource.Agent(id=self.agent_id, agent_type=AGENT_TYPE,
                                host=self.host, binary_file=AGENT_BINARY,
                                description=AGENT_DESCRIPTION,
                                version=AGENT_VERSION)
    # Register agent
    self.agent = self.manager.create(aim_ctx, self.agent, overwrite=True)
    # Report procedure should happen asynchronously
    self.polling_interval = self.conf_manager.get_option_and_subscribe(
        self._change_polling_interval, 'agent_polling_interval',
        group='aim')
    self.report_interval = self.conf_manager.get_option_and_subscribe(
        self._change_report_interval, 'agent_report_interval', group='aim')
    self.squash_time = self.conf_manager.get_option_and_subscribe(
        self._change_squash_time, 'agent_event_squash_time', group='aim')
    self._spawn_heartbeat_loop()
    self.events = event_handler.EventHandler().initialize(
        self.conf_manager)
    self.max_down_time = 4 * self.report_interval
def __init__(self, conf):
    self.host = aim_cfg.CONF.aim.aim_service_identifier
    self.context = context.AimContext(store=api.get_store())
    self.conf_manager = aim_cfg.ConfigManager(self.context, self.host)
    # TODO(ivar): heartbeat for these services?
    self.sender = event_handler.EventSender()
    self.sender.initialize(self.conf_manager)
    self.run_daemon_loop = True
def __init__(self, config):
    self.cfg = config
    self.ctx = context.AimContext(store=api.get_store())
    self.mgr = aim_manager.AimManager()
    self.sneak_name_to_klass = {
        utils.camel_to_snake(x.__name__): x
        for x in self.mgr.aim_resources}
def tenant_creation_failed(self, aim_object, reason='unknown',
                           error=errors.UNKNOWN):
    # New context; sessions are not thread safe.
    store = api.get_store()
    context = aim_ctx.AimContext(store=store)
    self.creation_failed(context, aim_object, reason=reason, error=error)
def setUp(self, initialize_hooks=True):
    super(TestAimDBBase, self).setUp()
    self.test_id = uuidutils.generate_uuid()
    aim_cfg.OPTION_SUBSCRIBER_MANAGER = None
    aci_universe.ws_context = None
    if not os.environ.get(K8S_STORE_VENV):
        CONF.set_override('aim_store', 'sql', 'aim')
        self.engine = api.get_engine()
        if not TestAimDBBase._TABLES_ESTABLISHED:
            model_base.Base.metadata.create_all(self.engine)
            TestAimDBBase._TABLES_ESTABLISHED = True

        # Uncomment the line below to log SQL statements. Additionally, to
        # log results of queries, change INFO to DEBUG
        #
        # logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)

        def clear_tables():
            with self.engine.begin() as conn:
                for table in reversed(
                        model_base.Base.metadata.sorted_tables):
                    conn.execute(table.delete())
        self.addCleanup(clear_tables)

        self.old_add_commit_hook = (
            aim_store.SqlAlchemyStore.add_commit_hook)
        aim_store.SqlAlchemyStore.add_commit_hook = _add_commit_hook

        def restore_commit_hook():
            aim_store.SqlAlchemyStore.add_commit_hook = (
                self.old_add_commit_hook)
        self.addCleanup(restore_commit_hook)
        aim_store.SqlAlchemyStore._after_transaction_end_2 = (
            _after_transaction_end_2)
    else:
        CONF.set_override('aim_store', 'k8s', 'aim')
        CONF.set_override('k8s_namespace', self.test_id, 'aim_k8s')
        k8s_config_path = os.environ.get(K8S_CONFIG_ENV)
        if k8s_config_path:
            CONF.set_override('k8s_config_path', k8s_config_path,
                              'aim_k8s')
        aim_store.K8sStore._post_delete = _k8s_post_delete
        aim_store.K8sStore._post_create = _k8s_post_create
        global k8s_watcher_instance
        k8s_watcher_instance = k8s_watcher.K8sWatcher()
        k8s_watcher_instance.event_handler = mock.Mock()
        k8s_watcher_instance._renew_klient_watch = mock.Mock()
        self.addCleanup(self._cleanup_objects)
    self.store = api.get_store(expire_on_commit=True,
                               initialize_hooks=initialize_hooks)
    self.ctx = context.AimContext(store=self.store)
    self.cfg_manager = aim_cfg.ConfigManager(self.ctx, '')
    self.tt_mgr = tree_manager.HashTreeManager()
    resource.ResourceBase.__eq__ = resource_equal
    self.cfg_manager.replace_all(CONF)
    self.sys_id = self.cfg_manager.get_option('aim_system_id', 'aim')
def _reconciliation_cycle(self, serve=True):
    # Regenerate context at each reconciliation cycle
    # TODO(ivar): set request-id so that oslo log can track it
    aim_ctx = context.AimContext(store=api.get_store())
    if serve:
        LOG.info("Start serving cycle.")
        tenants = self._calculate_tenants(aim_ctx)
        # Serve tenants
        for pair in self.multiverse:
            pair[DESIRED].serve(aim_ctx, tenants)
            pair[CURRENT].serve(aim_ctx, tenants)
        LOG.info("AID %s is currently serving: "
                 "%s" % (self.agent.id, tenants))

    LOG.info("Start reconciliation cycle.")
    # REVISIT(ivar) Might be wise to wait here upon tenant serving to
    # allow time for events to happen

    # Observe the two universes to fix their current state
    with utils.get_rlock(lcon.AID_OBSERVER_LOCK):
        for pair in self.multiverse:
            pair[DESIRED].observe(aim_ctx)
            pair[CURRENT].observe(aim_ctx)

    delete_candidates = set()
    vetoes = set()
    for pair in self.multiverse:
        pair[DESIRED].vote_deletion_candidates(
            aim_ctx, pair[CURRENT], delete_candidates, vetoes)
        pair[CURRENT].vote_deletion_candidates(
            aim_ctx, pair[DESIRED], delete_candidates, vetoes)

    # Reconcile everything
    changes = False
    for pair in self.multiverse:
        changes |= pair[CURRENT].reconcile(aim_ctx, pair[DESIRED],
                                           delete_candidates)
    if not changes:
        LOG.info("Congratulations! your multiverse is nice and synced :)")

    for pair in self.multiverse:
        pair[DESIRED].finalize_deletion_candidates(aim_ctx, pair[CURRENT],
                                                   delete_candidates)
        pair[CURRENT].finalize_deletion_candidates(aim_ctx, pair[DESIRED],
                                                   delete_candidates)

    # Delete tenants if there's consensus
    for tenant in delete_candidates:
        # All the universes agree on this tenant cleanup
        for pair in self.multiverse:
            for universe in pair.values():
                LOG.info("%s removing tenant from AID %s" %
                         (universe.name, tenant))
                universe.cleanup_state(aim_ctx, tenant)
def aim(ctx, config_file, debug):
    """Group for AIM cli."""
    if debug:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)
    if ctx.obj is None:
        ctx.obj = {}
    args = []
    if config_file:
        for file in config_file:
            args += ['--config-file', file]
    config.CONF(project='aim', args=args)
    if not config.CONF.config_file:
        raise exc.UsageError(
            "Unable to find configuration file via the default "
            "search paths (~/.aim/, ~/, /etc/aim/, /etc/) and "
            "the '--config-file' option %s!" % config_file)
    ctx.obj['conf'] = config.CONF
    api.get_store()
def get_apic_manager():
    apic_config = config.CONF.apic
    network_config = {
        'vlan_ranges': apic_config.vlan_ranges,
        'switch_dict': cfg.create_switch_dictionary(),
        'vpc_dict': cfg.create_vpc_dictionary(apic_config),
        'external_network_dict': cfg.create_external_network_dictionary(),
    }
    aim_ctx = context.AimContext(store=api.get_store())
    manager = aim_manager.AimManager()
    db = infra_model.HostLinkManager(aim_ctx, manager)
    apic_system_id = config.CONF.apic_system_id
    return apic_manager.APICManager(db, logging, network_config,
                                    apic_config, None, None,
                                    apic_system_id)
def upgrade(ctx, version):
    """Used for upgrading the database."""
    version = version or 'head'
    ctx.obj['manager'].upgrade(version)
    # Create the common tenant
    aim_ctx = context.AimContext(store=api.get_store(expire_on_commit=True))
    aim_mgr = aim_manager.AimManager()
    common_tenant = resource.Tenant(name='common', monitored=True)
    if not aim_mgr.get(aim_ctx, common_tenant):
        aim_mgr.create(aim_ctx, common_tenant)
    fix_no_nat_l3out_ownership(aim_ctx)
    click.echo('Rebuilding hash-trees')
    _reset(aim_mgr)
def _heartbeat_loop(self):
    start_time = time.time()
    aim_ctx = context.AimContext(store=api.get_store())
    self._send_heartbeat(aim_ctx)
    # REVISIT: This code should be removed once we've
    # removed all the locking in AID.
    if start_time > self.daemon_loop_time:
        down_time = start_time - self.daemon_loop_time
        if down_time > DEADLOCK_TIME:
            utils.perform_harakiri(LOG, "Agent has been down for %s "
                                        "seconds." % down_time)
    utils.wait_for_next_cycle(start_time, self.report_interval, LOG,
                              readable_caller='AID-HB',
                              notify_exceeding_timeout=False)
def upgrade(ctx, version):
    """Used for upgrading the database."""
    version = version or 'head'
    ctx.obj['manager'].upgrade(version)
    # Create the common tenant
    aim_ctx = context.AimContext(store=api.get_store(expire_on_commit=True))
    aim_mgr = aim_manager.AimManager()
    common_tenant = resource.Tenant(name='common', monitored=True)
    if not aim_mgr.get(aim_ctx, common_tenant):
        aim_mgr.create(aim_ctx, common_tenant)
    fix_no_nat_l3out_ownership(aim_ctx)
    click.echo('Rebuilding hash-trees')
    # Reset hash-trees to account for schema/converter changes
    listener = hashtree_db_listener.HashTreeDbListener(aim_mgr)
    listener.reset(aim_ctx.store)
def __init__(self, ctx=None, *args, **kwargs):
    self.ctx = ctx or context.AimContext(store=api.get_store())
    if 'streaming' not in self.ctx.store.features:
        # TODO(ivar) raise something meaningful
        raise Exception
    self.mgr = aim_manager.AimManager()
    self.tt_mgr = tree_manager.HashTreeManager()
    self.tt_maker = tree_manager.AimHashTreeMaker()
    self.tt_builder = tree_manager.HashTreeBuilder(self.mgr)
    self.klient = self.ctx.store.klient
    self.namespace = self.ctx.store.namespace
    self.trees = {}
    self.q = queue.Queue()
    self.event_handler = event_handler.EventHandler
    self._stop = False
    self._http_resp = None
    # Tenants whose trees need to be saved in AIM
    self.affected_tenants = set()
    self._observe_thread_state = {}
    self._k8s_types_to_observe = set([])
    self._k8s_aim_type_map = {}
    self._k8s_kinds = set([])
    self._needs_init = True
    for aim_res in aim_manager.AimManager.aim_resources:
        if issubclass(aim_res, resource.AciResourceBase):
            k8s_type = self.ctx.store.resource_to_db_type(aim_res)
            # Wrap aux_objects.values() in list() so the concatenation
            # also works on Python 3, where dict views don't support +.
            for ktype in ([k8s_type] +
                          list(k8s_type.aux_objects.values())):
                self._k8s_types_to_observe.add(ktype)
                self._k8s_kinds.add(ktype.kind)
                if ktype != api_v1.AciContainersObject:
                    self._k8s_aim_type_map[ktype.kind] = (aim_res,
                                                          k8s_type)
    self._event_filters = {api_v1.Pod: self._pod_event_filter,
                           api_v1.Endpoints: self._endpoints_event_filter}
def daemon_loop(self):
    aim_ctx = context.AimContext(store=api.get_store())
    # Serve tenants the very first time regardless of the events received
    self._daemon_loop(aim_ctx, True)
    while True:
        try:
            serve = False
            # Wait for the first event
            first_event_time = None
            squash_time = AID_EXIT_CHECK_INTERVAL
            while squash_time > 0:
                event = self.events.get_event(squash_time)
                if not event and first_event_time is None:
                    # This is a lone timeout, just check if we need to exit
                    if not self.run_daemon_loop:
                        LOG.info("Stopping AID main loop.")
                        return
                    continue
                if not first_event_time:
                    first_event_time = time.time()
                if event in event_handler.EVENTS + [None]:
                    # Set squash timeout
                    squash_time = (first_event_time + self.squash_time -
                                   time.time())
                    if event == event_handler.EVENT_SERVE:
                        # Serving tenants is required as well
                        serve = True
            start_time = time.time()
            self._daemon_loop(aim_ctx, serve)
            utils.wait_for_next_cycle(start_time, self.polling_interval,
                                      LOG, readable_caller='AID',
                                      notify_exceeding_timeout=False)
        except Exception:
            LOG.error('An error occurred in agent')
            LOG.error(traceback.format_exc())
def config(ctx):
    aim_ctx = context.AimContext(store=api.get_store(expire_on_commit=True))
    ctx.obj['manager'] = aim_cfg.ConfigManager(aim_ctx, '')
def _reset(aim_mgr):
    aim_ctx = context.AimContext(store=api.get_store(expire_on_commit=True))
    # Reset hash-trees to account for schema/converter changes
    listener = hashtree_db_listener.HashTreeDbListener(aim_mgr)
    aim_ctx.store.db_session.expunge_all()
    listener.reset(aim_ctx.store)
def _thread_monitor(self, flag):
    login_thread_name = 'login_thread'
    subscription_thread_name = 'subscription_thread'
    name_to_retry = {login_thread_name: None,
                     subscription_thread_name: None}
    max_retries = len(self.ws_urls)
    recovery_timer = utils.get_time()
    recovery_retry = 0
    aim_context = aim_ctx.AimContext(store=api.get_store())
    LOG.debug("Monitoring threads login and subscription")
    try:
        while flag['monitor_runs']:
            for thd, name in [(self.login_thread, login_thread_name),
                              (self.subs_thread,
                               subscription_thread_name)]:
                if thd and not thd.isAlive():
                    if (name_to_retry[name] and
                            name_to_retry[name].get() >= max_retries):
                        utils.perform_harakiri(
                            LOG, "Critical thread %s stopped "
                                 "working" % name)
                    else:
                        name_to_retry[name] = utils.exponential_backoff(
                            self.monitor_max_backoff,
                            tentative=name_to_retry[name])
                        try:
                            self.establish_ws_session()
                        except Exception as e:
                            LOG.debug(
                                "Monitor for thread %s tried to reconnect "
                                "web socket, but something went wrong. "
                                "Will retry %s more times: %s" %
                                (name,
                                 max_retries - name_to_retry[name].get(),
                                 str(e)))
                            continue
                elif thd:
                    LOG.debug("Thread %s is in good shape" % name)
                    name_to_retry[name] = None

            if self.need_recovery:
                # There is no point in running a recovery session if we
                # only have one ws_url.
                if (len(self.ws_urls) > 1 and
                        utils.get_time() > recovery_timer):
                    self.establish_ws_session(recovery_mode=True)
                    # Still failing to recover
                    if self.need_recovery:
                        recovery_retry += 1
                        recovery_timer = (
                            utils.get_time() + utils.get_backoff_time(
                                self.recovery_max_backoff,
                                recovery_retry))
                    else:
                        recovery_retry = 0
            else:
                # Update the last_update_timestamp
                if self.apic_assign_obj:
                    self.apic_assign_obj = self.manager.update(
                        aim_context, self.apic_assign_obj)
                else:
                    # This should never happen
                    LOG.error('No apic_assign_obj exists '
                              'for %s!' % self.session.ipaddr)

            time.sleep(self.monitor_sleep_time)
            # For testing purposes
            flag['monitor_runs'] -= 1
    except Exception as e:
        msg = "Unknown error in thread monitor: %s" % str(e)
        LOG.error(msg)
        utils.perform_harakiri(LOG, msg)
def establish_ws_session(self, max_retries=None, recovery_mode=False):
    try:
        with utils.get_rlock(lcon.ACI_WS_CONNECTION_LOCK, blocking=False):
            if not recovery_mode:
                purpose = NORMAL_PURPOSE
                self._reload_websocket_config()
                self.need_recovery = False
            else:
                purpose = RECOVERY_PURPOSE
            backup_urls = collections.deque()
            max_retries = max_retries or 2 * len(self.ws_urls)
            url_max_retries = max(1, max_retries / len(self.ws_urls))
            aim_context = aim_ctx.AimContext(store=api.get_store())
            for url in self.ws_urls:
                apic_assign = api_infra.ApicAssignment(apic_host=url)
                apic_assign_obj = self.manager.get(aim_context,
                                                   apic_assign)
                if (apic_assign_obj and
                        apic_assign_obj.aim_aid_id != self.agent_id and
                        not apic_assign_obj.is_available(aim_context)):
                    backup_urls.append(url)
                    continue

                # This means the original aim-aid owner might have
                # crashed or something. We will just take it!
                if (recovery_mode and apic_assign_obj and
                        self.session.ipaddr in url):
                    obj = self._update_apic_assign_db(
                        aim_context, apic_assign, apic_assign_obj)
                    if obj is None:
                        continue
                    self.need_recovery = False
                    self.apic_assign_obj = obj
                    return

                is_conn_successful = self._ws_session_login(
                    url, url_max_retries, purpose, aim_context,
                    apic_assign, apic_assign_obj)
                if is_conn_successful:
                    return
                else:
                    backup_urls.append(url)

            if recovery_mode:
                return

            # Try the backup urls. Randomly rotate the list first so that
            # the extra aim-aids won't all go for the same backup url.
            backup_urls_len = len(backup_urls)
            if backup_urls_len > 1:
                backup_urls.rotate(random.randint(1, backup_urls_len))
            for url in backup_urls:
                is_conn_successful = self._ws_session_login(
                    url, url_max_retries, BACKUP_PURPOSE)
                if is_conn_successful:
                    return

            utils.perform_harakiri(LOG, "Cannot establish WS connection "
                                        "after %s retries." % max_retries)
    except utils.LockNotAcquired:
        # Some other thread is trying to reconnect
        return
def manager(ctx):
    aim_ctx = context.AimContext(store=api.get_store(expire_on_commit=True))
    manager = aim_manager.AimManager()
    ctx.obj['manager'] = manager
    ctx.obj['aim_ctx'] = aim_ctx
def _heartbeat_loop(self):
    aim_ctx = context.AimContext(store=api.get_store())
    while True:
        start = time.time()
        self._send_heartbeat(aim_ctx)
        # Keep a steady cadence: sleep only for whatever is left of the
        # report interval after the heartbeat itself.
        time.sleep(max(0, self.report_interval - (time.time() - start)))
def setUp(self, mock_store=True):
    super(TestAimDBBase, self).setUp()
    self.test_id = uuidutils.generate_uuid()
    aim_cfg.OPTION_SUBSCRIBER_MANAGER = None
    aci_universe.ws_context = None
    if not os.environ.get(K8S_STORE_VENV):
        CONF.set_override('aim_store', 'sql', 'aim')
        self.engine = api.get_engine()
        if not TestAimDBBase._TABLES_ESTABLISHED:
            model_base.Base.metadata.create_all(self.engine)
            TestAimDBBase._TABLES_ESTABLISHED = True

        # Uncomment the line below to log SQL statements. Additionally, to
        # log results of queries, change INFO to DEBUG
        #
        # logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG)

        def clear_tables():
            with self.engine.begin() as conn:
                for table in reversed(
                        model_base.Base.metadata.sorted_tables):
                    conn.execute(table.delete())
        self.addCleanup(clear_tables)

        if mock_store:
            self.old_initialize_hooks = (
                aim_store.SqlAlchemyStore._initialize_hooks)
            aim_store.SqlAlchemyStore.old_initialize_hooks = (
                self.old_initialize_hooks)
            aim_store.SqlAlchemyStore._initialize_hooks = _initialize_hooks

            def restore_initialize_hook():
                aim_store.SqlAlchemyStore._initialize_hooks = (
                    self.old_initialize_hooks)
            self.addCleanup(restore_initialize_hook)
            aim_store.SqlAlchemyStore._catch_up_logs = _catch_up_logs
    else:
        CONF.set_override('aim_store', 'k8s', 'aim')
        CONF.set_override('k8s_namespace', self.test_id, 'aim_k8s')
        k8s_config_path = os.environ.get(K8S_CONFIG_ENV)
        if k8s_config_path:
            CONF.set_override('k8s_config_path', k8s_config_path,
                              'aim_k8s')
        aim_store.K8sStore._post_delete = _k8s_post_delete
        aim_store.K8sStore._post_create = _k8s_post_create
        global k8s_watcher_instance
        k8s_watcher_instance = k8s_watcher.K8sWatcher()
        k8s_watcher_instance.event_handler = mock.Mock()
        k8s_watcher_instance._renew_klient_watch = mock.Mock()
        self.addCleanup(self._cleanup_objects)
    self.store = api.get_store(expire_on_commit=True)

    def unregister_catch_up():
        self.store.unregister_after_transaction_ends_callback(
            '_catch_up_logs')
    self.addCleanup(unregister_catch_up)
    self.ctx = context.AimContext(store=self.store)
    self.cfg_manager = aim_cfg.ConfigManager(self.ctx, '')
    self.tt_mgr = tree_manager.HashTreeManager()
    resource.ResourceBase.__eq__ = resource_equal
    self.cfg_manager.replace_all(CONF)
    self.sys_id = self.cfg_manager.get_option('aim_system_id', 'aim')