def test_bad_canopsis_event_raises(self):
    event = {}
    with AmqpConnection(self.amqp_url) as ac:
        amqp_pub = AmqpPublisher(ac, Mock())
        with self.assertRaises(KeyError):
            amqp_pub.canopsis_event(event, self.amqp_exname)
def __init__(
        self,
        namespace,
        confnamespace='object',
        storage=None,
        autolog=False,
        amqp_pub=None,
        *args, **kwargs
):
    super(Archiver, self).__init__()

    self.namespace = namespace
    self.namespace_log = namespace + '_log'

    # Bulk operation configuration
    self.last_bulk_insert_date = time()
    self.bulk_ids = []
    # How many events can be buffered
    self.bulk_amount = 500
    # What is the maximum duration until bulk insert
    self.bulk_delay = 3
    self.incoming_events = {}

    self.autolog = autolog

    self.logger.debug("Init Archiver on %s" % namespace)

    self.account = Account(user="******", group="root")

    if not storage:
        self.logger.debug(" + Get storage")
        self.storage = get_storage(
            namespace=namespace,
            logging_level=self.log_lvl
        )
    else:
        self.storage = storage

    self.conf_storage = get_storage(
        namespace=confnamespace,
        logging_level=self.log_lvl
    )
    self.conf_collection = self.conf_storage.get_backend(confnamespace)
    self.collection = self.storage.get_backend(namespace)

    if amqp_pub is None:
        self.amqp_pub = AmqpPublisher(
            get_default_amqp_connection(), self.logger)

    self.reset_stealthy_event_duration = time()
    self.reset_stats()
def __init__(self, logger=None, *args, **kwargs):
    """__init__

    :param *args:
    :param **kwargs:
    """
    super(ContextGraphImport, self).__init__(logger, *args, **kwargs)

    if logger is not None:
        self.logger = logger

    # Entities asked for update
    self.entities_to_update = {}
    # Entities to update
    self.update = {}
    self.delete = []

    self.amqp_pub = AmqpPublisher(get_default_amqp_conn(), self.logger)
def __init__(self, amqp_pub=None):
    """
    :param amqp_pub canopsis.common.amqp.AmqpPublisher:
    """
    self.logger = Logger.get('watcher', LOG_PATH)
    self.watcher_storage = Middleware.get_middleware_by_uri(
        'mongodb-default-watcher://')
    self.alert_storage = Middleware.get_middleware_by_uri(
        'mongodb-periodical-alarm://')
    self.sla_storage = Middleware.get_middleware_by_uri(
        'storage-default-sla://')

    self.context_graph = ContextGraph(self.logger)
    self.pbehavior_manager = PBehaviorManager(
        *PBehaviorManager.provide_default_basics())

    self.amqp_pub = amqp_pub
    if amqp_pub is None:
        self.amqp_pub = AmqpPublisher(get_default_amqp_conn(), self.logger)
def provide_default_basics(cls, logger):
    """
    Returns the default collection and AMQP publisher for the manager.

    ! Do not use in tests !

    :rtype: (canopsis.common.collection.MongoCollection,
             canopsis.common.amqp.AmqpPublisher)
    """
    store = MongoStore.get_default()
    collection = store.get_collection(name=cls.COLLECTION)
    amqp_pub = AmqpPublisher(get_default_amqp_conn(), logger)

    return (MongoCollection(collection), amqp_pub)
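A self-contained sketch (not taken from the Canopsis code) of the provide_default_basics pattern used above: production callers build the manager from the classmethod, while tests inject fakes directly. ExampleManager and its stand-in dependencies are hypothetical.

# Hypothetical, dependency-free illustration of the same pattern.
class ExampleManager(object):
    COLLECTION = 'example'

    def __init__(self, collection, amqp_pub):
        # Dependencies are always passed in, never built here,
        # so tests can hand in fakes.
        self.collection = collection
        self.amqp_pub = amqp_pub

    @classmethod
    def provide_default_basics(cls, logger):
        # In Canopsis this would return a MongoCollection and an
        # AmqpPublisher; plain stand-ins keep the sketch runnable.
        collection = {'name': cls.COLLECTION, 'logger': logger}
        amqp_pub = object()
        return (collection, amqp_pub)

# Production-style construction:
manager = ExampleManager(*ExampleManager.provide_default_basics(logger=None))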
def get_default_app(logger=None, webconf=None, amqp_conn=None, amqp_pub=None):
    if webconf is None:
        webconf = Configuration.load(WebServer.CONF_PATH, Ini)

    if logger is None:
        logger = Logger.get('webserver', WebServer.LOG_FILE)

    if amqp_conn is None:
        amqp_conn = get_default_amqp_connection()

    if amqp_pub is None:
        amqp_pub = AmqpPublisher(amqp_conn, logger)

    # Declare WSGI application
    ws = WebServer(config=webconf, logger=logger, amqp_pub=amqp_pub).init_app()
    app = ws.application

    return app
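A hedged usage sketch: assuming the object returned by get_default_app() behaves as a standard WSGI application (it is produced by WebServer.init_app()), it could be served with the standard-library wsgiref server; the bind address and port are illustrative.

# Assumption: get_default_app() returns a WSGI-compliant application.
from wsgiref.simple_server import make_server

app = get_default_app()
server = make_server('127.0.0.1', 8080, app)  # illustrative bind address
# server.serve_forever()  # blocks; left commented on purpose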
def provide_default_basics(cls):
    """
    Provide logger, config, storages...

    ! Do not use in tests !

    :rtype: Union[canopsis.confng.simpleconf.Configuration,
                  logging.Logger,
                  canopsis.storage.core.Storage,
                  canopsis.common.ethereal_data.EtherealData,
                  canopsis.storage.core.Storage,
                  canopsis.context_graph.manager.ContextGraph,
                  canopsis.watcher.manager.Watcher]
    """
    config = Configuration.load(Alerts.CONF_PATH, Ini)
    conf_store = Configuration.load(MongoStore.CONF_PATH, Ini)

    mongo = MongoStore(config=conf_store)
    config_collection = MongoCollection(
        mongo.get_collection(name=cls.CONFIG_COLLECTION))
    filter_ = {'crecord_type': 'statusmanagement'}
    config_data = EtherealData(collection=config_collection,
                               filter_=filter_)

    logger = Logger.get('alerts', cls.LOG_PATH)
    alerts_storage = Middleware.get_middleware_by_uri(
        cls.ALERTS_STORAGE_URI
    )
    filter_storage = Middleware.get_middleware_by_uri(
        cls.FILTER_STORAGE_URI
    )
    context_manager = ContextGraph(logger)
    watcher_manager = Watcher()
    pbehavior_manager = PBehaviorManager(
        *PBehaviorManager.provide_default_basics())

    amqp_pub = AmqpPublisher(get_default_amqp_conn(), logger)
    event_publisher = StatEventPublisher(logger, amqp_pub)

    return (config, logger, alerts_storage, config_data, filter_storage,
            context_manager, watcher_manager, event_publisher,
            pbehavior_manager)
def __init__(self, next_amqp_queues=[], next_balanced=False, name="worker1", beat_interval=60, logging_level=INFO, exchange_name=DIRECT_EXCHANGE_NAME, routing_keys=[], camqp_custom=None, max_retries=5, *args, **kwargs): super(Engine, self).__init__() self.logging_level = logging_level self.debug = logging_level == DEBUG init = Init() self.logger = init.getLogger(name, logging_level=self.logging_level) log_handler = FileHandler(filename=join( root_path, 'var', 'log', 'engines', '{0}.log'.format(name))) log_handler.setFormatter( Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")) # Log in file self.logger.addHandler(log_handler) self.RUN = True self.name = name # Set parametrized Amqp for testing purposes if camqp_custom is None: self.Amqp = Amqp else: self.Amqp = camqp_custom # self.amqp handles the consumption of events from rabbitmq. The # publication of events from self.amqp is deprecated. self.amqp = None # self.beat_amqp_publisher and self.work_amqp_publisher handle the # publication of events (they are separated to prevent sharing a # channel between two threads). self.beat_amqp_publisher = AmqpPublisher(get_default_amqp_connection(), self.logger) self.work_amqp_publisher = AmqpPublisher(get_default_amqp_connection(), self.logger) self.amqp_queue = "Engine_{0}".format(self.name) self.routing_keys = routing_keys self.exchange_name = exchange_name self.perfdata_retention = 3600 self.next_amqp_queues = next_amqp_queues self.get_amqp_queue = cycle(self.next_amqp_queues) # Get from internal or external queue self.next_balanced = next_balanced self.max_retries = max_retries self.counter_error = 0 self.counter_event = 0 self.counter_worktime = 0 self.thd_warn_sec_per_evt = 0.6 self.thd_crit_sec_per_evt = 0.9 self.beat_interval = beat_interval self.beat_last = time() self.create_queue = True self.send_stats_event = True self.rk_on_error = [] self.last_stat = int(time()) self.logger.info("Engine initialized")
class Engine(object):
    etype = 'Engine'

    def __init__(self,
                 next_amqp_queues=[],
                 next_balanced=False,
                 name="worker1",
                 beat_interval=60,
                 logging_level=INFO,
                 exchange_name=DIRECT_EXCHANGE_NAME,
                 routing_keys=[],
                 camqp_custom=None,
                 max_retries=5,
                 *args, **kwargs):
        super(Engine, self).__init__()

        self.logging_level = logging_level
        self.debug = logging_level == DEBUG

        init = Init()
        self.logger = init.getLogger(name, logging_level=self.logging_level)

        log_handler = FileHandler(filename=join(
            root_path, 'var', 'log', 'engines', '{0}.log'.format(name)))
        log_handler.setFormatter(
            Formatter("%(asctime)s %(levelname)s %(name)s %(message)s"))

        # Log in file
        self.logger.addHandler(log_handler)

        self.RUN = True
        self.name = name

        # Set parametrized Amqp for testing purposes
        if camqp_custom is None:
            self.Amqp = Amqp
        else:
            self.Amqp = camqp_custom

        # self.amqp handles the consumption of events from rabbitmq. The
        # publication of events from self.amqp is deprecated.
        self.amqp = None

        # self.beat_amqp_publisher and self.work_amqp_publisher handle the
        # publication of events (they are separated to prevent sharing a
        # channel between two threads).
        self.beat_amqp_publisher = AmqpPublisher(
            get_default_amqp_connection(), self.logger)
        self.work_amqp_publisher = AmqpPublisher(
            get_default_amqp_connection(), self.logger)

        self.amqp_queue = "Engine_{0}".format(self.name)

        self.routing_keys = routing_keys
        self.exchange_name = exchange_name

        self.perfdata_retention = 3600

        self.next_amqp_queues = next_amqp_queues
        self.get_amqp_queue = cycle(self.next_amqp_queues)

        # Get from internal or external queue
        self.next_balanced = next_balanced

        self.max_retries = max_retries

        self.counter_error = 0
        self.counter_event = 0
        self.counter_worktime = 0

        self.thd_warn_sec_per_evt = 0.6
        self.thd_crit_sec_per_evt = 0.9

        self.beat_interval = beat_interval
        self.beat_last = time()

        self.create_queue = True
        self.send_stats_event = True

        self.rk_on_error = []

        self.last_stat = int(time())

        self.logger.info("Engine initialized")

    def new_amqp_queue(self, amqp_queue, routing_keys, on_amqp_event,
                       exchange_name):
        self.amqp.add_queue(queue_name=amqp_queue,
                            routing_keys=routing_keys,
                            callback=on_amqp_event,
                            exchange_name=exchange_name,
                            no_ack=True,
                            exclusive=False,
                            auto_delete=False)

    def pre_run(self):
        pass

    def post_run(self):
        pass

    def run(self):
        def ready():
            self.logger.info(" + Ready!")

        self.logger.info("Start Engine with pid {0}".format(getpid()))

        self.amqp = self.Amqp(logging_level=self.logging_level,
                              logging_name="{0}-amqp".format(self.name),
                              on_ready=ready,
                              max_retries=self.max_retries)

        if self.create_queue:
            self.new_amqp_queue(self.amqp_queue, self.routing_keys,
                                self.on_amqp_event, self.exchange_name)

        self.amqp.start()

        self.pre_run()

        while self.RUN:
            # Beat
            if self.beat_interval:
                now = time()
                if now > (self.beat_last + self.beat_interval):
                    self._beat()
                    self.beat_last = now

            try:
                sleep(1)
            except Exception as err:
                self.logger.error("Error in break time: {0}".format(err))
                self.RUN = False
            except KeyboardInterrupt:
                self.logger.info(u'Stop request')
                self.RUN = False

        self.post_run()

        self.logger.info("Stop Engine")
        self.stop()
        self.logger.info("End of Engine")

    def on_amqp_event(self, event, msg):
        try:
            self._work(event, msg)
        except Exception as err:
            if event['rk'] not in self.rk_on_error:
                self.logger.error(err)
                self.logger.error("Impossible to deal with: {0}".format(event))
                self.rk_on_error.append(event['rk'])

            self.next_queue(event)

    def _work(self, event, msg=None, *args, **kargs):
        start = time()
        error = False

        try:
            if self.debug:
                if 'processing' not in event:
                    event['processing'] = {}
                event['processing'][self.etype] = start

            wevent = self.work(event, msg, *args, **kargs)

            if wevent != DROP:
                if isinstance(wevent, dict):
                    event = wevent

                self.next_queue(event)

        except Exception as err:
            error = True
            self.logger.error("Worker raise exception: {0}".format(err))
            self.logger.error(format_exc())

        if error:
            self.counter_error += 1

        elapsed = time() - start

        if elapsed > 3:
            self.logger.warning("Elapsed time %.2f seconds" % elapsed)

        self.counter_event += 1
        self.counter_worktime += elapsed

    def work(self, event, amqp_msg):
        return event

    def next_queue(self, event):
        if self.next_balanced:
            queue_name = self.get_amqp_queue.next()
            if queue_name:
                try:
                    self.work_amqp_publisher.direct_event(event, queue_name)
                except Exception as e:
                    self.logger.exception("Unable to send event to next queue")
        else:
            for queue_name in self.next_amqp_queues:
                try:
                    self.work_amqp_publisher.direct_event(event, queue_name)
                except Exception as e:
                    self.logger.exception("Unable to send event to next queue")

    def _beat(self):
        now = int(time())

        if self.last_stat + 60 <= now:
            self.logger.debug(" + Send stats")
            self.last_stat = now

            evt_per_sec = 0
            sec_per_evt = 0

            if self.counter_event:
                evt_per_sec = float(self.counter_event) / self.beat_interval
                self.logger.debug(" + %0.2f event(s)/seconds", evt_per_sec)

            if self.counter_worktime and self.counter_event:
                sec_per_evt = self.counter_worktime / self.counter_event
                self.logger.debug(" + %0.5f seconds/event", sec_per_evt)

            # Submit event
            if self.send_stats_event and self.counter_event != 0:
                state = 0

                if sec_per_evt > self.thd_warn_sec_per_evt:
                    state = 1

                if sec_per_evt > self.thd_crit_sec_per_evt:
                    state = 2

                perf_data_array = [{
                    'retention': self.perfdata_retention,
                    'metric': 'cps_evt_per_sec',
                    'value': round(evt_per_sec, 2),
                    'unit': 'evt'
                }, {
                    'retention': self.perfdata_retention,
                    'metric': 'cps_sec_per_evt',
                    'value': round(sec_per_evt, 5),
                    'unit': 's',
                    'warn': self.thd_warn_sec_per_evt,
                    'crit': self.thd_crit_sec_per_evt
                }]

                self.logger.debug(" + State: {0}".format(state))

                event = forger(
                    connector="Engine",
                    connector_name="engine",
                    event_type="check",
                    source_type="resource",
                    resource=self.amqp_queue,
                    state=state,
                    state_type=1,
                    output="%0.2f evt/sec, %0.5f sec/evt" % (
                        evt_per_sec, sec_per_evt),
                    perf_data_array=perf_data_array)

                try:
                    self.beat_amqp_publisher.canopsis_event(event)
                except Exception as e:
                    self.logger.exception("Unable to send perfdata")

            self.counter_error = 0
            self.counter_event = 0
            self.counter_worktime = 0

        try:
            self.beat()
        except Exception as err:
            self.logger.error("Beat raise exception: {0}".format(err))
            # log the formatted traceback (print_exc() would return None)
            self.logger.error(format_exc())

    def beat(self):
        pass

    def stop(self):
        self.RUN = False

        # cancel self consumer
        self.amqp.cancel_queues()
        self.amqp.stop()
        self.amqp.join()

        self.logger.debug(" + Stopped")

    class Lock(object):
        def __init__(self, engine, name, *args, **kwargs):
            super(Engine.Lock, self).__init__()

            self.name = name
            self.lock_id = '{0}.{1}'.format(engine.etype, name)
            self.storage = get_storage(
                namespace='lock',
                logging_level=engine.logging_level,
                account=Account(user='******', group='root')).get_backend()
            self.engine = engine
            self.lock = {}

        def own(self):
            now = time()
            last = self.lock.get('t', now)

            if self.lock.get('l', False) \
                    and (now - last) < self.engine.beat_interval:
                self.engine.logger.debug(
                    'Another engine {0} is already holding the lock {1}'.
                    format(self.engine.etype, self.name))
                return False

            else:
                self.engine.logger.debug(
                    'Lock {1} on engine {0}, processing...'.format(
                        self.engine.etype, self.name))
                return True

            return False

        def __enter__(self):
            lock = self.storage.find_and_modify(query={'_id': self.lock_id},
                                                update={'$set': {'l': True}},
                                                upsert=True)

            if lock is not None:
                self.lock = lock

            if 't' not in self.lock:
                self.lock['t'] = 0

            return self

        def __exit__(self, type, value, tb):
            if self.own():
                self.engine.logger.debug(
                    'Release lock {1} on engine {0}'.format(
                        self.engine.etype, self.name))
                self.storage.save({
                    '_id': self.lock_id,
                    'l': False,
                    't': time()
                })

        @classmethod
        def release(cls, lock_id, object_storage):
            object_storage.update({'_id': lock_id}, {'$set': {
                'l': False,
                't': time()
            }}, upsert=True)
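A minimal sketch of a concrete engine built on the class above; the engine name, routing key and drop rule are illustrative only, and the module-level names it relies on (Engine, DROP, DIRECT_EXCHANGE_NAME) are the ones already used by the Engine class.

class FilterEngine(Engine):
    """Illustrative engine: drops OK events, tags the others."""
    etype = 'examplefilter'

    def work(self, event, amqp_msg):
        # Returning DROP keeps the event out of the next queues;
        # returning a dict forwards it (see Engine._work above).
        if event.get('state', 0) == 0:
            return DROP
        event.setdefault('tags', []).append('filtered')
        return event


# engine = FilterEngine(name='examplefilter',
#                       routing_keys=['#'],
#                       exchange_name=DIRECT_EXCHANGE_NAME)
# engine.run()  # blocks, consuming from the "Engine_examplefilter" queue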
class Watcher:
    """Watcher class"""

    def __init__(self, amqp_pub=None):
        """
        :param amqp_pub canopsis.common.amqp.AmqpPublisher:
        """
        self.logger = Logger.get('watcher', LOG_PATH)
        self.watcher_storage = Middleware.get_middleware_by_uri(
            'mongodb-default-watcher://')
        self.alert_storage = Middleware.get_middleware_by_uri(
            'mongodb-periodical-alarm://')
        self.sla_storage = Middleware.get_middleware_by_uri(
            'storage-default-sla://')

        self.context_graph = ContextGraph(self.logger)
        self.pbehavior_manager = PBehaviorManager(
            *PBehaviorManager.provide_default_basics()
        )

        self.amqp_pub = amqp_pub
        if amqp_pub is None:
            self.amqp_pub = AmqpPublisher(get_default_amqp_conn())

    def get_watcher(self, watcher_id):
        """Retrieve from database the watcher specified by its watcher id.

        :param str watcher_id: the watcher id
        :return dict: the wanted watcher. None, if no watcher matches the
            watcher_id
        """
        watcher = self.context_graph.get_entities_by_id(watcher_id)

        try:
            return watcher[0]
        except IndexError:
            return None

    def create_watcher(self, body):
        """
        Create watcher entity in context and link to entities.

        :param dict body: watcher conf
        """
        watcher_id = body['_id']
        try:
            watcher_finder = json.loads(body['mfilter'])
        except ValueError:
            self.logger.error('cannot decode mfilter')
            return None
        except KeyError:
            self.logger.error('no filter')
            return None

        depends_list = self.context_graph.get_entities(
            query=watcher_finder,
            projection={'_id': 1}
        )
        self.watcher_storage.put_element(body)

        depend_list = []
        for entity_id in depends_list:
            depend_list.append(entity_id['_id'])

        entity = ContextGraph.create_entity_dict(
            id=watcher_id,
            name=body['display_name'],
            etype='watcher',
            impact=[],
            depends=depend_list
        )

        # adding the fields specific to the Watcher entities
        entity['mfilter'] = body['mfilter']
        entity['state'] = 0

        try:
            self.context_graph.create_entity(entity)
        except ValueError:
            self.context_graph.update_entity(entity)

        self.compute_state(watcher_id)

        return True  # TODO: return really something

    def update_watcher(self, watcher_id, updated_field):
        """Update the watcher specified by its watcher id with updated_field.

        Raise a ValueError if the watcher_id does not match any entity.

        :param str watcher_id: the watcher_id of the watcher to update
        :param dict updated_field: the fields to update
        :returns: the updated Watcher
        :rtype: <Watcher>
        """
        watcher = self.get_watcher(watcher_id)

        if watcher is None:
            raise ValueError("No watcher found for the following"
                             " id: {}".format(watcher_id))

        if "mfilter" in watcher.keys() and "mfilter" in updated_field.keys():
            if updated_field['mfilter'] != watcher['mfilter']:
                watcher['mfilter'] = updated_field['mfilter']

                query = json.loads(updated_field['mfilter'])
                entities = self.context_graph.get_entities(
                    query=query, projection={'_id': 1})

                watcher["depends"] = [entity["_id"] for entity in entities]

        for key in updated_field:

            if key == "infos":  # update fields inside infos
                for info_key in updated_field["infos"]:
                    watcher["infos"][info_key] = updated_field["infos"][
                        info_key]

            watcher[key] = updated_field[key]

        self.context_graph.update_entity(watcher)

    def delete_watcher(self, watcher_id):
        """
        Delete watcher & disable watcher entity in context.

        :param string watcher_id: watcher_id
        :returns: the mongodb dict response
        """
        self.context_graph.delete_entity(watcher_id)

        self.sla_storage.remove_elements(ids=[watcher_id])

        return self.watcher_storage.remove_elements(ids=[watcher_id])

    def alarm_changed(self, alarm_id):
        """
        Launch a computation of a watcher state.

        :param alarm_id: alarm id
        """
        watchers = self.context_graph.get_entities(query={'type': 'watcher'})
        for i in watchers:
            if alarm_id in i['depends']:
                self.compute_state(i['_id'])

    def compute_watchers(self):
        """
        Compute all watchers states.
        """
        watchers = list(self.watcher_storage.get_elements(query={}))
        for watcher in watchers:
            self.compute_state(watcher['_id'])

    def compute_state(self, watcher_id):
        """
        Send an event watcher with the new state of the watcher.

        :param watcher_id: watcher id
        """
        try:
            watcher_entity = self.context_graph.get_entities(
                query={'_id': watcher_id})[0]
        except IndexError:
            return None

        entities = watcher_entity['depends']

        query = {"_id": {"$in": entities},
                 "enabled": True}
        cursor = self.context_graph.get_entities(query=query,
                                                 projection={"_id": 1})

        entities = []
        for ent in cursor:
            entities.append(ent["_id"])

        display_name = watcher_entity['name']

        alarm_list = list(self.alert_storage._backend.find({
            '$and': [
                {'d': {'$in': entities}},
                {
                    '$or': [
                        {'v.resolved': None},
                        {'v.resolved': {'$exists': False}}
                    ]
                }
            ]
        }))
        states = []

        for alarm in alarm_list:
            active_pb = self.pbehavior_manager.get_active_pbehaviors(
                [alarm['d']]
            )
            if len(active_pb) == 0:
                states.append(alarm['v']['state']['val'])

        nb_entities = len(entities)
        nb_crit = states.count(Check.CRITICAL)
        nb_major = states.count(Check.MAJOR)
        nb_minor = states.count(Check.MINOR)
        nb_ok = nb_entities - (nb_crit + nb_major + nb_minor)

        # here add selection for calculation method actually it's worst state
        # by default and think to add pbehavior in tab
        computed_state = self.worst_state(nb_crit, nb_major, nb_minor)
        output = '{0} ok, {1} minor, {2} major, {3} critical'.format(
            nb_ok, nb_minor, nb_major, nb_crit)

        if computed_state != watcher_entity.get('state', None):
            watcher_entity['state'] = computed_state
            self.context_graph.update_entity(watcher_entity)

            self.publish_event(
                display_name,
                computed_state,
                output,
                watcher_entity['_id']
            )

    def compute_slas(self):
        """
        Launch the sla computation for each watcher.
        """
        watcher_list = self.context_graph.get_entities(
            query={'type': 'watcher',
                   'infos.enabled': True})
        for watcher in watcher_list:
            self.sla_compute(watcher['_id'], watcher['infos']['state'])

    def publish_event(self, display_name, computed_state, output, _id):
        """
        Publish an event watcher on amqp.

        TODO: move that elsewhere (not specific to watchers)

        :param display_name: watcher display_name
        :param computed_state: watcher state
        :param output: watcher output
        """
        event = forger(
            connector="canopsis",
            connector_name="engine",
            event_type="watcher",
            source_type="component",
            component=_id,
            state=computed_state,
            output=output,
            perf_data_array=[],
            display_name=display_name)

        self.amqp_pub.canopsis_event(event)

    def sla_compute(self, watcher_id, state):
        """
        Launch the sla computation.

        :param watcher_id: watcher id
        :param state: watcher state
        """

        # sla_tab = list(
        #     self.sla_storage.get_elements(query={'_id': watcher_id}))[0]
        # sla_tab['states'][state] = sla_tab['states'][state] + 1

        # self.sla_storage.put_element(sla_tab)

        # watcher_conf = list(
        #     self[self.WATCHER_STORAGE].get_elements(
        #         query={'_id': watcher_id})
        # )[0]

        # sla = Sla(self[self.WATCHER_STORAGE],
        #           'test/de/rk/on/verra/plus/tard',
        #           watcher_conf['sla_output_tpl'],
        #           watcher_conf['sla_timewindow'],
        #           watcher_conf['sla_warning'],
        #           watcher_conf['alert_level'],
        #           watcher_conf['display_name'])

        # self.logger.critical('{0}'.format((
        #     sla_tab['states']/
        #     (sla_tab['states'][1] +
        #      sla_tab['states'][2] +
        #      sla_tab['states'][3]))))
        pass

    @staticmethod
    def worst_state(nb_crit, nb_major, nb_minor):
        """Calculate the worst state.

        :param int nb_crit: critical number
        :param int nb_major: major number
        :param int nb_minor: minor number
        :return int state: return the worst state
        """
        if nb_crit > 0:
            return 3
        elif nb_major > 0:
            return 2
        elif nb_minor > 0:
            return 1

        return 0
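A hedged usage sketch of the manager above; the watcher id, display name and mongo filter are illustrative, and mfilter must be a JSON string because create_watcher() json.loads() it.

# manager = Watcher()  # builds storages and an AmqpPublisher; needs a running platform
watcher_conf = {
    '_id': 'watcher-example',                     # illustrative id
    'display_name': 'Example watcher',
    'mfilter': json.dumps({'type': 'resource'}),  # entities the watcher depends on
}
# manager.create_watcher(watcher_conf)      # creates the entity and computes its state
# manager.compute_state('watcher-example')

# worst_state() is pure and can be checked directly:
assert Watcher.worst_state(nb_crit=0, nb_major=2, nb_minor=5) == 2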
class ContextGraphImport(ContextGraph):
    """The manager in charge of an import of a context."""

    # TODO add a feature to restore the context if an error occurred while it
    # is pushed into the database

    K_LINKS = "links"
    K_FROM = "from"
    K_TO = "to"
    K_CIS = "cis"
    K_ID = "_id"
    K_NAME = "name"
    K_TYPE = "type"
    K_DEPENDS = "depends"
    K_IMPACT = "impact"
    K_MEASUREMENTS = "measurements"
    K_INFOS = "infos"
    K_ACTION = "action"
    K_ENABLE = "enable"
    K_DISABLE = "disable"
    K_PROPERTIES = "action_properties"
    K_ENABLED = "enabled"

    # If you add an action, remember to add it to the a_pattern string in the
    # method import_checker
    A_DELETE = "delete"
    A_CREATE = "create"
    A_SET = "set"
    A_UPDATE = "update"
    A_DISABLE = "disable"
    A_ENABLE = "enable"

    __A_PATTERN = "^delete$|^create$|^update$|^set$|^disable$|^enable$"
    __T_PATTERN = "^resource$|^component$|^connector$|^watcher$"

    __CI_REQUIRED = [K_ID, K_ACTION, K_TYPE]
    __LINK_REQUIRED = [K_FROM, K_TO, K_ACTION]

    CIS_SCHEMA = {
        "$schema": "http://json-schema.org/draft-04/schema#",
        "type": "object",
        "required": __CI_REQUIRED,
        "uniqueItems": True,
        "properties": {
            K_ID: {"type": "string"},
            K_NAME: {"type": "string"},
            K_MEASUREMENTS: {
                "type": "array",
                "items": {"type": "string"}
            },
            K_INFOS: {"type": "object"},
            K_ACTION: {
                "type": "string",
                "pattern": __A_PATTERN
            },
            K_TYPE: {
                "type": "string",
                "pattern": __T_PATTERN
            },
            K_PROPERTIES: {"type": "object"}
        }
    }

    LINKS_SCHEMA = {
        "$schema": "http://json-schema.org/draft-04/schema#",
        "uniqueItems": True,
        "type": "object",
        "required": __LINK_REQUIRED,
        "properties": {
            K_ID: {"type": "string"},
            K_FROM: {
                "type": "array",
                "items": {"type": "string"}
            },
            K_TO: {"type": "string"},
            K_INFOS: {"type": "object"},
            K_ACTION: {
                "type": "string",
                "pattern": __A_PATTERN
            },
            K_PROPERTIES: {"type": "object"}
        }
    }

    def __init__(self, logger=None, *args, **kwargs):
        """__init__

        :param *args:
        :param **kwargs:
        """
        super(ContextGraphImport, self).__init__(logger, *args, **kwargs)

        if logger is not None:
            self.logger = logger

        # Entities asked for update
        self.entities_to_update = {}
        # Entities to update
        self.update = {}
        self.delete = []

        self.amqp_pub = AmqpPublisher(get_default_amqp_conn(), self.logger)

    @classmethod
    def check_element(cls, element, type_):
        """Check an element against the schema matching its type.

        :param element: the element to check
        :param type_: the expected type of the element
        :raise: ValueError if the type_ is not correct
        :raise: ValidationError if the element does not match the schema
        """
        if type_ == cls.K_LINKS:
            schema = cls.LINKS_SCHEMA
        elif type_ == cls.K_CIS:
            schema = cls.CIS_SCHEMA
        else:
            raise ValueError("Unknown type {0}\n".format(type_))

        jsonschema.validate(element, schema)

        state = element[ContextGraphImport.K_ACTION]
        if state == ContextGraphImport.A_DISABLE\
           or state == ContextGraphImport.A_ENABLE:
            # raises a KeyError if the matching action property is missing
            element[ContextGraphImport.K_PROPERTIES][state]

    def clean_attributes(self):
        del self.entities_to_update
        del self.update
        del self.delete

        self.entities_to_update = dict()
        self.update = dict()
        self.delete = list()

    def __get_entities_to_update(self, file_):
        """Return every entity id required for the update.

        If a ci or link does not match the schema, a ValidationError is
        raised.

        :param file_: the json file with every action required for the update
        :param rtype: a dict with the entity id as a key and the entity as a
            value
        """
        # a set so no duplicate ids without effort and lower time complexity
        ids_cis = set()
        ids_links = set()

        def __get_entities_to_update_links(file_):
            """Parse the file_ to extract every link"""
            fd = open(file_, 'r')
            for link in ijson.items(fd, "{0}.item".format(self.K_LINKS)):
                self.check_element(link, self.K_LINKS)

                for id_ in link[self.K_FROM]:
                    ids_links.add(id_)
                    # ids_cis.add(ci[self.K_ID])

                ids_links.add(link[self.K_TO])

            fd.close()

        def __get_entities_to_update_cis(file_):
            """Parse the file_ to extract every CI"""
            fd = open(file_, 'r')

            for ci in ijson.items(fd, "{0}.item".format(self.K_CIS)):
                self.check_element(ci, self.K_CIS)

                ids_cis.add(ci[self.K_ID])

                # we need to retrieve every related entity to update the links
                if ci[self.K_ACTION] == self.A_DELETE:
                    # FIXME do the get_entities_by_id in one call. Then add
                    # all impacts/depends
                    entity = self.get_entities_by_id(ci[self.K_ID])[0]
                    for id_ in entity["depends"] + entity["impact"]:
                        ids_cis.add(id_)

            fd.close()

        cis_thd = ExceptionThread(group=None,
                                  target=__get_entities_to_update_cis,
                                  name="cis_thread",
                                  args=(file_, ))

        links_thd = ExceptionThread(group=None,
                                    target=__get_entities_to_update_links,
                                    name="links_thread",
                                    args=(file_, ))

        threads = [cis_thd, links_thd]

        cis_thd.start()
        links_thd.start()

        cis_thd.join()
        links_thd.join()

        # Unqueue and raise existing exceptions
        for thread in threads:
            try:
                excep = thread.except_queue.get_nowait()
            except Queue.Empty:
                pass
            else:
                self.logger.error("Exception in {0}".format(thread.getName()))
                self.logger.exception(excep)
                raise excep

        ids = ids_links.union(ids_cis)

        result = self.get_entities_by_id(list(ids))

        ctx = {}

        # transform "depends" and "impact" lists into sets for improved
        # performance
        for doc in result:
            doc[self.K_DEPENDS] = set(doc[self.K_DEPENDS])
            doc[self.K_IMPACT] = set(doc[self.K_IMPACT])
            ctx[doc[self.K_ID]] = doc

        return ctx

    def __a_delete_entity(self, ci):
        """Update the entities related to the entity to be deleted designated
        by ci, and store them into self.update. Add the id of the entity to
        be deleted into self.delete.

        If the entity to be deleted is not initially stored in the context, a
        ValueError will be raised.

        :param ci: the ci (see the JSON specification).
        """
        id_ = ci[self.K_ID]

        try:
            entity = self.entities_to_update[id_]
        except KeyError:
            desc = "No entity found for the following id : {}".format(id_)
            raise ValueError(desc)

        # Update the depends/impact link
        for ent_id in entity[self.K_DEPENDS]:
            if ent_id in self.delete:
                # the entity of id ent_id is already deleted, skipping
                continue
            if ent_id not in self.update:
                self.update[ent_id] = self.entities_to_update[ent_id].copy()

            try:
                if id_ in self.update[ent_id][self.K_IMPACT]:
                    self.update[ent_id][self.K_IMPACT].remove(id_)
            except ValueError:
                raise ValueError("Try to remove {0} from impacts field of "
                                 "entity {1}.".format(id_, ent_id))

        # Update the impact/depends link
        for ent_id in entity[self.K_IMPACT]:
            if ent_id in self.delete:
                # the entity of id ent_id is already deleted, skipping
                continue
            if ent_id not in self.update:
                self.update[ent_id] = self.entities_to_update[ent_id].copy()

            try:
                if id_ in self.update[ent_id][self.K_DEPENDS]:
                    self.update[ent_id][self.K_DEPENDS].remove(id_)
            except ValueError:
                raise ValueError("Try to remove {0} from impacts field of "
                                 "entity {1}.".format(id_, ent_id))

        if id_ in self.update:
            self.update.pop(id_)

        self.delete.append(id_)

    def __a_update_entity(self, ci):
        """Update the entity with the information stored into the ci and
        store the result into self.update.

        If the entity to be updated is not initially stored in the context, a
        ValueError will be raised.

        :param ci: the ci (see the JSON specification).
        """
        if ci[self.K_ID] not in self.entities_to_update:
            desc = "The ci of id {0} does not match any existing"\
                   " entity.".format(ci[self.K_ID])
            raise ValueError(desc)

        entity = self.entities_to_update[ci[self.K_ID]]

        for key in [self.K_ACTION, self.K_PROPERTIES]:
            try:
                del ci[key]
            except KeyError:
                msg = "No key {0} in ci of id {1}."
                self.logger.debug(msg.format(key, ci[self.K_ID]))

        for key in ci:
            entity[key] = ci[key]

        self.update[ci[self.K_ID]] = entity

    def __a_set_entity(self, ci):
        """Create an entity with a ci and store it into self.update.

        If the new entity is already stored in the context, it is merged with
        the ci and updated.

        :param ci: the ci (see the JSON specification).
        """
        # TODO handle the creation of the name if needed and if the id
        # match the id scheme used in canopsis
        if ci[self.K_ID] in self.entities_to_update:
            desc = ("The ci of id {} match an existing entity. Updating it.".
                    format(ci["_id"]))
            self.logger.info(desc)
            entity_to_update = self.entities_to_update[ci[self.K_ID]].copy()
            dict_merge(entity_to_update, ci)
            ci = entity_to_update

        # set default value for required fields
        if self.K_NAME not in ci:
            ci[self.K_NAME] = ci[self.K_ID]

        if self.K_DEPENDS not in ci:
            ci[self.K_DEPENDS] = set()
        else:
            ci[self.K_DEPENDS] = set(ci[self.K_DEPENDS])

        if self.K_IMPACT not in ci:
            ci[self.K_IMPACT] = set()
        else:
            ci[self.K_IMPACT] = set(ci[self.K_IMPACT])

        if self.K_MEASUREMENTS not in ci:
            ci[self.K_MEASUREMENTS] = []

        if self.K_INFOS not in ci:
            ci[self.K_INFOS] = {}

        for key in [self.K_ACTION, self.K_PROPERTIES]:
            try:
                del ci[key]
            except KeyError:
                self.logger.debug("No key {0} in ci of id {1}.".format(
                    key, ci[self.K_ID]))

        entity = {}
        for key in ci:
            entity[key] = ci[key]

        self.update[ci[self.K_ID]] = entity

    def __a_create_entity(self, ci):
        """Create an entity with a ci and store it into self.update.

        If the new entity is already stored in the context, it is overridden.

        :param ci: the ci (see the JSON specification).
        """
        # TODO handle the creation of the name if needed and if the id
        # match the id scheme used in canopsis
        if ci[self.K_ID] in self.entities_to_update:
            desc = ("The ci of id {} match an existing entity. Overriding it.".
                    format(ci["_id"]))
            self.logger.info(desc)

        # set default value for required fields
        if self.K_NAME not in ci:
            ci[self.K_NAME] = ci[self.K_ID]

        if self.K_DEPENDS not in ci:
            ci[self.K_DEPENDS] = set()
        else:
            ci[self.K_DEPENDS] = set(ci[self.K_DEPENDS])

        if self.K_IMPACT not in ci:
            ci[self.K_IMPACT] = set()
        else:
            ci[self.K_IMPACT] = set(ci[self.K_IMPACT])

        if self.K_MEASUREMENTS not in ci:
            ci[self.K_MEASUREMENTS] = []

        if self.K_INFOS not in ci:
            ci[self.K_INFOS] = {}

        for key in [self.K_ACTION, self.K_PROPERTIES]:
            try:
                del ci[key]
            except KeyError:
                self.logger.debug("No key {0} in ci of id {1}.".format(
                    key, ci[self.K_ID]))

        entity = {}
        for key in ci:
            entity[key] = ci[key]

        self.update[ci[self.K_ID]] = entity

    def __change_state_entity(self, ci, state):
        """Change the state (enable/disable) of an entity and store the
        result into self.update.

        If state does not match enable or disable, a ValueError will be
        raised.

        :param ci: the ci (see the JSON specification).
        :param state: if the state is "disable", the timestamp of the
            deactivation of the entity will be stored into the field
            infos.disable. Same behaviour with "enable" but the timestamp
            will be stored into infos.enable.
        """
        if state != self.A_DISABLE and state != self.A_ENABLE:
            raise ValueError("{0} is not a valid state.".format(state))

        id_ = ci[self.K_ID]

        if id_ not in self.entities_to_update:
            desc = "The ci of id {0} does not match any existing"\
                   " entity.".format(id_)
            raise ValueError(desc)

        # If the entity is not in the update dict, add it
        if id_ not in self.update:
            self.update[id_] = self.entities_to_update[id_].copy()

        if state == self.A_DISABLE:
            key_history = "disable_history"
            key = self.K_DISABLE
            self.update[id_][self.K_ENABLED] = False
        else:
            key_history = "enable_history"
            key = self.K_ENABLE
            self.update[id_][self.K_ENABLED] = True

        # Update the enable/disable history fields of the entity
        timestamp = ci[self.K_PROPERTIES][key]

        if not isinstance(timestamp, list):
            if timestamp is None:
                timestamp = []
            else:
                timestamp = [timestamp]

        if key_history in self.update[id_]:
            if self.update[id_][key_history] is None:
                self.update[id_][key_history] = timestamp
            else:
                self.update[id_][key_history] += timestamp
        else:
            self.update[id_][key_history] = timestamp

    def __a_disable_entity(self, ci):
        """Disable an entity defined by ci. For more information, see
        __change_state.

        :param ci: the ci (see the JSON specification).
        """
        self.__change_state_entity(ci, self.K_DISABLE)

    def __a_enable_entity(self, ci):
        """Enable an entity defined by ci. For more information, see
        __change_state.

        :param ci: the ci (see the JSON specification).
        """
        self.__change_state_entity(ci, self.K_ENABLE)

    def __a_delete_link(self, link):
        """Delete a link between two entities and store the modified entities
        into self.update.

        :param link: the link that identify a link (see the JSON
            specification)
        """
        for id_ in link[self.K_FROM]:
            if id_ not in self.update:
                self.update[id_] = self.entities_to_update[id_]

        if link[self.K_TO] not in self.update:
            to_ = link[self.K_TO]
            self.update[to_] = self.entities_to_update[to_]

        for id_ in link[self.K_FROM]:
            self.update[id_][self.K_IMPACT].remove(link[self.K_TO])
            self.update[link[self.K_TO]][self.K_DEPENDS].remove(id_)

    def __a_update_link(self, link):
        raise NotImplementedError()

    def __a_create_link(self, link):
        """Create a link between two entities and store the modified entities
        into self.update.

        :param link: the link that identify a link (see the JSON
            specification)
        """
        if link[self.K_TO] not in self.update:
            to_ = link[self.K_TO]
            self.update[to_] = self.entities_to_update[to_]

        for ci_id in link[self.K_FROM]:
            if ci_id not in self.update:
                self.update[ci_id] = self.entities_to_update[ci_id]

            self.update[ci_id][self.K_IMPACT].add(link[self.K_TO])
            self.update[link[self.K_TO]][self.K_DEPENDS].add(ci_id)

    def __a_disable_link(self, link):
        raise NotImplementedError()

    def __a_enable_link(self, link):
        raise NotImplementedError()

    @classmethod
    def __superficial_check(cls, fd):
        """Check that the cis and links fields are lists. If not, raise a
        jsonschema.ValidationError. It moves the cursor of the fd back to 0.
        """
        # cis and links store whether a cis/links field is found in the import
        # *_start store whether the beginning of a json array is found in the
        # cis and links fields of an import
        # *_end store whether the end of a json array is found in the cis and
        # links fields of an import
        cis = False
        cis_start = False
        cis_end = False
        links = False
        links_start = False
        links_end = False

        parser = ijson.parse(fd)
        for prefix, event, _ in parser:
            if prefix == "cis":
                cis = True
                if event == "end_array":
                    cis_end = True
                if event == "start_array":
                    cis_start = True
            if prefix == "links":
                links = True
                if event == "end_array":
                    links_end = True
                if event == "start_array":
                    links_start = True

        fd.seek(0)

        cis_status = (cis, cis_start, cis_end)
        links_status = (links, links_start, links_end)

        # ok is a filter to ascertain if a cis/links field of an import is
        # correct.
        ok = [(True, True, True), (False, False, False)]

        if cis_status in ok and links_status in ok:
            return True
        elif cis_status not in ok and links_status not in ok:
            raise jsonschema.ValidationError(
                "CIS and LINKS should be an array.")
        elif cis_status not in ok:
            raise jsonschema.ValidationError("CIS should be an array.")
        elif links_status not in ok:
            raise jsonschema.ValidationError("LINKS should be an array.")

    def import_context(self, uuid):
        """Import a new context.

        :param uuid: the uuid of the import to process
        :return type: a tuple (updated entities, deleted entities)
        """
        file_ = ImportKey.IMPORT_FILE.format(uuid)

        fd = open(file_, 'r')

        # In case the previous import failed and/or raised an exception, we
        # clean now
        self.clean_attributes()

        start = time.time()
        self.__superficial_check(fd)
        end = time.time()
        self.logger.debug("Import {0} : superficial"
                          " check {1}.".format(uuid,
                                               execution_time(end - start)))

        start = time.time()
        self.entities_to_update = self.__get_entities_to_update(file_)
        end = time.time()
        self.logger.debug("Import {0} : get_entities_to"
                          "_update {1}.".format(uuid,
                                                execution_time(end - start)))

        # create/update watcher tracker
        watcher_tracker = set()

        # Process cis list
        start = time.time()
        for ci in ijson.items(fd, "{0}.item".format(self.K_CIS)):
            self.logger.debug("Current ci : {0}".format(ci))
            if ci[self.K_ACTION] == self.A_DELETE:
                self.__a_delete_entity(ci)
            elif ci[self.K_ACTION] == self.A_CREATE:
                if ci.get(self.K_TYPE, '') == 'watcher':
                    watcher_tracker.add(ci.get(self.K_ID))
                self.__a_create_entity(ci)
            elif ci[self.K_ACTION] == self.A_UPDATE:
                if ci.get(self.K_TYPE, '') == 'watcher':
                    watcher_tracker.add(ci.get(self.K_ID))
                self.__a_update_entity(ci)
            elif ci[self.K_ACTION] == self.A_SET:
                self.__a_set_entity(ci)
            elif ci[self.K_ACTION] == self.A_DISABLE:
                self.__a_disable_entity(ci)
            elif ci[self.K_ACTION] == self.A_ENABLE:
                self.__a_enable_entity(ci)
            else:
                raise ValueError("The action {0} is not recognized\n".format(
                    ci[self.K_ACTION]))
        end = time.time()
        self.logger.debug("Import {0} : update cis {1}.".format(
            uuid, execution_time(end - start)))

        fd.seek(0)

        # Process link list
        start = time.time()
        for link in ijson.items(fd, "{0}.item".format(self.K_LINKS)):
            self.logger.debug("Current link : {0}".format(link))
            if link[self.K_ACTION] == self.A_DELETE:
                self.__a_delete_link(link)
            elif link[self.K_ACTION] == self.A_CREATE:
                self.__a_create_link(link)
            elif link[self.K_ACTION] == self.A_UPDATE:
                self.__a_update_link(link)
            elif link[self.K_ACTION] == self.A_DISABLE:
                self.__a_disable_link(link)
            elif link[self.K_ACTION] == self.A_ENABLE:
                self.__a_enable_link(link)
            else:
                raise ValueError("The action {0} is not recognized".format(
                    link[self.K_ACTION]))
        end = time.time()
        self.logger.debug("Import {0} : update links"
                          " {1}.".format(uuid, execution_time(end - start)))

        for id_ in self.update:
            if id_ in self.delete:
                desc = "The entity {0} to be deleted is updated in "\
                       "the same import. Update aborted.".format(id_)
                raise ValueError(desc)

        updated_entities = len(self.update)
        deleted_entities = len(self.delete)

        for entity in self.update.values():
            entity[self.K_IMPACT] = list(entity[self.K_IMPACT])
            entity[self.K_DEPENDS] = list(entity[self.K_DEPENDS])

        start = time.time()
        self._put_entities(self.update.values())

        # send updatewatcher event
        for _id in watcher_tracker:
            event = forger(connector="watcher",
                           connector_name="watcher",
                           event_type="updatewatcher",
                           source_type="component",
                           component=_id)
            self.amqp_pub.canopsis_event(event)

        end = time.time()
        self.logger.debug("Import {0} : push updated"
                          " entities {1}.".format(uuid,
                                                  execution_time(end - start)))

        start = time.time()
        self._delete_entities(self.delete)
        end = time.time()
        self.logger.debug("Import {0} : delete entities"
                          " {1}.".format(uuid, execution_time(end - start)))

        self.clean_attributes()

        return updated_entities, deleted_entities
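A hedged sketch of an import document that satisfies CIS_SCHEMA and LINKS_SCHEMA above (required CI keys: _id, action, type; required link keys: from, to, action). The ids are illustrative; import_context(uuid) expects this JSON to already sit at ImportKey.IMPORT_FILE.format(uuid).

example_import = {
    "cis": [
        {"_id": "comp-1", "name": "comp-1", "type": "component",
         "action": "create"},
        {"_id": "res-1/comp-1", "name": "res-1", "type": "resource",
         "action": "create", "infos": {"source": "example"}}
    ],
    "links": [
        # "from" is a list of ids, "to" a single id, matching LINKS_SCHEMA
        {"from": ["res-1/comp-1"], "to": "comp-1", "action": "create"}
    ]
}
# json.dump(example_import, open(ImportKey.IMPORT_FILE.format(some_uuid), 'w'))
# ContextGraphImport().import_context(some_uuid)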
def test_json_document(self):
    jdoc = {'bla': 'bla'}
    with AmqpConnection(self.amqp_url) as ac:
        amqp_pub = AmqpPublisher(ac, Mock())
        amqp_pub.json_document(jdoc, self.amqp_exname, '#')
def test_canopsis_event(self):
    with AmqpConnection(self.amqp_url) as ac:
        amqp_pub = AmqpPublisher(ac, Mock())
        amqp_pub.canopsis_event(self.event, self.amqp_exname)
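A hedged sketch of the fixture these test methods appear to rely on (self.amqp_url, self.amqp_exname, self.event). The class name, broker URL, exchange name and event body are assumptions, not taken from the source, and the exact keys canopsis_event() requires are not shown in this excerpt.

class TestAmqpPublisher(unittest.TestCase):  # hypothetical fixture

    def setUp(self):
        self.amqp_url = 'amqp://guest:guest@localhost:5672/'  # assumed broker
        self.amqp_exname = 'canopsis.events'                  # assumed exchange
        # Assumed minimal canopsis event; canopsis_event() builds its routing
        # key from fields like these, which is why it raised KeyError on {}.
        self.event = {
            'connector': 'test',
            'connector_name': 'test',
            'event_type': 'check',
            'source_type': 'resource',
            'component': 'comp-1',
            'resource': 'res-1',
            'state': 0,
        }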
class Archiver(Middleware): def __init__( self, namespace, confnamespace='object', storage=None, autolog=False, amqp_pub=None, *args, **kwargs ): super(Archiver, self).__init__() self.namespace = namespace self.namespace_log = namespace + '_log' # Bulk operation configuration self.last_bulk_insert_date = time() self.bulk_ids = [] # How many events can be buffered self.bulk_amount = 500 # What is the maximum duration until bulk insert self.bulk_delay = 3 self.incoming_events = {} self.autolog = autolog self.logger.debug("Init Archiver on %s" % namespace) self.account = Account(user="******", group="root") if not storage: self.logger.debug(" + Get storage") self.storage = get_storage( namespace=namespace, logging_level=self.log_lvl ) else: self.storage = storage self.conf_storage = get_storage( namespace=confnamespace, logging_level=self.log_lvl ) self.conf_collection = self.conf_storage.get_backend(confnamespace) self.collection = self.storage.get_backend(namespace) if amqp_pub is None: self.amqp_pub = AmqpPublisher(get_default_amqp_connection()) self.reset_stealthy_event_duration = time() self.reset_stats() def reset_stats(self): self.stats = { 'update': 0, 'insert ' + self.namespace: 0, 'insert ' + self.namespace_log: 0 } def beat(self): self.logger.info( ( 'DB documents stats : ' + 'update: {} in events, ' + 'insert: {} in events, ' + 'insert: {} in events_log').format( self.stats['update'], self.stats['insert ' + self.namespace], self.stats['insert ' + self.namespace_log] ) ) self.reset_stats() def process_insert_operations_collection(self, operations, collection): self.stats['insert ' + collection] += len(operations) if operations: # is there any event to process ? backend = self.storage.get_backend(collection) bulk = backend.initialize_unordered_bulk_op() for operation in operations: record = Record(operation['event']) record.type = "event" event = record.dump() bulk.insert(event) try: bulk.execute({'w': 0}) except BulkWriteError as bwe: self.logger.warning(bwe.details) self.logger.info('inserted log events {}'.format(len(operations))) def process_update_operations(self, operations): self.stats['update'] += len(operations) if operations: # is there any event to process ? 
backend = self.storage.get_backend('events') bulk = backend.initialize_unordered_bulk_op() for operation in operations: bulk.find(operation['query']).update(operation['update']) bulk.execute({'w': 0}) def process_insert_operations(self, insert_operations): events = {} events_log = {} # Avoid same RK insert for operation in insert_operations: if '_id' not in operation['event']: self.logger.error( 'Unable to find _id value in event {}'.format( operation['event'] ) ) else: _id = operation['event']['_id'] if operation['collection'] == self.namespace: events[_id] = operation elif operation['collection'] == self.namespace_log: _id = '{}.{}'.format(_id, time()) operation['event']['_id'] = _id events_log[_id] = operation else: self.logger.critical( 'Wrong operation type {}'.format( operation['collection'] ) ) self.process_insert_operations_collection( events.values(), 'events' ) self.process_insert_operations_collection( events_log.values(), 'events_log' ) def reload_configuration(self): # Default values self.restore_event = True self.bagot_freq = 10 self.bagot_time = 3600 self.stealthy_time = 360 self.stealthy_show = 360 state_config = self.conf_collection.find_one( {'crecord_type': 'statusmanagement'} ) if state_config is not None: self.bagot_freq = state_config.get('bagot_freq', self.bagot_freq) self.bagot_time = state_config.get('bagot_time', self.bagot_time) self.stealthy_time = state_config.get( 'stealthy_time', self.stealthy_time ) self.stealthy_show = state_config.get( 'stealthy_show', self.stealthy_show ) self.restore_event = state_config.get( 'restore_event', self.restore_event ) self.logger.debug( ( 'Archiver configuration ->\n' + ' bagot_freq {}\n' + ' bagot_time {}\n' + ' stealthy_show {}\n' + ' stealthy_time {}' ).format( self.bagot_freq, self.bagot_time, self.stealthy_show, self.stealthy_time ) ) def reset_status_event(self, reset_type): """Trigger event status reset to off/on going status if event are in BAGOT or STEALTHY status. :param reset_type: event status to consider and change. :type int: This is en enum, can be either BAGOT or STEALTHY """ if reset_type not in [BAGOT, STEALTHY]: self.logger.info('wrong reset type given, will not process') return # Dynamic method parameter depends on reset type input compare_property = { BAGOT: 'last_state_change', STEALTHY: 'ts_first_stealthy' }[reset_type] configuration_delay = { BAGOT: self.bagot_time, STEALTHY: self.stealthy_show }[reset_type] event_cursor = self.collection.find( { 'crecord_type': 'event', 'status': reset_type } ) # Change all potention reset type events for event in event_cursor: # This is a bagot event. 
is_show_delay_passed = \ time() - event[compare_property] >= configuration_delay # Check the stealthy intervals if is_show_delay_passed: self.logger.info( 'Event {} no longer in status {}'.format( event['rk'], reset_type ) ) new_status = ONGOING if event['state'] else OFF self.set_status(event, new_status) event['pass_status'] = 1 self.amqp_pub.canopsis_event(event) def is_bagot(self, event): """ Args: event map of the current evet Returns: ``True`` if the event is bagot ``False`` otherwise """ ts_curr = event['timestamp'] ts_first_bagot = event.get('ts_first_bagot', 0) ts_diff_bagot = (ts_curr - ts_first_bagot) freq = event.get('bagot_freq', -1) result = ts_diff_bagot <= self.bagot_time and freq >= self.bagot_freq return result def is_stealthy(self, event, d_status): """ Args: event map of the current evet d_status status of the previous event Returns: ``True`` if the event is stealthy ``False`` otherwise """ ts_diff = event['timestamp'] - event['ts_first_stealthy'] result = ts_diff <= self.stealthy_time and d_status != STEALTHY return result def set_status(self, event, status, devent=None): """ Args: event map of the current event status status of the current event """ log = 'Status is set to {} for event {}'.format(status, event['rk']) bagot_freq = event.get('bagot_freq', 0) values = { OFF: { 'freq': bagot_freq, 'name': 'Off' }, ONGOING: { 'freq': bagot_freq, 'name': 'On going' }, STEALTHY: { 'freq': bagot_freq, 'name': 'Stealthy' }, BAGOT: { 'freq': bagot_freq + 1, 'name': 'Bagot' }, CANCELED: { 'freq': bagot_freq, 'name': 'Cancelled' } } self.logger.debug(log.format(values[status]['name'])) # This is an additional check as stealthy # status is not properly managed until now if status != STEALTHY: event['status'] = status elif devent['state'] != 0 and event['state'] == 0: delta = time() - event['last_state_change'] if delta < self.stealthy_time: event['status'] = status event['bagot_freq'] = values[status]['freq'] if status not in [STEALTHY, BAGOT]: event['ts_first_stealthy'] = 0 def check_stealthy(self, devent, ts): """ Args: devent map of the previous event ts timestamp of the current event Returns: ``True`` if the event should stay stealthy ``False`` otherwise """ result = False if devent['status'] == STEALTHY: result = (ts - devent['ts_first_stealthy']) <= self.stealthy_show return result def check_statuses(self, event, devent): """ Args: event map of the current event devent map of the previous evet """ if event.get('pass_status', 0): event['pass_status'] = 0 return event_ts = event['timestamp'] event['bagot_freq'] = devent.get('bagot_freq', 0) event['ts_first_stealthy'] = devent.get('ts_first_stealthy', 0) event['ts_first_bagot'] = devent.get('ts_first_bagot', 0) dstate = devent['state'] # Increment frequency if state changed and set first occurences if ((not dstate and event['state']) or dstate and not event['state']): if event['state']: event['ts_first_stealthy'] = event_ts else: event['ts_first_stealthy'] = event_ts event['bagot_freq'] += 1 if not event['ts_first_bagot']: event['ts_first_bagot'] = event_ts # Out of bagot interval, reset variables if event['ts_first_bagot'] - event_ts > self.bagot_time: event['ts_first_bagot'] = 0 event['bagot_freq'] = 0 # If not canceled, proceed to check the status if (devent.get('status', ONGOING) != CANCELED or (dstate != event['state'] and (event['state'] == OFF or dstate == OFF))): # Check the stealthy intervals if self.check_stealthy(devent, event_ts): if self.is_bagot(event): self.set_status(event, BAGOT) else: self.set_status(event, 
STEALTHY, devent=devent) # Else proceed normally else: if (event['state'] == OFF): # If still non-alert, can only be OFF if (not self.is_bagot(event) and not self.is_stealthy(event, devent['status'])): self.set_status(event, OFF) elif self.is_bagot(event): self.set_status(event, BAGOT) elif self.is_stealthy(event, devent['status']): self.set_status(event, STEALTHY, devent=devent) else: # If not bagot/stealthy, can only be ONGOING if (not self.is_bagot(event) and not self.is_stealthy(event, devent['status'])): self.set_status(event, ONGOING) elif self.is_bagot(event): self.set_status(event, BAGOT) elif self.is_stealthy(event, devent['status']): if devent['status'] == OFF: self.set_status(event, ONGOING) else: self.set_status(event, STEALTHY, devent=devent) else: self.set_status(event, CANCELED) def check_event(self, _id, event): """ This method aims to buffer and process incoming events. Processing is done on buffer to reduce database operations. """ # As this was not done until now... setting event primary key event['_id'] = _id # Buffering event informations self.bulk_ids.append(_id) self.incoming_events[_id] = event # Processing many events condition computation bulk_modulo = len(self.bulk_ids) % self.bulk_amount elapsed_time = time() - self.last_bulk_insert_date # When enough event buffered/time elapsed # processing events buffers if bulk_modulo == 0 or elapsed_time > self.bulk_delay: insert_operations = [] update_operations = [] query = {'_id': {'$in': self.bulk_ids}} devents = {} # Put previous events in pretty data structure backend = self.storage.get_backend(self.namespace) for devent in backend.find(query): devents[devent['_id']] = devent # Try to match previous and new incoming event for _id in self.incoming_events: event = self.incoming_events[_id] devent = None if _id in devents: devent = devents[_id] # Effective archiver processing call operations = self.process_an_event(_id, event, devent) for operation in operations: if operation['type'] == 'insert': insert_operations.append(operation) else: update_operations.append(operation) self.process_insert_operations(insert_operations) self.process_update_operations(update_operations) # Buld processing done, reseting informations self.bulk_ids = [] self.incoming_events = {} self.last_bulk_insert_date = time() # Half useless retro compatibility if 'state' in event and event['state']: return _id def process_an_event(self, _id, event, devent): operations = [] changed = False new_event = False state = event['state'] state_type = event['state_type'] now = int(time()) event['timestamp'] = event.get('timestamp', now) try: # Get old record exclusion_fields = { 'perf_data_array', 'processing' } if not devent: new_event = True # may have side effects on acks/cancels devent = {} old_state = devent['state'] old_state_type = devent['state_type'] event['last_state_change'] = devent.get( 'last_state_change', event['timestamp'] ) if state != old_state: event['previous_state'] = old_state if state != old_state or state_type != old_state_type: changed = True self.check_statuses(event, devent) except Exception: # No old record event['ts_first_stealthy'] = 0 changed = True old_state = state if changed: # Tests if change is from alert to non alert if ('last_state_change' in event and (state == 0 or (state > 0 and old_state == 0))): event['previous_state_change_ts'] = event['last_state_change'] event['last_state_change'] = event.get('timestamp', now) if new_event: # copy avoid side effects operations.append({ 'type': 'insert', 'event': event.copy(), 
'collection': 'events' }) else: change = {} # Clear ack related information when event goes ok state # Ack is kept in the current event below if event['state'] == 0 and devent.get('state') != 0: for key in ( 'ticket_declared_author', 'ticket_declared_date', 'ticket_date', 'ticket' ): change[key] = None # keep ack information if status does not reset event if 'ack' in devent: if event['status'] == 0: was_ack = devent.get('ack', {}).get('isAck', False) # save was ack for stats purposes change['ack'] = { 'wasAck': was_ack } else: change['ack'] = devent['ack'] # remove was ack for accurate stats # when event change to any alert state if 'wasAck' in change['ack']: del change['ack']['wasAck'] # keep cancel information if status does not reset event if 'cancel' in devent: if event['status'] not in [0, 1]: change['cancel'] = devent['cancel'] else: change['cancel'] = {} # Remove ticket information in case state is back to normal # (both ack and ticket declaration case) if 'ticket_declared_author' in devent and event['status'] == 0: change['ticket_declared_author'] = None change['ticket_declared_date'] = None # Remove ticket information in case state is back to normal # (ticket number declaration only case) if 'ticket' in devent and event['status'] == 0: del devent['ticket'] if 'ticket_date' in devent: del devent['ticket_date'] # Generate diff change from old event to new event for key in event: if key not in exclusion_fields: if (key in event and key in devent and devent[key] != event[key]): change[key] = event[key] elif key in event and key not in devent: change[key] = event[key] # Manage keep state key that allow # from UI to keep the choosen state # into until next ok state event_reset = False # When a event is ok again, dismiss keep_state statement if devent.get('keep_state') and event['state'] == 0: change['keep_state'] = False event_reset = True # assume we do not just received a keep state and # if keep state was sent previously # then override state of new event if 'keep_state' not in event: if not event_reset and devent.get('keep_state'): change['state'] = devent['state'] # Keep previous output if 'keep_state' in event: change['change_state_output'] = event['output'] change['output'] = devent.get('output', '') if change: operations.append( { 'type': 'update', 'update': {'$set': change}, 'query': {'_id': _id}, 'collection': 'events' } ) # I think that is the right condition to log have_to_log = event.get('previous_state', state) != state if have_to_log: # store ack information to log collection if 'ack' in devent: event['ack'] = devent['ack'] self.logger.info(' + State changed, have to log {}'.format(_id)) # copy avoid side effects operations.append( { 'type': 'insert', 'event': event.copy(), 'collection': 'events_log' } ) return operations
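The check_event() method above buffers incoming events and only touches the database once either bulk_amount events are queued or bulk_delay seconds have passed since the last flush. Below is a minimal, self-contained sketch of that flush policy; the EventBuffer name and its methods are illustrative only and are not part of the Canopsis API.

# Toy reproduction of the size-or-time flush condition used by check_event().
from time import time


class EventBuffer(object):
    def __init__(self, bulk_amount=500, bulk_delay=3):
        self.bulk_amount = bulk_amount   # max buffered events before a flush
        self.bulk_delay = bulk_delay     # max seconds between two flushes
        self.pending = {}                # _id -> event, like incoming_events
        self.last_flush = time()

    def add(self, _id, event):
        self.pending[_id] = event
        if (len(self.pending) >= self.bulk_amount
                or time() - self.last_flush > self.bulk_delay):
            return self.flush()
        return []

    def flush(self):
        # In the real Archiver this is where find(), bulk inserts and bulk
        # updates against MongoDB happen.
        flushed = list(self.pending.values())
        self.pending = {}
        self.last_flush = time()
        return flushed


if __name__ == '__main__':
    buf = EventBuffer(bulk_amount=3, bulk_delay=60)
    for i in range(4):
        out = buf.add('rk-{0}'.format(i), {'state': 0})
        print(i, len(out))   # the third add triggers a flush of 3 events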
def __init__(self, next_amqp_queues=[], next_balanced=False, name="worker1",
             beat_interval=60, logging_level=INFO,
             exchange_name=DIRECT_EXCHANGE_NAME, routing_keys=[],
             camqp_custom=None, max_retries=5, *args, **kwargs):
    super(Engine, self).__init__()

    self.logging_level = logging_level
    self.debug = logging_level == DEBUG

    init = Init()
    self.logger = init.getLogger(name, logging_level=self.logging_level)

    log_handler = FileHandler(
        filename=join(
            root_path, 'var', 'log', 'engines', '{0}.log'.format(name)
        )
    )
    log_handler.setFormatter(
        Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
    )

    # Log in file
    self.logger.addHandler(log_handler)

    self.RUN = True

    self.name = name

    # Set parametrized Amqp for testing purposes
    if camqp_custom is None:
        self.Amqp = Amqp
    else:
        self.Amqp = camqp_custom

    # self.amqp handles the consumption of events from rabbitmq. The
    # publication of events from self.amqp is deprecated.
    self.amqp = None

    # self.beat_amqp_publisher and self.work_amqp_publisher handle the
    # publication of events (they are separated to prevent sharing a
    # channel between two threads).
    self.beat_amqp_publisher = AmqpPublisher(
        get_default_amqp_connection(), self.logger)
    self.work_amqp_publisher = AmqpPublisher(
        get_default_amqp_connection(), self.logger)

    self.amqp_queue = "Engine_{0}".format(self.name)
    self.routing_keys = routing_keys
    self.exchange_name = exchange_name

    self.perfdata_retention = 3600

    self.next_amqp_queues = next_amqp_queues
    self.get_amqp_queue = cycle(self.next_amqp_queues)

    # Get from internal or external queue
    self.next_balanced = next_balanced
    self.max_retries = max_retries

    self.counter_error = 0
    self.counter_event = 0
    self.counter_worktime = 0

    self.thd_warn_sec_per_evt = 0.6
    self.thd_crit_sec_per_evt = 0.9

    self.beat_interval = beat_interval
    self.beat_last = time()

    self.create_queue = True
    self.send_stats_event = True

    self.rk_on_error = []

    self.last_stat = int(time())

    self.logger.info("Engine initialized")
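The constructor above builds self.get_amqp_queue with itertools.cycle so that events forwarded to the chained engines are spread round-robin over next_amqp_queues. A small standalone sketch of that rotation; the queue names are made up for the example and follow the "Engine_<name>" pattern used for self.amqp_queue.

from itertools import cycle

next_amqp_queues = ['Engine_cleaner_events', 'Engine_eventstore']
get_amqp_queue = cycle(next_amqp_queues)

for _ in range(4):
    # Each forwarded event goes to the next queue in the rotation.
    print(next(get_amqp_queue))
# -> Engine_cleaner_events, Engine_eventstore, Engine_cleaner_events, ...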
def _amqp_setup(self):
    self.amqp_conn = AmqpConnection(self.AMQP_URL)
    self.amqp_pub = AmqpPublisher(self.amqp_conn)
class Archiver(Middleware): def __init__( self, namespace, confnamespace='object', storage=None, autolog=False, amqp_pub=None, *args, **kwargs ): super(Archiver, self).__init__() self.namespace = namespace self.namespace_log = namespace + '_log' # Bulk operation configuration self.last_bulk_insert_date = time() self.bulk_ids = [] # How many events can be buffered self.bulk_amount = 500 # What is the maximum duration until bulk insert self.bulk_delay = 3 self.incoming_events = {} self.autolog = autolog self.logger.debug("Init Archiver on %s" % namespace) self.account = Account(user="******", group="root") if not storage: self.logger.debug(" + Get storage") self.storage = get_storage( namespace=namespace, logging_level=self.log_lvl ) else: self.storage = storage self.conf_storage = get_storage( namespace=confnamespace, logging_level=self.log_lvl ) self.conf_collection = self.conf_storage.get_backend(confnamespace) self.collection = self.storage.get_backend(namespace) if amqp_pub is None: self.amqp_pub = AmqpPublisher( get_default_amqp_connection(), self.logger) self.reset_stealthy_event_duration = time() self.reset_stats() def reset_stats(self): self.stats = { 'update': 0, 'insert ' + self.namespace: 0, 'insert ' + self.namespace_log: 0 } def beat(self): self.logger.info( ( 'DB documents stats : ' + 'update: {} in events, ' + 'insert: {} in events, ' + 'insert: {} in events_log').format( self.stats['update'], self.stats['insert ' + self.namespace], self.stats['insert ' + self.namespace_log] ) ) self.reset_stats() def process_insert_operations_collection(self, operations, collection): self.stats['insert ' + collection] += len(operations) if operations: # is there any event to process ? backend = self.storage.get_backend(collection) bulk = backend.initialize_unordered_bulk_op() for operation in operations: record = Record(operation['event']) record.type = "event" event = record.dump() bulk.insert(event) try: bulk.execute({'w': 0}) except BulkWriteError as bwe: self.logger.warning(bwe.details) self.logger.info('inserted log events {}'.format(len(operations))) def process_update_operations(self, operations): self.stats['update'] += len(operations) if operations: # is there any event to process ? 
backend = self.storage.get_backend('events') bulk = backend.initialize_unordered_bulk_op() for operation in operations: bulk.find(operation['query']).update(operation['update']) bulk.execute({'w': 0}) def process_insert_operations(self, insert_operations): events = {} events_log = {} # Avoid same RK insert for operation in insert_operations: if '_id' not in operation['event']: self.logger.error( 'Unable to find _id value in event {}'.format( operation['event'] ) ) else: _id = operation['event']['_id'] if operation['collection'] == self.namespace: events[_id] = operation elif operation['collection'] == self.namespace_log: _id = '{}.{}'.format(_id, time()) operation['event']['_id'] = _id events_log[_id] = operation else: self.logger.critical( 'Wrong operation type {}'.format( operation['collection'] ) ) self.process_insert_operations_collection( events.values(), 'events' ) self.process_insert_operations_collection( events_log.values(), 'events_log' ) def reload_configuration(self): # Default values self.restore_event = True self.bagot_freq = 10 self.bagot_time = 3600 self.stealthy_time = 360 self.stealthy_show = 360 state_config = self.conf_collection.find_one( {'crecord_type': 'statusmanagement'} ) if state_config is not None: self.bagot_freq = state_config.get('bagot_freq', self.bagot_freq) self.bagot_time = state_config.get('bagot_time', self.bagot_time) self.stealthy_time = state_config.get( 'stealthy_time', self.stealthy_time ) self.stealthy_show = state_config.get( 'stealthy_show', self.stealthy_show ) self.restore_event = state_config.get( 'restore_event', self.restore_event ) self.logger.debug( ( 'Archiver configuration ->\n' + ' bagot_freq {}\n' + ' bagot_time {}\n' + ' stealthy_show {}\n' + ' stealthy_time {}' ).format( self.bagot_freq, self.bagot_time, self.stealthy_show, self.stealthy_time ) ) def reset_status_event(self, reset_type): """Trigger event status reset to off/on going status if event are in BAGOT or STEALTHY status. :param reset_type: event status to consider and change. :type int: This is en enum, can be either BAGOT or STEALTHY """ if reset_type not in [BAGOT, STEALTHY]: self.logger.info('wrong reset type given, will not process') return # Dynamic method parameter depends on reset type input compare_property = { BAGOT: 'last_state_change', STEALTHY: 'ts_first_stealthy' }[reset_type] configuration_delay = { BAGOT: self.bagot_time, STEALTHY: self.stealthy_show }[reset_type] event_cursor = self.collection.find( { 'crecord_type': 'event', 'status': reset_type } ) # Change all potention reset type events for event in event_cursor: # This is a bagot event. 
is_show_delay_passed = \ time() - event[compare_property] >= configuration_delay # Check the stealthy intervals if is_show_delay_passed: self.logger.info( 'Event {} no longer in status {}'.format( event['rk'], reset_type ) ) new_status = ONGOING if event['state'] else OFF self.set_status(event, new_status) event['pass_status'] = 1 self.amqp_pub.canopsis_event(event) def is_bagot(self, event): """ Args: event map of the current evet Returns: ``True`` if the event is bagot ``False`` otherwise """ ts_curr = event['timestamp'] ts_first_bagot = event.get('ts_first_bagot', 0) ts_diff_bagot = (ts_curr - ts_first_bagot) freq = event.get('bagot_freq', -1) result = ts_diff_bagot <= self.bagot_time and freq >= self.bagot_freq return result def is_stealthy(self, event, d_status): """ Args: event map of the current evet d_status status of the previous event Returns: ``True`` if the event is stealthy ``False`` otherwise """ ts_diff = event['timestamp'] - event['ts_first_stealthy'] result = ts_diff <= self.stealthy_time and d_status != STEALTHY return result def set_status(self, event, status, devent=None): """ Args: event map of the current event status status of the current event """ log = 'Status is set to {} for event {}'.format(status, event['rk']) bagot_freq = event.get('bagot_freq', 0) values = { OFF: { 'freq': bagot_freq, 'name': 'Off' }, ONGOING: { 'freq': bagot_freq, 'name': 'On going' }, STEALTHY: { 'freq': bagot_freq, 'name': 'Stealthy' }, BAGOT: { 'freq': bagot_freq + 1, 'name': 'Bagot' }, CANCELED: { 'freq': bagot_freq, 'name': 'Cancelled' } } self.logger.debug(log.format(values[status]['name'])) # This is an additional check as stealthy # status is not properly managed until now if status != STEALTHY: event['status'] = status elif devent['state'] != 0 and event['state'] == 0: delta = time() - event['last_state_change'] if delta < self.stealthy_time: event['status'] = status event['bagot_freq'] = values[status]['freq'] if status not in [STEALTHY, BAGOT]: event['ts_first_stealthy'] = 0 def check_stealthy(self, devent, ts): """ Args: devent map of the previous event ts timestamp of the current event Returns: ``True`` if the event should stay stealthy ``False`` otherwise """ result = False if devent['status'] == STEALTHY: result = (ts - devent['ts_first_stealthy']) <= self.stealthy_show return result def check_statuses(self, event, devent): """ Args: event map of the current event devent map of the previous evet """ if event.get('pass_status', 0): event['pass_status'] = 0 return event_ts = event['timestamp'] event['bagot_freq'] = devent.get('bagot_freq', 0) event['ts_first_stealthy'] = devent.get('ts_first_stealthy', 0) event['ts_first_bagot'] = devent.get('ts_first_bagot', 0) dstate = devent['state'] # Increment frequency if state changed and set first occurences if ((not dstate and event['state']) or dstate and not event['state']): if event['state']: event['ts_first_stealthy'] = event_ts else: event['ts_first_stealthy'] = event_ts event['bagot_freq'] += 1 if not event['ts_first_bagot']: event['ts_first_bagot'] = event_ts # Out of bagot interval, reset variables if event['ts_first_bagot'] - event_ts > self.bagot_time: event['ts_first_bagot'] = 0 event['bagot_freq'] = 0 # If not canceled, proceed to check the status if (devent.get('status', ONGOING) != CANCELED or (dstate != event['state'] and (event['state'] == OFF or dstate == OFF))): # Check the stealthy intervals if self.check_stealthy(devent, event_ts): if self.is_bagot(event): self.set_status(event, BAGOT) else: self.set_status(event, 
STEALTHY, devent=devent) # Else proceed normally else: if (event['state'] == OFF): # If still non-alert, can only be OFF if (not self.is_bagot(event) and not self.is_stealthy(event, devent['status'])): self.set_status(event, OFF) elif self.is_bagot(event): self.set_status(event, BAGOT) elif self.is_stealthy(event, devent['status']): self.set_status(event, STEALTHY, devent=devent) else: # If not bagot/stealthy, can only be ONGOING if (not self.is_bagot(event) and not self.is_stealthy(event, devent['status'])): self.set_status(event, ONGOING) elif self.is_bagot(event): self.set_status(event, BAGOT) elif self.is_stealthy(event, devent['status']): if devent['status'] == OFF: self.set_status(event, ONGOING) else: self.set_status(event, STEALTHY, devent=devent) else: self.set_status(event, CANCELED) def check_event(self, _id, event): """ This method aims to buffer and process incoming events. Processing is done on buffer to reduce database operations. """ # As this was not done until now... setting event primary key event['_id'] = _id # Buffering event informations self.bulk_ids.append(_id) self.incoming_events[_id] = event # Processing many events condition computation bulk_modulo = len(self.bulk_ids) % self.bulk_amount elapsed_time = time() - self.last_bulk_insert_date # When enough event buffered/time elapsed # processing events buffers if bulk_modulo == 0 or elapsed_time > self.bulk_delay: insert_operations = [] update_operations = [] query = {'_id': {'$in': self.bulk_ids}} devents = {} # Put previous events in pretty data structure backend = self.storage.get_backend(self.namespace) for devent in backend.find(query): devents[devent['_id']] = devent # Try to match previous and new incoming event for _id in self.incoming_events: event = self.incoming_events[_id] devent = None if _id in devents: devent = devents[_id] # Effective archiver processing call operations = self.process_an_event(_id, event, devent) for operation in operations: if operation['type'] == 'insert': insert_operations.append(operation) else: update_operations.append(operation) self.process_insert_operations(insert_operations) self.process_update_operations(update_operations) # Buld processing done, reseting informations self.bulk_ids = [] self.incoming_events = {} self.last_bulk_insert_date = time() # Half useless retro compatibility if 'state' in event and event['state']: return _id def process_an_event(self, _id, event, devent): operations = [] changed = False new_event = False state = event['state'] state_type = event['state_type'] now = int(time()) event['timestamp'] = event.get('timestamp', now) try: # Get old record exclusion_fields = { 'perf_data_array', 'processing' } if not devent: new_event = True # may have side effects on acks/cancels devent = {} old_state = devent['state'] old_state_type = devent['state_type'] event['last_state_change'] = devent.get( 'last_state_change', event['timestamp'] ) if state != old_state: event['previous_state'] = old_state if state != old_state or state_type != old_state_type: changed = True self.check_statuses(event, devent) except Exception: # No old record event['ts_first_stealthy'] = 0 changed = True old_state = state if changed: # Tests if change is from alert to non alert if ('last_state_change' in event and (state == 0 or (state > 0 and old_state == 0))): event['previous_state_change_ts'] = event['last_state_change'] event['last_state_change'] = event.get('timestamp', now) if new_event: # copy avoid side effects operations.append({ 'type': 'insert', 'event': event.copy(), 
'collection': 'events' }) else: change = {} # Clear ack related information when event goes ok state # Ack is kept in the current event below if event['state'] == 0 and devent.get('state') != 0: for key in ( 'ticket_declared_author', 'ticket_declared_date', 'ticket_date', 'ticket' ): change[key] = None # keep ack information if status does not reset event if 'ack' in devent: if event['status'] == 0: was_ack = devent.get('ack', {}).get('isAck', False) # save was ack for stats purposes change['ack'] = { 'wasAck': was_ack } else: change['ack'] = devent['ack'] # remove was ack for accurate stats # when event change to any alert state if 'wasAck' in change['ack']: del change['ack']['wasAck'] # keep cancel information if status does not reset event if 'cancel' in devent: if event['status'] not in [0, 1]: change['cancel'] = devent['cancel'] else: change['cancel'] = {} # Remove ticket information in case state is back to normal # (both ack and ticket declaration case) if 'ticket_declared_author' in devent and event['status'] == 0: change['ticket_declared_author'] = None change['ticket_declared_date'] = None # Remove ticket information in case state is back to normal # (ticket number declaration only case) if 'ticket' in devent and event['status'] == 0: del devent['ticket'] if 'ticket_date' in devent: del devent['ticket_date'] # Generate diff change from old event to new event for key in event: if key not in exclusion_fields: if (key in event and key in devent and devent[key] != event[key]): change[key] = event[key] elif key in event and key not in devent: change[key] = event[key] # Manage keep state key that allow # from UI to keep the choosen state # into until next ok state event_reset = False # When a event is ok again, dismiss keep_state statement if devent.get('keep_state') and event['state'] == 0: change['keep_state'] = False event_reset = True # assume we do not just received a keep state and # if keep state was sent previously # then override state of new event if 'keep_state' not in event: if not event_reset and devent.get('keep_state'): change['state'] = devent['state'] # Keep previous output if 'keep_state' in event: change['change_state_output'] = event['output'] change['output'] = devent.get('output', '') if change: operations.append( { 'type': 'update', 'update': {'$set': change}, 'query': {'_id': _id}, 'collection': 'events' } ) # I think that is the right condition to log have_to_log = event.get('previous_state', state) != state if have_to_log: # store ack information to log collection if 'ack' in devent: event['ack'] = devent['ack'] self.logger.info(' + State changed, have to log {}'.format(_id)) # copy avoid side effects operations.append( { 'type': 'insert', 'event': event.copy(), 'collection': 'events_log' } ) return operations
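The status decisions in check_statuses() hinge on the two time-window predicates is_bagot() and is_stealthy(). The standalone toy below reproduces them with the default thresholds set in reload_configuration(); the numeric value used for the STEALTHY status is an assumption made only for this sketch.

BAGOT_FREQ = 10       # default self.bagot_freq
BAGOT_TIME = 3600     # default self.bagot_time (seconds)
STEALTHY_TIME = 360   # default self.stealthy_time (seconds)
STEALTHY = 2          # assumed numeric value of the STEALTHY status


def is_bagot(event):
    ts_diff = event['timestamp'] - event.get('ts_first_bagot', 0)
    return ts_diff <= BAGOT_TIME and event.get('bagot_freq', -1) >= BAGOT_FREQ


def is_stealthy(event, d_status):
    ts_diff = event['timestamp'] - event['ts_first_stealthy']
    return ts_diff <= STEALTHY_TIME and d_status != STEALTHY


event = {
    'timestamp': 1000,
    'ts_first_bagot': 900,      # 11 state flaps within the last 100 s ...
    'bagot_freq': 11,
    'ts_first_stealthy': 950,   # ... and the last flap only 50 s ago
}
print(is_bagot(event))          # True: frequent flapping inside bagot_time
print(is_stealthy(event, 1))    # True: back to OK within stealthy_time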
def _amqp_setup(self):
    logger = logging.getLogger("test_base")
    self.amqp_conn = AmqpConnection(self.AMQP_URL)
    self.amqp_pub = AmqpPublisher(self.amqp_conn, logger)
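A hedged usage sketch for the fixture above, meant to sit on the same TestCase as _amqp_setup(): it publishes one event through the publisher the fixture creates. It assumes a broker is reachable at AMQP_URL and that the fields below (mirroring the forger() call in the Watcher example further down) are enough for canopsis_event() to build a routing key; the exact set of required keys depends on the AmqpPublisher version.

def test_publish_minimal_event(self):
    self._amqp_setup()
    event = {
        'connector': 'canopsis',
        'connector_name': 'test_base',
        'event_type': 'check',
        'source_type': 'component',
        'component': 'dummy_component',
        'state': 0,
        'output': 'everything is fine',
    }
    # canopsis_event raises KeyError if a mandatory routing-key field is missing.
    self.amqp_pub.canopsis_event(event)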
class Watcher: """Watcher class""" def __init__(self, amqp_pub=None): """ :param amqp_pub canopsis.common.amqp.AmqpPublisher: """ self.logger = Logger.get('watcher', LOG_PATH) self.watcher_storage = Middleware.get_middleware_by_uri( 'mongodb-default-watcher://') self.alert_storage = Middleware.get_middleware_by_uri( 'mongodb-periodical-alarm://') self.sla_storage = Middleware.get_middleware_by_uri( 'storage-default-sla://') self.context_graph = ContextGraph(self.logger) self.pbehavior_manager = PBehaviorManager( *PBehaviorManager.provide_default_basics() ) self.amqp_pub = amqp_pub if amqp_pub is None: self.amqp_pub = AmqpPublisher(get_default_amqp_conn(), self.logger) def get_watcher(self, watcher_id): """Retreive from database the watcher specified by is watcher id. :param str watcher_id: the watcher id :return dict: the wanted watcher. None, if no watcher match the watcher_id """ watcher = self.context_graph.get_entities_by_id(watcher_id) try: return watcher[0] except IndexError: return None def create_watcher(self, body): """ Create watcher entity in context and link to entities. :param dict body: watcher conf """ watcher_id = body['_id'] try: watcher_finder = json.loads(body['mfilter']) except ValueError: self.logger.error('can t decode mfilter') return None except KeyError: self.logger.error('no filter') return None depends_list = self.context_graph.get_entities( query=watcher_finder, projection={'_id': 1} ) self.watcher_storage.put_element(body) depend_list = [] for entity_id in depends_list: depend_list.append(entity_id['_id']) entity = ContextGraph.create_entity_dict( id=watcher_id, name=body['display_name'], etype='watcher', impact=[], depends=depend_list ) # adding the fields specific to the Watcher entities entity['mfilter'] = body['mfilter'] entity['state'] = 0 try: self.context_graph.create_entity(entity) except ValueError: self.context_graph.update_entity(entity) self.compute_state(watcher_id) return True # TODO: return really something def update_watcher(self, watcher_id, updated_field): """Update the watcher specified by is watcher id with updated_field. Raise a ValueError, if the watcher_id do not match any entity. :param str watcher_id: the watcher_id of the watcher to update :param dict updated_field: the fields to update :returns: the updated Watcher :rtype: <Watcher> """ watcher = self.get_watcher(watcher_id) if watcher is None: raise ValueError("No watcher found for the following" " id: {}".format(watcher_id)) if "mfilter" in watcher.keys() and "mfilter" in updated_field.keys(): if updated_field['mfilter'] != watcher['mfilter']: watcher['mfilter'] = updated_field['mfilter'] query = json.loads(updated_field['mfilter']) entities = self.context_graph.get_entities( query=query, projection={'_id': 1}) watcher["depends"] = [entity["_id"] for entity in entities] for key in updated_field: if key == "infos": # update fields inside infos for info_key in updated_field["infos"]: watcher["infos"][info_key] = updated_field["infos"][ info_key] watcher[key] = updated_field[key] self.context_graph.update_entity(watcher) def delete_watcher(self, watcher_id): """ Delete watcher & disable watcher entity in context. :param string watcher_id: watcher_id :returns: the mongodb dict response """ self.context_graph.delete_entity(watcher_id) self.sla_storage.remove_elements(ids=[watcher_id]) return self.watcher_storage.remove_elements(ids=[watcher_id]) def alarm_changed(self, alarm_id): """ Launch a computation of a watcher state. 
:param alarm_id: alarm id """ watchers = self.context_graph.get_entities(query={'type': 'watcher'}) for i in watchers: if alarm_id in i['depends']: self.compute_state(i['_id']) def compute_watchers(self): """ Compute all watchers states. """ watchers = list(self.watcher_storage.get_elements(query={})) for watcher in watchers: self.compute_state(watcher['_id']) def compute_state(self, watcher_id): """ Send an event watcher with the new state of the watcher. :param watcher_id: watcher id """ try: watcher_entity = self.context_graph.get_entities( query={'_id': watcher_id})[0] except IndexError: return None entities = watcher_entity['depends'] query = {"_id": {"$in": entities}, "enabled": True} cursor = self.context_graph.get_entities(query=query, projection={"_id": 1}) entities = [] for ent in cursor: entities.append(ent["_id"]) display_name = watcher_entity['name'] alarm_list = list(self.alert_storage._backend.find({ '$and': [ {'d': {'$in': entities}}, { '$or': [ {'v.resolved': None}, {'v.resolved': {'$exists': False}} ] } ] })) states = [] for alarm in alarm_list: pbh_alarm = self.pbehavior_manager.get_pbehaviors_by_eid(alarm['d']) active_pbh = [] now = int(time.time()) for pbh in pbh_alarm: if self.pbehavior_manager.check_active_pbehavior(now, pbh): active_pbh.append(pbh) if len(active_pbh) == 0: states.append(alarm['v']['state']['val']) nb_entities = len(entities) nb_crit = states.count(Check.CRITICAL) nb_major = states.count(Check.MAJOR) nb_minor = states.count(Check.MINOR) nb_ok = nb_entities - (nb_crit + nb_major + nb_minor) # here add selection for calculation method actually it's worst state # by default and think to add pbehavior in tab computed_state = self.worst_state(nb_crit, nb_major, nb_minor) output = '{0} ok, {1} minor, {2} major, {3} critical'.format( nb_ok, nb_minor, nb_major, nb_crit) if computed_state != watcher_entity.get('state', None): watcher_entity['state'] = computed_state self.context_graph.update_entity(watcher_entity) self.publish_event( display_name, computed_state, output, watcher_entity['_id'] ) def compute_slas(self): """ Launch the sla calcul for each watchers. """ watcher_list = self.context_graph.get_entities( query={'type': 'watcher', 'infos.enabled': True}) for watcher in watcher_list: self.sla_compute(watcher['_id'], watcher['infos']['state']) def publish_event(self, display_name, computed_state, output, _id): """ Publish an event watcher on amqp. TODO: move that elsewhere (not specific to watchers) :param display_name: watcher display_name :param computed_state: watcher state :param output: watcher output """ event = forger( connector="canopsis", connector_name="engine", event_type="watcher", source_type="component", component=_id, state=computed_state, output=output, perf_data_array=[], display_name=display_name) self.amqp_pub.canopsis_event(event) def sla_compute(self, watcher_id, state): """ Launch the sla calcul. 
        :param watcher_id: watcher id
        :param state: watcher state
        """
        # sla_tab = list(
        #     self.sla_storage.get_elements(query={'_id': watcher_id}))[0]
        # sla_tab['states'][state] = sla_tab['states'][state] + 1

        # self.sla_storage.put_element(sla_tab)

        # watcher_conf = list(
        #     self[self.WATCHER_STORAGE].get_elements(
        #         query={'_id': watcher_id})
        # )[0]

        # sla = Sla(self[self.WATCHER_STORAGE],
        #           'test/de/rk/on/verra/plus/tard',
        #           watcher_conf['sla_output_tpl'],
        #           watcher_conf['sla_timewindow'],
        #           watcher_conf['sla_warning'],
        #           watcher_conf['alert_level'],
        #           watcher_conf['display_name'])

        # self.logger.critical('{0}'.format((
        #     sla_tab['states'] /
        #     (sla_tab['states'][1] +
        #      sla_tab['states'][2] +
        #      sla_tab['states'][3]))))
        pass

    @staticmethod
    def worst_state(nb_crit, nb_major, nb_minor):
        """Calculate the worst state.

        :param int nb_crit: critical number
        :param int nb_major: major number
        :param int nb_minor: minor number
        :return int state: return the worst state
        """
        if nb_crit > 0:
            return 3
        elif nb_major > 0:
            return 2
        elif nb_minor > 0:
            return 1
        return 0
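compute_state() drops alarms whose entity is under an active pbehavior, then reduces the remaining alarm states with worst_state(). The standalone sketch below (not Canopsis code) reproduces that reduction; the 3/2/1 values for Check.CRITICAL/MAJOR/MINOR are assumed from the worst_state() return codes above.

CRITICAL, MAJOR, MINOR = 3, 2, 1


def watcher_state(alarm_states, entities_under_pbehavior):
    # Keep only the alarms whose entity has no active pbehavior, then take
    # the worst remaining state, as compute_state() does.
    states = [state for entity, state in alarm_states
              if entity not in entities_under_pbehavior]
    nb_crit = states.count(CRITICAL)
    nb_major = states.count(MAJOR)
    nb_minor = states.count(MINOR)
    if nb_crit > 0:
        return CRITICAL
    elif nb_major > 0:
        return MAJOR
    elif nb_minor > 0:
        return MINOR
    return 0


alarms = [('srv1', CRITICAL), ('srv2', MINOR), ('srv3', MAJOR)]
print(watcher_state(alarms, set()))      # 3: the critical alarm wins
print(watcher_state(alarms, {'srv1'}))   # 2: srv1 is in maintenance, worst left is MAJOR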