def test_flexible_downtime_service(self):
    """Test the broks raised by a flexible downtime on a service

    A flexible downtime (fixed flag = 0) must not raise a 'downtime_raise'
    brok while the service is OK; the brok is only raised once the service
    enters a problem state within the downtime window.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')
    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    # To make tests quicker we make notifications send very quickly
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 0, 'OK']])
    time.sleep(0.1)

    # schedule a 5 seconds downtime
    duration = 5
    now = int(time.time())
    # downtime valid for 5 seconds from now
    # Flexible downtime: fixed=0, activation window is [now, now + 3600]
    cmd = "[%lu] SCHEDULE_SVC_DOWNTIME;test_host_0;test_ok_0;%d;%d;0;0;%d;" \
          "downtime author;downtime comment" % (now, now, now + 3600, duration)
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(2, [[host, 0, 'UP'], [svc, 0, 'OK']])

    brok_downtime_raise = []
    brok_downtime_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    # The service is still OK: the flexible downtime did not start yet
    assert len(brok_downtime_raise) == 0
    assert len(brok_downtime_expire) == 0

    time.sleep(1)
    self._main_broker.broks = []

    # The service goes CRITICAL: the flexible downtime must now be raised
    self.scheduler_loop(3, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    for brok in self._main_broker.broks:
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    assert len(brok_downtime_raise) == 1
    assert len(brok_downtime_expire) == 0

    # The downtime brok data identifies the concerned host and service
    hdata = unserialize(brok_downtime_raise[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
def put_results(self):
    """Put results to scheduler, used by poller or reactionner when they
    are in active mode (passive = False)

    This is an HTTP endpoint: the data is read from the cherrypy JSON
    request body, not from function arguments. The body must provide:
    - 'from': the poller/reactionner identification (str)
    - 'results': a serialized list of action results

    This function is not intended for external use. Let the poller and
    reactionner manage all this stuff by themselves ;)

    :return: True
    :rtype: bool
    """
    res = cherrypy.request.json
    who_sent = res['from']
    results = res['results']

    # un-serialize the received results (no_load=True — presumably keeps
    # the items lightly loaded for the scheduler to consume; confirm
    # against the unserialize implementation)
    results = unserialize(results, no_load=True)
    if results:
        logger.debug("Got some results: %d results from %s", len(results), who_sent)
    else:
        logger.debug("-> no results")

    for result in results:
        logger.debug("-> result: %s", result)

        # Append to the scheduler result queue
        self.app.sched.waiting_results.put(result)

    return True
def test_brok_checks_results(self):
    """Test broks checks results

    Run one check for an host (DOWN) and a service (OK), then verify that
    exactly one 'host_check_result' brok and one 'service_check_result'
    brok were raised, with the expected state and state type.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')
    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    # To make tests quicker we make notifications send very quickly
    svc.notification_interval = 0.001
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 0, 'OK']])
    time.sleep(0.1)

    host_check_results = []
    service_check_results = []
    for brok in self._main_broker.broks:
        if brok.type == 'host_check_result':
            print(("Brok %s: %s" % (brok.type, brok)))
            host_check_results.append(brok)
        elif brok.type == 'service_check_result':
            print(("Brok %s: %s" % (brok.type, brok)))
            service_check_results.append(brok)

    assert len(host_check_results) == 1
    assert len(service_check_results) == 1

    # First host check failure: DOWN but still a SOFT state
    hdata = unserialize(host_check_results[0].data)
    assert hdata['state'] == 'DOWN'
    assert hdata['state_type'] == 'SOFT'

    # Service check is OK: HARD state
    sdata = unserialize(service_check_results[0].data)
    assert sdata['state'] == 'OK'
    assert sdata['state_type'] == 'HARD'
def test_cancel_service(self):
    """Test broks when cancel downtime

    Schedule a fixed service downtime and check the 'downtime_raise' brok,
    then delete the downtime with the DEL_ALL_SVC_DOWNTIMES external
    command and check the 'downtime_expire' brok.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')
    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 0, 'UP']])

    duration = 5
    now = int(time.time())
    # downtime valid for 5 seconds from now
    # Fixed downtime (fixed flag = 1): raised as soon as the command is processed
    cmd = "[%lu] SCHEDULE_SVC_DOWNTIME;test_host_0;test_ok_0;%d;%d;1;0;%d;" \
          "downtime author;downtime comment" % (now, now, now + duration, duration)
    self._scheduler.run_external_commands([cmd])
    self.external_command_loop()

    brok_downtime_raise = []
    brok_downtime_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    assert len(brok_downtime_raise) == 1
    assert len(brok_downtime_expire) == 0

    # External command: delete all host downtime
    now = int(time.time())
    self._main_broker.broks = []
    cmd = '[%d] DEL_ALL_SVC_DOWNTIMES;test_host_0;test_ok_0' % now
    self._scheduler.run_external_commands([cmd])
    self.external_command_loop()

    brok_downtime_raise = []
    brok_downtime_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'downtime_raise':
            brok_downtime_raise.append(brok)
        elif brok.type == 'downtime_expire':
            brok_downtime_expire.append(brok)

    assert len(brok_downtime_raise) == 0
    assert len(brok_downtime_expire) == 1

    # The expire brok data identifies the concerned host and service
    hdata = unserialize(brok_downtime_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
def get_events(self):
    """Query the satellite over HTTP (GET /_events) for its monitoring events.

    The received payload is un-serialized before being returned.

    :return: Broks list on success, [] on failure
    :rtype: list
    """
    payload = self.con.get('_events', wait=False)
    logger.debug("Got events from %s: %s", self.name, payload)
    events = unserialize(payload, True)
    return events
def get_external_commands(self):
    """Query the satellite over HTTP (GET /_external_commands) for its
    external commands.

    The received payload is un-serialized before being returned.

    :return: External Command list on success, [] on failure
    :rtype: list
    """
    payload = self.con.get('_external_commands', wait=False)
    logger.debug("Got %d external commands from %s: %s", len(payload), self.name, payload)
    commands = unserialize(payload, True)
    return commands
def setup_new_conf(self):
    # pylint: disable=too-many-branches
    """Setup the new configuration received from Arbiter

    This function calls the base satellite treatment and manages the configuration needed
    for a simple satellite daemon that executes some actions (eg. poller or reactionner):
    - configure the passive mode
    - configure the workers
    - configure the tags
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Satellite, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        logger.info("Received a new configuration")

        # self_conf is our own configuration from the fusionsupervision environment
        # self_conf = self.cur_conf['self_conf']

        # Now manage modules
        # Modules are loaded only once: later configurations keep the first set
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                # Make sure each module has its own (empty) worker queues dictionary
                for module in self.modules:
                    if module.name not in self.q_by_mod:
                        self.q_by_mod[module.name] = {}

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def prepare(self):
    """Un-serialize data from data attribute and add instance_id key if necessary

    :return: None
    """
    # Broks coming from an old daemon, or already prepared ones, carry
    # data that is already usable: only un-serialize when a 'prepared'
    # flag exists and is still False.
    needs_preparation = hasattr(self, 'prepared') and not self.prepared
    if needs_preparation:
        self.data = unserialize(self.data)
        if self.instance_id:
            self.data['instance_id'] = self.instance_id
    self.prepared = True
def test_unserialize_check(self):
    """ Test unserialize checks

    Un-serialize a JSON-dumped Check object and simply verify that no
    exception is raised.

    :return: None
    """
    var = '''
    {"content":
        {"check_type":0,"exit_status":3,"creation_time":1469152287.6731250286,
        "reactionner_tag":"None","s_time":0.0,
        "uuid":"5f1b16fa809c43379822c7acfe789660","check_time":0,"long_output":"",
        "state":0,"internal":false,"u_time":0.0,"env":{},"depend_on_me":[],
        "ref":"1fe5184ea05d439eb045399d26ed3337","from_trigger":false,
        "status":"scheduled","execution_time":0.0,"worker":"none","t_to_go":1469152290,
        "module_type":"echo","_in_timeout":false,"dependency_check":false,"type":"",
        "depend_on":[],"is_a":"check","poller_tag":"None","command":"_echo",
        "timeout":30,"output":"","perf_data":""},
    "__sys_python_module__":"fusionsupervision.check.Check"
    }
    '''

    unserialize(var)
    assert True
def get_broks(self, broker_name):
    """Query the satellite over HTTP (GET /_broks) for its broks.

    The received payload is un-serialized before being returned.

    :param broker_name: the concerned broker link
    :type broker_name: BrokerLink
    :return: Broks list on success, [] on failure
    :rtype: list
    """
    query = {'broker_name': broker_name}
    payload = self.con.get('_broks', query, wait=False)
    logger.debug("Got broks from %s: %s", self.name, payload)
    broks = unserialize(payload, True)
    return broks
def get_actions(self, params):
    """Query the scheduler over HTTP (GET /_checks) for actions to execute.

    The received payload is un-serialized before being returned.

    :param params: the request parameters
    :type params: str
    :return: Actions list on success, [] on failure
    :rtype: list
    """
    payload = self.con.get('_checks', params, wait=True)
    logger.debug("Got checks to execute from %s: %s", self.name, payload)
    actions = unserialize(payload, True)
    return actions
def test_timeperiod_transition_log(self):
    """Test the monitoring log brok raised on timeperiod activation changes

    The 'TIMEPERIOD TRANSITION' message format is: name;from_state;to_state
    where -1 stands for 'unknown', 0 for inactive and 1 for active.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')

    tp = self._scheduler.timeperiods.find_by_name('24x7')
    self.assertIsNot(tp, None)

    # First evaluation: transition from unknown (-1) to active (1)
    data = unserialize(tp.check_and_log_activation_change().data)
    assert data['level'] == 'info'
    assert data['message'] == 'TIMEPERIOD TRANSITION: 24x7;-1;1'

    # Now make this tp unable to be active again by removing al it's daterange
    dr = tp.dateranges
    tp.dateranges = []
    data = unserialize(tp.check_and_log_activation_change().data)
    assert data['level'] == 'info'
    assert data['message'] == 'TIMEPERIOD TRANSITION: 24x7;1;0'

    # Ok, let get back to work
    tp.dateranges = dr
    data = unserialize(tp.check_and_log_activation_change().data)
    assert data['level'] == 'info'
    assert data['message'] == 'TIMEPERIOD TRANSITION: 24x7;0;1'
def _push_broks(self):
    """Push the provided broks objects to the broker daemon

    Only used on a Broker daemon by the Arbiter

    :param: broks
    :type: list
    :return: None
    """
    payload = cherrypy.request.json
    incoming = payload['broks']
    # The arbiter broks list is shared: guard it with its lock
    with self.app.arbiter_broks_lock:
        logger.debug("Pushing %d broks", len(incoming))
        self.app.arbiter_broks.extend(unserialize(elem, True) for elem in incoming)
def add_actions(self, actions_list, scheduler_instance_id): """Add a list of actions to the satellite queues :param actions_list: Actions list to add :type actions_list: list :param scheduler_instance_id: sheduler link to assign the actions to :type scheduler_instance_id: SchedulerLink :return: None """ # We check for new check in each schedulers and put the result in new_checks scheduler_link = None for scheduler_id in self.schedulers: logger.debug("Trying to add an action, scheduler: %s", self.schedulers[scheduler_id]) if scheduler_instance_id == self.schedulers[scheduler_id].instance_id: scheduler_link = self.schedulers[scheduler_id] break else: logger.error("Trying to add actions from an unknwown scheduler: %s", scheduler_instance_id) return if not scheduler_link: logger.error("Trying to add actions, but scheduler link is not found for: %s, " "actions: %s", scheduler_instance_id, actions_list) return logger.debug("Found scheduler link: %s", scheduler_link) for action in actions_list: # First we look if the action is identified uuid = getattr(action, 'uuid', None) if uuid is None: try: action = unserialize(action, no_load=True) uuid = action.uuid except FusionsupervisionClassLookupException: logger.error('Cannot un-serialize action: %s', action) continue # If we already have this action, we are already working for it! if uuid in scheduler_link.actions: continue # Action is attached to a scheduler action.my_scheduler = scheduler_link.uuid scheduler_link.actions[action.uuid] = action self.assign_to_a_queue(action)
def __init__(self, params=None, parsing=False):
    # pylint: disable=unused-argument
    """Build the node, either empty or from a serialized 'params' dict.

    :param params: optional serialized node content
    :type params: dict | None
    :param parsing: unused, kept for interface compatibility
    :type parsing: bool
    """
    # Defaults for an empty node
    self.operand = None
    self.sons = []
    # Of: values are a triple OK,WARN,CRIT
    self.of_values = ('0', '0', '0')
    self.is_of_mul = False
    self.configuration_errors = []
    self.not_value = False

    if params is None:
        return

    # Override the defaults with whatever the params dict provides
    if 'operand' in params:
        self.operand = params['operand']
    if 'sons' in params:
        # Sons are serialized nodes: rebuild each one
        self.sons = [unserialize(elem) for elem in params['sons']]
    if 'of_values' in params:
        self.of_values = tuple(params['of_values'])
    if 'is_of_mul' in params:
        self.is_of_mul = params['is_of_mul']
    if 'not_value' in params:
        self.not_value = params['not_value']
def zlib_processor(entity):  # pragma: no cover, not used in the testing environment...
    """Read application/zlib data and put content into entity.params for later use.

    The request body is zlib-decompressed, parsed as JSON, and each value is
    un-serialized; decoded values are then merged into ``entity.params``
    (values for an already-present key are accumulated into a list).

    :param entity: cherrypy entity
    :type entity: cherrypy._cpreqbody.Entity
    :raises cherrypy.HTTPError: 411 when Content-Length is missing, 400 when
        the body is not valid zlib / JSON / serialized data
    :return: None
    """
    if not entity.headers.get(ntou("Content-Length"), ntou("")):
        raise cherrypy.HTTPError(411)

    body = entity.fp.read()
    try:
        body = zlib.decompress(body)
    except zlib.error:
        raise cherrypy.HTTPError(400, 'Invalid zlib data')
    try:
        raw_params = json.loads(body)
    except ValueError:
        raise cherrypy.HTTPError(400, 'Invalid JSON document in zlib data')

    try:
        params = {}
        for key, value in list(raw_params.items()):
            params[key] = unserialize(value.encode("utf8"))
    except TypeError:
        raise cherrypy.HTTPError(400, 'Invalid serialized data in JSON document')
    except FusionsupervisionClassLookupException as exp:
        # Bug fix: the HTTPError was previously built but never raised, so the
        # request silently continued with partially decoded parameters
        raise cherrypy.HTTPError(400, 'Cannot un-serialize data received: %s' % exp)

    # Now that all values have been successfully parsed and decoded,
    # apply them to the entity.params dict.
    for key, value in list(params.items()):
        if key in entity.params:
            if not isinstance(entity.params[key], list):
                entity.params[key] = [entity.params[key]]
            entity.params[key].append(value)
        else:
            entity.params[key] = value
def test_acknowledge_service(self):
    """Test broks when acknowledge

    Acknowledge a service problem and check the 'acknowledge_raise' brok;
    then clear the acknowledge — first by a service recovery, then with
    the REMOVE_SVC_ACKNOWLEDGEMENT external command — and check the
    'acknowledge_expire' brok in both cases.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    # To make tests quicker we make notifications send very quickly
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self._main_broker.broks = []

    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)

    now = time.time()
    # ACKNOWLEDGE_SVC_PROBLEM arguments after host;service are
    # presumably sticky;notify;persistent (Nagios convention) — confirm
    cmd = "[{0}] ACKNOWLEDGE_SVC_PROBLEM;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 'test_ok_0', 2, 0, 1, 'darth vader', 'normal process')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(3, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        print("Brok: %s" % brok)
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)

    assert len(brok_ack_raise) == 1
    assert len(brok_ack_expire) == 0

    hdata = unserialize(brok_ack_raise[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
    assert hdata['comment'] == 'normal process'

    # return service in OK mode, so the acknowledge will be removed by the scheduler
    self._main_broker.broks = []
    self.scheduler_loop(2, [[host, 0, 'UP'], [svc, 0, 'OK']])

    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)

    assert len(brok_ack_raise) == 0
    assert len(brok_ack_expire) == 1

    hdata = unserialize(brok_ack_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'

    # Do the same but remove acknowledge with external commands:
    self._main_broker.broks = []
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)

    now = time.time()
    cmd = "[{0}] ACKNOWLEDGE_SVC_PROBLEM;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 'test_ok_0', 2, 0, 1, 'darth vader', 'normal process')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    cmd = "[{0}] REMOVE_SVC_ACKNOWLEDGEMENT;{1};{2}\n". \
        format(int(now), 'test_host_0', 'test_ok_0')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 2, 'CRITICAL']])

    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        print(("Brok: %s" % brok))
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)

    assert len(brok_ack_raise) == 1
    assert len(brok_ack_expire) == 1

    hdata = unserialize(brok_ack_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    assert hdata['service'] == 'test_ok_0'
    assert hdata['comment'] == 'normal process'
def test_unserialize_notif(self):
    """ Test unserialize notifications

    Un-serialize a JSON-dumped Notification object (including its inner
    CommandCall and Command) and simply verify that no exception is raised.

    :return: None
    """
    var = '''
    {"98a76354619746fa8e6d2637a5ef94cb": {
        "content": {
            "reason_type": 1, "exit_status": 3, "creation_time":1468522950.2828259468,
            "command_call": {
                "args": [], "call": "notify-service",
                "command": {
                    "command_line": "$USER1$\/notifier.pl --hostname $HOSTNAME$ --servicedesc $SERVICEDESC$ --notificationtype $NOTIFICATIONTYPE$ --servicestate $SERVICESTATE$ --serviceoutput $SERVICEOUTPUT$ --longdatetime $LONGDATETIME$ --serviceattempt $SERVICEATTEMPT$ --servicestatetype $SERVICESTATETYPE$",
                    "command_name": "notify-service",
                    "configuration_errors":[],
                    "configuration_warnings":[],
                    "enable_environment_macros": false,
                    "id": "487aa432ddf646079ec6c07803333eac",
                    "imported_from": "cfg\/default\/commands.cfg:14",
                    "macros":{}, "module_type": "fork", "my_type":"command",
                    "ok_up":"", "poller_tag": "None",
                    "properties":{
                        "use":{
                            "brok_transformation": null,
                            "class_inherit": [],
                            "conf_send_preparation": null,
                            "default":[],
                            "fill_brok":[],
                            "has_default":true,
                            "help":"",
                            "keep_empty":false,
                            "managed":true,
                            "merging":"uniq",
                            "no_slots":false,
                            "override":false,
                            "required":false,
                            "retention":false,
                            "retention_preparation":null,
                            "special":false,
                            "split_on_coma":true,
                            "to_send":false,
                            "unmanaged":false,
                            "unused":false},
                        "name":{
                            "brok_transformation":null,
                            "class_inherit":[],
                            "conf_send_preparation":null,
                            "default":"",
                            "fill_brok":[],
                            "has_default":true,
                            "help":"",
                            "keep_empty":false,
                            "managed":true,
                            "merging":"uniq",
                            "no_slots":false,
                            "override":false,
                            "required":false,
                            "retention":false,
                            "retention_preparation":null,
                            "special":false,
                            "split_on_coma":true,
                            "to_send":false,
                            "unmanaged":false,
                            "unused":false},
                    },
                    "reactionner_tag":"None",
                    "running_properties":{
                        "configuration_errors":{
                            "brok_transformation":null,
                            "class_inherit":[],
                            "conf_send_preparation":null,
                            "default":[],"fill_brok":[],
                            "has_default":true,"help":"","keep_empty":false,
                            "managed":true,"merging":"uniq","no_slots":false,"override":false,
                            "required":false,"retention":false,"retention_preparation":null,
                            "special":false,"split_on_coma":true,"to_send":false,
                            "unmanaged":false,"unused":false},
                    },
                    "tags":[],
                    "timeout":-1,
                    "uuid":"487aa432ddf646079ec6c07803333eac"},
                "enable_environment_macros":false,
                "late_relink_done":false,
                "macros":{},
                "module_type":"fork",
                "my_type":"CommandCall",
                "poller_tag":"None",
                "properties":{},
                "reactionner_tag":"None",
                "timeout":-1,
                "uuid":"cfcaf0fc232b4f59a7d8bb5bd1d83fef",
                "valid":true},
            "escalated":false,
            "reactionner_tag":"None",
            "s_time":0.0,
            "notification_type":0,
            "contact_name":"test_contact",
            "type":"PROBLEM",
            "uuid":"98a76354619746fa8e6d2637a5ef94cb",
            "check_time":0,"ack_data":"",
            "state":0,"u_time":0.0,
            "env":{
                "NAGIOS_SERVICEDOWNTIME":"0",
                "NAGIOS_TOTALSERVICESUNKNOWN":"",
                "NAGIOS_LONGHOSTOUTPUT":"",
                "NAGIOS_HOSTDURATIONSEC":"1468522950",
                "NAGIOS_HOSTDISPLAYNAME":"test_host_0",
            },
            "notif_nb":1,"_in_timeout":false,"enable_environment_macros":false,
            "host_name":"test_host_0",
            "status":"scheduled",
            "execution_time":0.0,"start_time":0,"worker":"none","t_to_go":1468522950,
            "module_type":"fork","service_description":"test_ok_0","sched_id":0,"ack_author":"",
            "ref":"272e89c1de854bad85987a7583e6c46b",
            "is_a":"notification",
            "contact":"4e7c4076c372457694684bdd5ba47e94",
            "command":"\/notifier.pl --hostname test_host_0 --servicedesc test_ok_0 --notificationtype PROBLEM --servicestate CRITICAL --serviceoutput CRITICAL --longdatetime Thu 14 Jul 21:02:30 CEST 2016 --serviceattempt 2 --servicestatetype HARD",
            "end_time":0,"timeout":30,"output":"",
            "already_start_escalations":[]},
        "__sys_python_module__":"fusionsupervision.notification.Notification"
        }
    }
    '''
    unserialize(var)
    assert True
def test_multibroker_onesched(self):
    """Test with 2 brokers and 1 scheduler

    Both brokers must receive the same list of broks; each broker then
    fetches its broks from the scheduler HTTP interface (_broks) and the
    queue is emptied after the fetch.

    :return: None
    """
    self.setup_with_file(
        'cfg/multibroker/cfg_multi_broker_one_scheduler.cfg')

    my_scheduler = self._scheduler

    assert 2 == len(my_scheduler.my_daemon.brokers)

    # create broks
    host = my_scheduler.pushed_conf.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router

    svc = my_scheduler.pushed_conf.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no raised host check on critical service check result

    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 0, 'OK']])

    # Count broks in each broker
    broker_broks_count = {}
    broker1_link_uuid = None
    broker2_link_uuid = None
    for broker_link_uuid in my_scheduler.my_daemon.brokers:
        if my_scheduler.my_daemon.brokers[broker_link_uuid].name == 'broker-master':
            broker1_link_uuid = broker_link_uuid
        else:
            broker2_link_uuid = broker_link_uuid
        broker_broks_count[my_scheduler.my_daemon.brokers[broker_link_uuid].name] = 0
        print(("Broker %s:" % (my_scheduler.my_daemon.brokers[broker_link_uuid])))
        for brok in my_scheduler.my_daemon.brokers[broker_link_uuid].broks:
            broker_broks_count[my_scheduler.my_daemon.brokers[broker_link_uuid].name] += 1
            print("- %s" % brok)

    # Same list of broks in the two brokers
    # NOTE(review): assertItemsEqual is the Python 2 unittest name
    # (assertCountEqual in Python 3) — presumably provided by the test
    # base class; confirm
    self.assertItemsEqual(
        my_scheduler.my_daemon.brokers[broker1_link_uuid].broks,
        my_scheduler.my_daemon.brokers[broker2_link_uuid].broks)

    # Scheduler HTTP interface
    sched_interface = SchedulerInterface(my_scheduler.my_daemon)

    # Test broker-master that gets its broks from the scheduler
    # Get the scheduler broks to be sent ...
    print("Broks to be sent:")
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker1_link_uuid].broks
               if getattr(b, 'sent_to_externals', False)]
    for brok in to_send:
        print(("- %s" % (brok)))
    assert 6 == len(to_send)

    broks_list = sched_interface._broks('broker-master')
    broks_list = unserialize(broks_list, True)
    assert 6 == len(broks_list)
    assert broker_broks_count['broker-master'] == len(broks_list)

    # No more broks to get
    # Get the scheduler broks to be sent ...
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker1_link_uuid].broks
               if not getattr(b, 'got', False)]
    assert 0 == len(to_send), "Still some broks to be sent!"

    # Test broker-master 2 that gets its broks from the scheduler
    # Get the scheduler broks to be sent ...
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker2_link_uuid].broks
               if getattr(b, 'sent_to_externals', False)]
    print("Broks to be sent:")
    for brok in to_send:
        print(("- %s" % (brok)))
    assert 6 == len(to_send)

    broks_list = sched_interface._broks('broker-master2')
    broks_list = unserialize(broks_list, True)
    assert 6 == len(broks_list)
    assert broker_broks_count['broker-master2'] == len(broks_list)

    # No more broks to get
    # Get the scheduler broks to be sent ...
    to_send = [b for b in my_scheduler.my_daemon.brokers[broker2_link_uuid].broks
               if not getattr(b, 'got', False)]
    assert 0 == len(to_send), "Still some broks to be sent!"

    # Test unknown broker that gets its broks from the scheduler
    broks_list = sched_interface._broks('broker-unknown')
    broks_list = unserialize(broks_list, True)
    assert 0 == len(broks_list)

    # Re-get broks
    # Test broker-master that gets its broks from the scheduler
    broks_list = sched_interface._broks('broker-master')
    broks_list = unserialize(broks_list, True)
    # No broks !
    assert 0 == len(broks_list)

    # Test broker-master 2 that gets its broks from the scheduler
    broks_list = sched_interface._broks('broker-master2')
    broks_list = unserialize(broks_list, True)
    # No broks !
    assert 0 == len(broks_list)

    # Some new broks
    self.scheduler_loop(1, [[host, 0, 'UP'], [svc, 0, 'OK']])

    # Same list of broks in the two brokers
    self.assertItemsEqual(
        my_scheduler.my_daemon.brokers[broker1_link_uuid].broks,
        my_scheduler.my_daemon.brokers[broker2_link_uuid].broks)

    assert len(my_scheduler.my_daemon.brokers[broker1_link_uuid].broks) > 1
    assert len(my_scheduler.my_daemon.brokers[broker2_link_uuid].broks) > 1
def setup_new_conf(self):
    # pylint: disable=too-many-statements, too-many-branches, too-many-locals
    """Setup new conf received for scheduler

    Runs the base daemon treatment, then:
    - saves the retention data of the formerly pushed configuration
    - un-serializes the monitored objects configuration part
    - (re)creates the poller / reactionner / broker satellite links,
      preserving the context (broks, actions, ...) of links already known
    - loads the scheduler modules (only once)
    - loads the configuration into the inner scheduler and starts scheduling

    :return: None
    """
    # Execute the base class treatment...
    super(Fusionsupervision, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # self_conf is our own configuration from the fusionsupervision environment
        # self_conf = self.cur_conf['self_conf']
        logger.debug("Got config: %s", self.cur_conf)
        if 'conf_part' not in self.cur_conf:
            self.cur_conf['conf_part'] = None
        conf_part = self.cur_conf['conf_part']

        # Ok now we can save the retention data
        if self.sched.pushed_conf is not None:
            self.sched.update_retention()

        # Get the monitored objects configuration
        t00 = time.time()
        received_conf_part = None
        try:
            received_conf_part = unserialize(conf_part)
            assert received_conf_part is not None
        except AssertionError as exp:
            # This to indicate that no configuration is managed by this scheduler...
            logger.warning("No managed configuration received from arbiter")
        except FusionsupervisionClassLookupException as exp:  # pragma: no cover
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)
            }
            logger.error(self.new_conf)
            logger.error("Back trace of the error:\n%s", traceback.format_exc())
            return
        except Exception as exp:  # pylint: disable=broad-except
            # This to indicate that the new configuration is not managed...
            self.new_conf = {
                "_status": "Cannot un-serialize configuration received from arbiter",
                "_error": str(exp)
            }
            logger.error(self.new_conf)
            self.exit_on_exception(exp, str(self.new_conf))
        # if not received_conf_part:
        #     return
        logger.info("Monitored configuration %s received at %d. Un-serialized in %d secs",
                    received_conf_part, t00, time.time() - t00)
        logger.info("Scheduler received configuration : %s", received_conf_part)

        # Now we create our pollers, reactionners and brokers
        for link_type in ['pollers', 'reactionners', 'brokers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("Missing %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions, wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1], rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the context saved from the former link (if any)
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replacing the satellite address and port by those defined in satellite_map
                if new_link.name in self.cur_conf['override_conf'].get('satellite_map', {}):
                    override_conf = self.cur_conf['override_conf']
                    overriding = override_conf.get('satellite_map')[new_link.name]
                    logger.warning("Do not override the configuration for: %s, with: %s. "
                                   "Please check whether this is necessary!",
                                   new_link.name, overriding)

        # First mix conf and override_conf to have our definitive conf
        # NOTE(review): cur_conf is indexed as a dict everywhere else in this
        # method; getattr() on it will always return the [] default here, so
        # this loop may never run — verify whether .get() was intended
        for prop in getattr(self.cur_conf, 'override_conf', []):
            logger.debug("Overriden: %s / %s ", prop, getattr(received_conf_part, prop, None))
            logger.debug("Overriding: %s / %s ", prop, self.cur_conf['override_conf'])
            setattr(received_conf_part, prop, self.cur_conf['override_conf'].get(prop, None))

        # Scheduler modules (loaded only once)
        if not self.have_modules:
            try:
                logger.debug("Modules configuration: %s", self.cur_conf['modules'])
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.debug("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        if received_conf_part:
            logger.info("Loading configuration...")

            # Propagate the global parameters to the configuration items
            received_conf_part.explode_global_conf()

            # We give the configuration to our scheduler
            self.sched.reset()
            self.sched.load_conf(self.cur_conf['instance_id'],
                                 self.cur_conf['instance_name'],
                                 received_conf_part)

            # Once loaded, the scheduler has an inner pushed_conf object
            logger.info("Loaded: %s", self.sched.pushed_conf)

            # Update the scheduler ticks according to the daemon configuration
            self.sched.update_recurrent_works_tick(self)

            # We must update our pushed configuration macros with correct values
            # from the configuration parameters
            # self.sched.pushed_conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(received_conf_part)

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(
                received_conf_part, 'applyer', self.sched,
                received_conf_part.accept_passive_unknown_check_results,
                received_conf_part.log_external_commands)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.external_commands_manager = ecm

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Log hosts/services initial states
            self.sched.log_initial_states()

        # Create brok new conf
        brok = Brok({'type': 'new_conf', 'data': {}})
        self.sched.add_brok(brok)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        my_satellites = self.get_links_of_type(s_type='')
        for satellite in list(my_satellites.values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

        if received_conf_part:
            # Enable the scheduling process
            logger.info("Loaded: %s", self.sched.pushed_conf)
            self.sched.start_scheduling()

    # Now I have a configuration!
    self.have_conf = True
def test_flapping(self):
    """Test host/service flapping detection

    Drives the scheduler through alternating OK/CRITICAL results to raise the
    flapping state, checks the FLAPPINGSTART monitoring logs, then feeds enough
    OK results to clear the flapping state and checks the FLAPPINGSTOP logs.

    :return:
    """
    # Get the hosts and services
    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.act_depend_of = []
    assert host.flap_detection_enabled
    router = self._scheduler.hosts.find_by_name("test_router_0")
    router.act_depend_of = []
    assert router.flap_detection_enabled
    svc = self._scheduler.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    svc.event_handler_enabled = False
    svc.act_depend_of = []
    # Force because the default configuration disables the flapping detection
    svc.flap_detection_enabled = True

    self.scheduler_loop(2, [[host, 0, 'UP | value1=1 value2=2'],
                            [router, 0, 'UP | rtt=10'],
                            [svc, 0, 'OK']])
    assert 'UP' == host.state
    assert 'HARD' == host.state_type
    assert 'UP' == router.state
    assert 'HARD' == router.state_type
    assert 'OK' == svc.state
    assert 'HARD' == svc.state_type
    # Low flap threshold comes from the standard test configuration
    assert 25 == svc.low_flap_threshold

    # Set the service as a problem
    self.scheduler_loop(3, [[svc, 2, 'Crit']])
    assert 'CRITICAL' == svc.state
    assert 'HARD' == svc.state_type

    # Ok, now go in flap! Alternate OK/CRITICAL to drive the state-change
    # percentage above the high flap threshold.
    for i in range(1, 10):
        self.scheduler_loop(1, [[svc, 0, 'Ok']])
        self.scheduler_loop(1, [[svc, 2, 'Crit']])

    # Should be in flapping state now
    assert svc.is_flapping

    # We got 'monitoring_log' broks for logging to the monitoring logs...
    # Collect them ordered by creation time to compare with the expected sequence.
    monitoring_logs = []
    for brok in sorted(iter(self._main_broker.broks.values()),
                       key=lambda x: x.creation_time):
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))

    expected_logs = [
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;HARD;2;Crit'),
        ('error', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;CRITICAL;'
                  'notify-service;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;HARD;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;OK;'
                 'notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE FLAPPING ALERT: test_host_0;test_ok_0;STARTED; '
                 'Service appears to have started flapping (83.8% change >= 50.0% threshold)'
         ),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;'
                 'FLAPPINGSTART (OK);notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
    ]
    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs

    # Now we put it as back :)
    # 10 is not enough to get back as normal
    for i in range(1, 11):
        self.scheduler_loop(1, [[svc, 0, 'Ok']])
    assert svc.is_flapping

    # 10 others can be good (near 4.1 %)
    for i in range(1, 11):
        self.scheduler_loop(1, [[svc, 0, 'Ok']])
    assert not svc.is_flapping

    # We got 'monitoring_log' broks for logging to the monitoring logs...
    # Re-collect the full log list: it now also contains the STOPPED alert.
    monitoring_logs = []
    for brok in sorted(iter(self._main_broker.broks.values()),
                       key=lambda x: x.creation_time):
        if brok.type == 'monitoring_log':
            data = unserialize(brok.data)
            monitoring_logs.append((data['level'], data['message']))
    print(("Logs: %s" % monitoring_logs))

    expected_logs = [
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;HARD;2;Crit'),
        ('error', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;CRITICAL;'
                  'notify-service;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;HARD;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;OK;'
                 'notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE FLAPPING ALERT: test_host_0;test_ok_0;STARTED; '
                 'Service appears to have started flapping '
                 '(83.8% change >= 50.0% threshold)'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;'
                 'FLAPPINGSTART (OK);notify-service;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE ALERT: test_host_0;test_ok_0;OK;SOFT;2;Ok'),
        ('error', 'SERVICE ALERT: test_host_0;test_ok_0;CRITICAL;SOFT;1;Crit'),
        ('info', 'SERVICE FLAPPING ALERT: test_host_0;test_ok_0;STOPPED; '
                 'Service appears to have stopped flapping '
                 '(21.5% change < 25.0% threshold)'),
        ('info', 'SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;'
                 'FLAPPINGSTOP (OK);notify-service;Ok')
    ]
    for log_level, log_message in expected_logs:
        assert (log_level, log_message) in monitoring_logs
def setup_new_conf(self):
    """Receiver custom setup_new_conf method

    This function calls the base satellite treatment and manages the configuration needed
    for a receiver daemon:
    - get and configure its satellites
    - configure the modules

    :return: None
    """
    # Common satellite configuration handling happens in the base class...
    super(Receiver, self).setup_new_conf()

    # ...the receiver-specific part is done under the configuration lock
    with self.conf_lock:
        # self_conf is our own configuration from the fusionsupervision environment
        # self_conf = self.cur_conf['self_conf']
        logger.debug("Got config: %s", self.cur_conf)

        # Load and start the modules, once only
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if not self.modules:
                logger.info("I do not have modules")
            else:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True
                # Set the modules up, then start the external ones
                self.do_load_modules(self.modules)
                self.modules_manager.start_external_instances()

        # Build the external commands manager.
        # We are a receiver: our role is to get and dispatch commands to the schedulers
        env_global_conf = self.cur_conf.get('global_conf', None)
        if not env_global_conf:
            logger.error("Received a configuration without any global_conf! "
                         "This may hide a configuration problem with the "
                         "realms and the manage_sub_realms of the satellites!")
            # Fall back on safe defaults
            env_global_conf = {
                'accept_passive_unknown_check_results': False,
                'log_external_commands': True
            }
        accept_passive_unknown = env_global_conf.get(
            'accept_passive_unknown_check_results', False)
        log_ext_commands = env_global_conf.get('log_external_commands', False)
        self.external_commands_manager = ExternalCommandManager(
            None, 'receiver', self, accept_passive_unknown, log_ext_commands)

        # Open a connection towards each of our satellites
        logger.info("Initializing connection with my satellites:")
        for satellite in list(self.get_links_of_type(s_type='').values()):
            logger.info("- : %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True
def test_acknowledge_host(self):
    """Test broks when acknowledge

    Checks that the scheduler raises one 'acknowledge_raise' brok per impacted
    item (host and service), then one 'acknowledge_expire' brok per item when
    the acknowledge is cleared - either by the host recovering or through the
    REMOVE_HOST_ACKNOWLEDGEMENT external command.

    :return: None
    """
    self.setup_with_file('cfg/cfg_default.cfg')

    self._main_broker.broks = []

    host = self._scheduler.hosts.find_by_name("test_host_0")
    host.checks_in_progress = []
    host.act_depend_of = []  # ignore the router
    host.event_handler_enabled = False

    svc = self._scheduler.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0")
    svc.checks_in_progress = []
    svc.act_depend_of = []  # no hostchecks on critical checkresults
    svc.event_handler_enabled = False

    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)

    # Acknowledge with sticky=1 and notify=1, expiring in 2 seconds
    now = time.time()
    cmd = "[{0}] ACKNOWLEDGE_HOST_PROBLEM_EXPIRE;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 1, 0, 1, (now + 2), 'darth vader', 'normal process')
    self._scheduler.run_external_commands([cmd])
    self.external_command_loop(2)
    # self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])

    brok_ack = []
    print("Broker uuid: %s" % self._main_broker.uuid)
    print("Broker broks: %s" % self._main_broker.broks)
    for brok in self._main_broker.broks:
        print("Broker brok: %s" % brok)
        if brok.type == 'acknowledge_raise':
            print("Brok: %s" % brok)
            brok_ack.append(brok)

    print("***Scheduler: %s" % self._scheduler)
    print("***Scheduler daemon: %s" % self._scheduler.my_daemon)
    print("***Scheduler daemon brokers: %s" % self._scheduler.my_daemon.brokers)
    for broker_link_uuid in self._scheduler.my_daemon.brokers:
        print("*** %s - broks: %s" % (broker_link_uuid,
                                      self._scheduler.my_daemon.brokers[broker_link_uuid].broks))

    # Got one brok for the host ack and one brok for the service ack
    assert len(brok_ack) == 2

    # The two broks order is not guaranteed, so check that one is for the
    # host and the other for the service
    host_brok = False
    service_brok = False
    hdata = unserialize(brok_ack[0].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    hdata = unserialize(brok_ack[1].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    assert host_brok and service_brok

    # return host in UP mode, so the acknowledge will be removed by the scheduler
    self._main_broker.broks = []
    self.scheduler_loop(2, [[host, 0, 'UP'], [svc, 0, 'OK']])
    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)

    assert len(brok_ack_raise) == 0
    assert len(brok_ack_expire) == 2

    # Again, one expire brok for the host and one for the service, order unknown
    host_brok = False
    service_brok = False
    hdata = unserialize(brok_ack_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    hdata = unserialize(brok_ack_expire[1].data)
    assert hdata['host'] == 'test_host_0'
    if 'service' in hdata:
        assert hdata['service'] == 'test_ok_0'
        service_brok = True
    else:
        host_brok = True

    assert host_brok and service_brok

    # Do the same but remove acknowledge with external commands:
    self._main_broker.broks = []
    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])
    time.sleep(0.1)

    now = time.time()
    cmd = "[{0}] ACKNOWLEDGE_HOST_PROBLEM_EXPIRE;{1};{2};{3};{4};{5};{6};{7}\n". \
        format(int(now), 'test_host_0', 1, 0, 1, (now + 2), 'darth vader', 'normal process')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(1, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])

    cmd = "[{0}] REMOVE_HOST_ACKNOWLEDGEMENT;{1}\n". \
        format(int(now), 'test_host_0')
    self._scheduler.run_external_commands([cmd])
    self.scheduler_loop(3, [[host, 2, 'DOWN'], [svc, 2, 'CRITICAL']])

    brok_ack_raise = []
    brok_ack_expire = []
    for brok in self._main_broker.broks:
        print("Brok: %s" % brok)
        if brok.type == 'acknowledge_raise':
            brok_ack_raise.append(brok)
        elif brok.type == 'acknowledge_expire':
            brok_ack_expire.append(brok)

    # Two raises (host + service) and only one expire:
    # the external command only removes the host acknowledge
    assert len(brok_ack_raise) == 2
    assert len(brok_ack_expire) == 1

    hdata = unserialize(brok_ack_expire[0].data)
    assert hdata['host'] == 'test_host_0'
    assert 'service' not in hdata
def _dispatching(self, env_filename='cfg/dispatcher/simple.ini', loops=3, multi_realms=False):
    """ Dispatching process: prepare, check, dispatch

    This function realize all the dispatching operations:
    - load a monitoring configuration
    - prepare the dispatching
    - dispatch
    - check the correct dispatching, including:
        - check the configuration dispatched to the schedulers
        - check the configuration dispatched to the spare arbiter (if any)
    - run the check_reachable loop several times

    if multi_realms is True, the scheduler configuration received are not checked against
    the arbiter whole configuration. This would be really too complex to assert on this :(

    Schedulers must have a port number with 7768 (eg. 7768,17768,27768,...)

    Spare daemons must have a port number with 8770 (eg. 8770,18770,28770,...)

    :param env_filename: environment file to load the tested configuration from
    :param loops: number of check_reachable iterations to run at the end
    :param multi_realms: skip the object-by-object comparison with the arbiter conf
    :return: None
    """
    args = {
        'env_file': env_filename,
        'fusionsupervision_name': 'fusionsupervision-test',
        'daemon_name': 'arbiter-master'
    }
    my_arbiter = Arbiter(**args)
    my_arbiter.setup_fusionsupervision_logger()

    # Clear logs
    self.clear_logs()

    # my_arbiter.load_modules_manager()
    my_arbiter.load_monitoring_config_file()
    assert my_arbiter.conf.conf_is_correct is True
    # logging.getLogger('fusionsupervision').setLevel(logging.DEBUG)

    # Snapshot the count and string dump of each objects class loaded by the
    # arbiter, to compare later with what the daemons receive
    objects_map = {}
    for _, _, strclss, _, _ in list(my_arbiter.conf.types_creations.values()):
        if strclss in ['hostescalations', 'serviceescalations']:
            continue

        objects_list = getattr(my_arbiter.conf, strclss, [])
        objects_map[strclss] = {'count': len(objects_list), 'str': str(objects_list)}
        # print("Got %d %s: %s" % (len(objects_list), strclss, objects_list))

    # Freeze the time !
    initial_datetime = datetime.datetime.now()
    with freeze_time(initial_datetime) as frozen_datetime:
        assert frozen_datetime() == initial_datetime

        # #1 - Get a new dispatcher
        my_dispatcher = Dispatcher(my_arbiter.conf, my_arbiter.link_to_myself)
        print("*** All daemons WS: %s"
              % ["%s:%s" % (link.address, link.port)
                 for link in my_dispatcher.all_daemons_links])

        assert my_dispatcher.dispatch_ok is False
        assert my_dispatcher.new_to_dispatch is False
        assert my_dispatcher.first_dispatch_done is False

        self.assert_any_log_match(re.escape("Dispatcher arbiters/satellites map:"))
        for link in my_dispatcher.all_daemons_links:
            self.assert_any_log_match(re.escape(" - %s: %s" % (link.name, link.uri)))

        # Simulate the daemons HTTP interface (very simple simulation !)
        with requests_mock.mock() as mr:
            for link in my_dispatcher.all_daemons_links:
                mr.get('http://%s:%s/ping' % (link.address, link.port), json='pong')
                mr.get('http://%s:%s/identity' % (link.address, link.port),
                       json={"running_id": 123456.123456})
                mr.get('http://%s:%s/wait_new_conf' % (link.address, link.port), json=True)
                mr.get('http://%s:%s/fill_initial_broks' % (link.address, link.port), json=[])
                mr.post('http://%s:%s/_push_configuration' % (link.address, link.port), json=True)
                mr.get('http://%s:%s/managed_configurations' % (link.address, link.port), json={})
                mr.get('http://%s:%s/do_not_run' % (link.address, link.port), json=True)

            # No link has any configuration meta-data yet
            for link in my_dispatcher.all_daemons_links:
                # print("Satellite: %s / %s" % (link, link.cfg_to_manage))
                assert not link.hash
                assert not link.push_flavor
                assert not link.cfg_to_manage
                assert not link.cfg_managed

            # #2 - Initialize connection with all our satellites
            for satellite in my_dispatcher.all_daemons_links:
                assert my_arbiter.daemon_connection_init(satellite)
            # All links have a running identifier (from the mocked /identity)
            for link in my_dispatcher.all_daemons_links:
                if link == my_dispatcher.arbiter_link:
                    continue
                assert link.running_id == 123456.123456
                self.assert_any_log_match(re.escape("got: 123456.123456"))

            # #3 - Check reachable - a configuration is not yet prepared,
            # so only check reachable state
            my_dispatcher.check_reachable()
            assert my_dispatcher.dispatch_ok is False
            assert my_dispatcher.first_dispatch_done is False
            assert my_dispatcher.new_to_dispatch is False
            # Not yet configured ...
            for link in my_dispatcher.all_daemons_links:
                if link == my_dispatcher.arbiter_link:
                    continue
                self.assert_any_log_match(re.escape(
                    "The %s %s do not have a configuration" % (link.type, link.name)))

            # #3 - Check reachable - daemons got pinged too early...
            my_dispatcher.check_reachable()
            assert my_dispatcher.dispatch_ok is False
            assert my_dispatcher.first_dispatch_done is False
            assert my_dispatcher.new_to_dispatch is False
            # Only for Python > 2.7, DEBUG logs ...
            if os.sys.version_info > (2, 7):
                for link in my_dispatcher.all_daemons_links:
                    if link == my_dispatcher.arbiter_link:
                        continue
                    self.assert_any_log_match(re.escape("Too early to ping %s" % (link.name)))
            self.assert_no_log_match(re.escape(
                "Dispatcher, these daemons are not configured: "
                "reactionner-master,poller-master,broker-master,receiver-master,"
                "scheduler-master"
                ", and a configuration is ready to dispatch, run the dispatching..."))

            # Time warp 5 seconds - overpass the ping period...
            self.clear_logs()
            frozen_datetime.tick(delta=datetime.timedelta(seconds=5))

            # #3 - Check reachable - daemons provide their configuration
            my_dispatcher.check_reachable()
            assert my_dispatcher.dispatch_ok is False
            assert my_dispatcher.first_dispatch_done is False
            assert my_dispatcher.new_to_dispatch is False
            # Only for Python > 2.7, DEBUG logs ...
            if os.sys.version_info > (2, 7):
                # Still not configured ...
                for link in my_dispatcher.all_daemons_links:
                    if link == my_dispatcher.arbiter_link:
                        continue
                    self.assert_any_log_match(re.escape(
                        "My (%s) fresh managed configuration: {}" % link.name))

            # #4 - Prepare dispatching
            assert my_dispatcher.new_to_dispatch is False
            my_dispatcher.prepare_dispatch()
            assert my_dispatcher.dispatch_ok is False
            assert my_dispatcher.first_dispatch_done is False
            assert my_dispatcher.new_to_dispatch is True
            self.assert_any_log_match(re.escape(
                "All configuration parts are assigned to schedulers and their satellites :)"))
            # All links have a hash, push_flavor and cfg_to_manage
            for link in my_dispatcher.all_daemons_links:
                print("Link: %s" % link)
                assert getattr(link, 'hash', None) is not None
                assert getattr(link, 'push_flavor', None) is not None
                assert getattr(link, 'cfg_to_manage', None) is not None
                assert not link.cfg_managed  # Not yet

            # #5 - Check reachable - a configuration is prepared,
            # this will force the daemons communication, no need for a time warp ;)
            my_dispatcher.check_reachable()
            # Only for Python > 2.7, DEBUG logs ...
            if os.sys.version_info > (2, 7):
                for link in my_dispatcher.all_daemons_links:
                    if link == my_dispatcher.arbiter_link:
                        continue
                    self.assert_any_log_match(re.escape(
                        "My (%s) fresh managed configuration: {}" % link.name))

            self.assert_any_log_match(re.escape(
                "Dispatcher, these daemons are not configured:"))
            self.assert_any_log_match(re.escape(
                ", and a configuration is ready to dispatch, run the dispatching..."))

            self.assert_any_log_match(re.escape(
                "Trying to send configuration to the satellites..."))
            for link in my_dispatcher.all_daemons_links:
                if link == my_dispatcher.arbiter_link:
                    continue
                self.assert_any_log_match(re.escape("Sending configuration to the %s %s"
                                                    % (link.type, link.name)))

            # As of now the configuration is prepared and was dispatched to the daemons !
            # Configuration already dispatched!
            with pytest.raises(DispatcherError):
                my_dispatcher.dispatch()
            self.show_logs()

            # Hack the requests history to check and simulate the configuration pushed...
            history = mr.request_history
            for index, request in enumerate(history):
                if '_push_configuration' in request.url:
                    received = request.json()
                    print(index, request.url, received)
                    assert ['conf'] == list(received.keys())
                    conf = received['conf']
                    from pprint import pprint
                    pprint(conf)
                    assert 'fusionsupervision_name' in conf
                    assert conf['fusionsupervision_name'] == 'My Alignak'

                    assert 'self_conf' in conf
                    assert conf['self_conf']
                    # Find the link this pushed configuration was destined to;
                    # the for-else asserts False when no link matched
                    i_am = None
                    for link in my_dispatcher.all_daemons_links:
                        if link.type == conf['self_conf']['type'] \
                                and link.name == conf['self_conf']['name']:
                            i_am = link
                            break
                    else:
                        assert False
                    print(("I am: %s" % i_am))
                    print(("I have: %s" % conf))

                    # All links have a hash, push_flavor and cfg_to_manage
                    assert 'hash' in conf
                    assert 'managed_conf_id' in conf

                    assert 'arbiters' in conf
                    if conf['self_conf']['manage_arbiters']:
                        # All the known arbiters
                        assert list(conf['arbiters'].keys()) == \
                            [arbiter_link.uuid for arbiter_link in my_dispatcher.arbiters]
                    else:
                        assert conf['arbiters'] == {}

                    assert 'schedulers' in conf
                    # Hack for the managed configurations
                    # NOTE: 'link' is the matched destination link from the loop above
                    link.cfg_managed = {}
                    for scheduler_link in list(conf['schedulers'].values()):
                        link.cfg_managed[scheduler_link['instance_id']] = {
                            'hash': scheduler_link['hash'],
                            'push_flavor': scheduler_link['push_flavor'],
                            'managed_conf_id': scheduler_link['managed_conf_id']
                        }
                    print("Managed: %s" % link.cfg_managed)

                    assert 'modules' in conf
                    assert conf['modules'] == []

                    # Spare arbiter specific (spare daemons use port *8770)
                    if '8770/_push_configuration' in request.url:
                        # Spare arbiter receives all the monitored configuration
                        assert 'whole_conf' in conf
                        # String serialized configuration
                        assert isinstance(conf['whole_conf'], string_types)
                        managed_conf_part = unserialize(conf['whole_conf'])
                        # Test a property to be sure conf loaded correctly
                        assert managed_conf_part.instance_id == conf['managed_conf_id']

                        # The spare arbiter got the same objects count
                        # as the master arbiter prepared!
                        for _, _, strclss, _, _ in list(
                                managed_conf_part.types_creations.values()):
                            # These elements are not included in the serialized configuration!
                            if strclss in ['hostescalations', 'serviceescalations',
                                           'arbiters', 'schedulers', 'brokers',
                                           'pollers', 'reactionners', 'receivers',
                                           'realms', 'modules', 'hostsextinfo',
                                           'servicesextinfo', 'hostdependencies',
                                           'servicedependencies']:
                                continue

                            objects_list = getattr(managed_conf_part, strclss, [])
                            # print("Got %d %s: %s" % (len(objects_list), strclss, objects_list))
                            # Count and string dup are the same !
                            assert len(objects_list) == objects_map[strclss]['count']
                            assert str(objects_list) == objects_map[strclss]['str']

                    # Scheduler specific (schedulers use port *7768)
                    elif '7768/_push_configuration' in request.url:
                        assert 'conf_part' in conf
                        # String serialized configuration
                        assert isinstance(conf['conf_part'], string_types)
                        managed_conf_part = unserialize(conf['conf_part'])
                        # Test a property to be sure conf loaded correctly
                        assert managed_conf_part.instance_id == conf['managed_conf_id']

                        # Hack for the managed configurations
                        link.cfg_managed = {
                            conf['instance_id']: {
                                'hash': conf['hash'],
                                'push_flavor': conf['push_flavor'],
                                'managed_conf_id': conf['managed_conf_id']
                            }
                        }
                        print("Managed: %s" % link.cfg_managed)

                        # The scheduler got the same objects count as the arbiter prepared!
                        for _, _, strclss, _, _ in list(
                                managed_conf_part.types_creations.values()):
                            # These elements are not included in the serialized configuration!
                            if strclss in ['hostescalations', 'serviceescalations',
                                           'arbiters', 'schedulers', 'brokers',
                                           'pollers', 'reactionners', 'receivers',
                                           'realms', 'modules', 'hostsextinfo',
                                           'servicesextinfo', 'hostdependencies',
                                           'servicedependencies']:
                                continue

                            objects_list = getattr(managed_conf_part, strclss, [])
                            # print("Got %d %s: %s" % (len(objects_list), strclss, objects_list))
                            if not multi_realms:
                                # Count and string dump are the same !
                                assert len(objects_list) == objects_map[strclss]['count']
                                assert str(objects_list) == objects_map[strclss]['str']

                    else:
                        # Satellites
                        print("I am: ")
                        print(index, request.url, received)
                        assert 'conf_part' not in conf
                        assert 'see_my_schedulers' == conf['managed_conf_id']

            # Re-register the mocked endpoint so daemons now report the
            # configuration we simulated above
            for link in my_dispatcher.all_daemons_links:
                mr.get('http://%s:%s/managed_configurations' % (link.address, link.port),
                       json=link.cfg_managed)

            print("Check dispatching:")
            self.clear_logs()
            # assert my_dispatcher.check_dispatch() is True
            dispatched = my_dispatcher.check_dispatch()
            self.show_logs()
            assert dispatched

            # Run some check_reachable loops: 4 pings "too early", then one
            # real ping after a fifth 1-second tick
            for loop_count in range(0, loops):
                for tw in range(0, 4):
                    # Time warp 1 second
                    frozen_datetime.tick(delta=datetime.timedelta(seconds=1))

                    print("Check reachable %s" % tw)
                    self.clear_logs()
                    my_dispatcher.check_reachable()
                    # Only for Python > 2.7, DEBUG logs ...
                    if os.sys.version_info > (2, 7):
                        for link in my_dispatcher.all_daemons_links:
                            if link == my_dispatcher.arbiter_link:
                                continue
                            self.assert_any_log_match(re.escape(
                                "Too early to ping %s" % (link.name)))

                # Time warp 1 second
                frozen_datetime.tick(delta=datetime.timedelta(seconds=1))

                print("Check reachable response")
                self.clear_logs()
                my_dispatcher.check_reachable()
                self.show_logs()
                # Only for Python > 2.7, DEBUG logs ...
                if os.sys.version_info > (2, 7):
                    for link in my_dispatcher.all_daemons_links:
                        if link == my_dispatcher.arbiter_link:
                            continue
                        self.assert_any_log_match(re.escape(
                            "My (%s) fresh managed configuration: %s"
                            % (link.name, link.cfg_managed)))
def setup_new_conf(self):
    # pylint: disable=too-many-branches, too-many-locals
    """Broker custom setup_new_conf method

    This function calls the base satellite treatment and manages the configuration needed
    for a broker daemon:
    - get and configure its pollers, reactionners and receivers relation
    - configure the modules

    :return: None
    """
    # Execute the base class treatment...
    super(Broker, self).setup_new_conf()

    # ...then our own specific treatment!
    with self.conf_lock:
        # # self_conf is our own configuration from the fusionsupervision environment
        # self_conf = self.cur_conf['self_conf']

        self.got_initial_broks = False

        # Now we create our pollers, reactionners and receivers
        for link_type in ['pollers', 'reactionners', 'receivers']:
            if link_type not in self.cur_conf['satellites']:
                logger.error("No %s in the configuration!", link_type)
                continue

            my_satellites = getattr(self, link_type, {})
            received_satellites = self.cur_conf['satellites'][link_type]
            for link_uuid in received_satellites:
                rs_conf = received_satellites[link_uuid]
                logger.debug("- received %s - %s: %s", rs_conf['instance_id'],
                             rs_conf['type'], rs_conf['name'])

                # Must look if we already had a configuration and save our broks
                # NOTE(review): membership is tested with 'instance_id' but the dict is
                # keyed by the link uuid (see my_satellites[new_link.uuid] below) and then
                # accessed with link_uuid — confirm instance_id == uuid for satellite links
                already_got = rs_conf['instance_id'] in my_satellites
                broks = []
                actions = {}
                wait_homerun = {}
                external_commands = {}
                running_id = 0
                if already_got:
                    logger.warning("I already got: %s", rs_conf['instance_id'])
                    # Save some information from the former link before replacing it
                    running_id = my_satellites[link_uuid].running_id
                    (broks, actions,
                     wait_homerun, external_commands) = \
                        my_satellites[link_uuid].get_and_clear_context()
                    # Delete the former link
                    del my_satellites[link_uuid]

                # My new satellite link...
                new_link = SatelliteLink.get_a_satellite_link(link_type[:-1], rs_conf)
                my_satellites[new_link.uuid] = new_link
                logger.info("I got a new %s satellite: %s", link_type[:-1], new_link)

                # Restore the context saved from the former link (if any)
                new_link.running_id = running_id
                new_link.external_commands = external_commands
                new_link.broks = broks
                new_link.wait_homerun = wait_homerun
                new_link.actions = actions

                # Replace satellite address and port by those defined in satellite_map
                # todo: check if it is really necessary! Add a unit test for this
                # Not sure about this because of the daemons/satellites configuration
                # if new_link.name in self_conf.get('satellite_map', {}):
                #     new_link = dict(new_link)  # make a copy
                #     new_link.update(self_conf.get('satellite_map', {})[new_link.name])

        # Configure and start our modules, once only
        if not self.have_modules:
            try:
                self.modules = unserialize(self.cur_conf['modules'], no_load=True)
            except FusionsupervisionClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error('Cannot un-serialize modules configuration '
                             'received from arbiter: %s', exp)
            if self.modules:
                logger.info("I received some modules configuration: %s", self.modules)
                self.have_modules = True

                # Ok now start, or restart them!
                # Set modules, init them and start external ones
                self.do_load_modules(self.modules)
                # and start external modules too
                self.modules_manager.start_external_instances()
            else:
                logger.info("I do not have modules")

        # Initialize connection with my schedulers first
        logger.info("Initializing connection with my schedulers:")
        my_satellites = self.get_links_of_type(s_type='scheduler')
        for satellite in list(my_satellites.values()):
            logger.info("- %s/%s", satellite.type, satellite.name)
            if not self.daemon_connection_init(satellite):
                logger.error("Satellite connection failed: %s", satellite)

        # Initialize connection with all our satellites
        logger.info("Initializing connection with my satellites:")
        for sat_type in ['arbiter', 'reactionner', 'poller', 'receiver']:
            my_satellites = self.get_links_of_type(s_type=sat_type)
            for satellite in list(my_satellites.values()):
                logger.info("- %s/%s", satellite.type, satellite.name)
                if not self.daemon_connection_init(satellite):
                    logger.error("Satellite connection failed: %s", satellite)

    # Now I have a configuration!
    self.have_conf = True