class Realm(Itemgroup):
    """Configuration object describing a realm.

    A realm is basically a set of hosts or services assigned to a specific
    set of schedulers/pollers (the other daemons are optional). Realms can be
    nested: ``realm_members`` holds the names of the direct sub-realms and
    ``all_sub_members`` is filled with the names found when exploding them.
    """

    my_type = 'realm'

    properties = Itemgroup.properties.copy()
    properties.update({
        'uuid':
            StringProp(default='', fill_brok=['full_status']),
        'realm_name':
            StringProp(fill_brok=['full_status']),
        'alias':
            StringProp(default=''),
        # No status_broker_name because it put hosts, not host_name
        'realm_members':
            ListProp(default=[], split_on_coma=True),
        'higher_realms':
            ListProp(default=[], split_on_coma=True),
        'default':
            BoolProp(default=False),
    })

    running_properties = Item.running_properties.copy()
    running_properties.update({
        'serialized_confs':
            DictProp(default={}),
        'unknown_higher_realms':
            ListProp(default=[]),
        'all_sub_members':
            ListProp(default=[]),
    })

    macros = {
        'REALMNAME': 'realm_name',
        'REALMMEMBERS': 'members',
    }

    potential_pollers = []
    potential_reactionners = []
    potential_brokers = []
    potential_receivers = []

    def get_name(self):
        """Return the name of this realm.

        :return: the ``realm_name`` attribute
        :rtype: str
        """
        return self.realm_name

    def add_string_member(self, member):
        """Extend ``all_sub_members`` with the given realm names.

        :param member: realm names to add
        :type member: list
        :return: None
        """
        self.all_sub_members.extend(member)

    def get_realm_members(self):
        """Return the names of the direct sub-realms, stripped of whitespace.

        :return: stripped entries of ``realm_members``, or an empty list when
                 the attribute does not exist
        :rtype: list
        """
        # TODO: consistency: a Realm instance should always have its realm_members
        # defined, and already parsed to a list of member names.
        if not hasattr(self, 'realm_members'):
            return []
        return [name.strip() for name in self.realm_members]

    def fill_realm_members_with_higher_realms(self, realms):
        """Register this realm as a member of each of its ``higher_realms``.

        For every higher realm name that can be found in ``realms``, this
        realm's name is appended to the found realm's ``realm_members``
        (creating the list when it is missing). Names that are not found are
        ignored here.

        :param realms: collection of all realms; must provide ``find_by_name``
        :type realms: list
        :return: None
        """
        for parent_name in getattr(self, 'higher_realms', []):
            parent = realms.find_by_name(parent_name.strip())
            if parent is None:
                continue
            if not hasattr(parent, 'realm_members'):
                parent.realm_members = []
            parent.realm_members.append(self.realm_name)

    def get_realms_by_explosion(self, realms):
        """Collect the names of all sub-realms, recursing on several levels.

        The result is accumulated in ``all_sub_members``. Member names that do
        not match a known realm are handed to ``add_string_unknown_member``.

        :param realms: realms list, used to look for a specific one
        :type realms: alignak.objects.realm.Realms
        :return: ``all_sub_members``, or None when a definition loop is detected
        :rtype: list | None
        """
        # rec_tag is expected to be reset before each explosion: finding it
        # already set means we came back to this realm, which is a loop.
        if self.rec_tag:
            self.configuration_errors.append(
                "Error: we've got a loop in realm definition %s" % self.get_name())
            return None

        # Not a loop: tag this realm and go on
        self.rec_tag = True

        # This realm has already been exploded
        if self.all_sub_members != []:
            return self.all_sub_members

        for member_name in self.get_realm_members():
            stripped_name = member_name.strip()
            sub_realm = realms.find_by_name(stripped_name)
            if sub_realm is None:
                self.add_string_unknown_member(stripped_name)
                continue

            exploded = sub_realm.get_realms_by_explosion(realms)
            if exploded is None:
                # A loop was found below us: drop what we collected
                self.all_sub_members = []
                self.realm_members = []
                return None
            if exploded:
                self.add_string_member(exploded)
            self.add_string_member([sub_realm.realm_name])

        return self.all_sub_members

    def get_all_subs_satellites_by_type(self, sat_type, realms):
        """Get the satellites of one type for this realm and its sub-realms.

        TODO: Make this generic

        :param sat_type: name of the attribute holding the satellites
                         (the callers in this class pass 'pollers',
                         'reactionners' or 'receivers')
        :type sat_type: str
        :param realms: all realms, indexed with the entries of ``all_sub_members``
        :type realms: list of realm object
        :return: shallow copy of this realm's satellites followed by those
                 returned by each sub-member
        :rtype: list
        """
        satellites = copy.copy(getattr(self, sat_type))
        for member in self.all_sub_members:
            satellites.extend(
                realms[member].get_all_subs_satellites_by_type(sat_type, realms))
        return satellites

    def get_satellites_by_type(self, s_type):
        """Generic accessor to one of the satellites attributes,
        i.e. self.pollers, self.reactionners ...

        :param s_type: satellite type wanted (singular, an 's' is appended)
        :type s_type: str
        :return: self.*type*s, or an empty list when there is no such attribute
        :rtype: list
        """
        attr_name = s_type + 's'
        if not hasattr(self, attr_name):
            logger.debug("[realm] do not have this kind of satellites: %s", s_type)
            return []
        return getattr(self, attr_name)

    def get_potential_satellites_by_type(self, s_type):
        """Generic accessor to one of the potential satellites attributes,
        i.e. self.potential_pollers, self.potential_reactionners ...

        :param s_type: satellite type wanted (singular, an 's' is appended)
        :type s_type: str
        :return: self.potential_*type*s, or an empty list when there is no
                 such attribute
        :rtype: list
        """
        attr_name = 'potential_' + s_type + 's'
        if not hasattr(self, attr_name):
            logger.debug("[realm] do not have this kind of satellites: %s", s_type)
            return []
        return getattr(self, attr_name)

    def get_nb_of_must_have_satellites(self, s_type):
        """Generic accessor to one of the satellites counters,
        i.e. self.nb_pollers, self.nb_reactionners ...

        :param s_type: satellite type wanted (singular, an 's' is appended)
        :type s_type: str
        :return: self.nb_*type*s, or 0 when there is no such attribute
        :rtype: int
        """
        attr_name = 'nb_' + s_type + 's'
        if not hasattr(self, attr_name):
            logger.debug("[realm] do not have this kind of satellites: %s", s_type)
            return 0
        return getattr(self, attr_name)

    def fill_broker_with_poller_reactionner_links(self, broker, pollers, reactionners,
                                                  receivers, realms):
        """Fill a broker link configuration with the satellites it has to know.

        ``broker.cfg`` gets fresh 'pollers', 'reactionners' and 'receivers'
        dictionaries, keyed by satellite uuid and holding the result of each
        satellite's ``give_satellite_cfg()``. The satellites of the sub-realms
        are added when ``broker.manage_sub_realms`` is set.

        :param broker: broker link we want to fill
        :type broker: alignak.objects.brokerlink.Brokerlink
        :param pollers: pollers, indexed by the identifiers stored in the realm
        :type pollers:
        :param reactionners: reactionners, indexed the same way
        :type reactionners:
        :param receivers: receivers, indexed the same way
        :type receivers:
        :param realms: realms, used to reach the sub-realms
        :type realms:
        :return: None
        """
        # TODO: find a better name...
        # TODO: and if he goes active?
        links_by_type = (
            ('pollers', pollers),
            ('reactionners', reactionners),
            ('receivers', receivers),
        )

        # First we create/void these links
        for sat_type, _ in links_by_type:
            broker.cfg[sat_type] = {}

        # Then our own level
        for sat_type, links in links_by_type:
            for sat_id in getattr(self, sat_type):
                link = links[sat_id]
                broker.cfg[sat_type][link.uuid] = link.give_satellite_cfg()

        # And the sub-realms, only if the broker manages them
        if broker.manage_sub_realms:
            for sat_type, links in links_by_type:
                for sat_id in self.get_all_subs_satellites_by_type(sat_type, realms):
                    link = links[sat_id]
                    broker.cfg[sat_type][link.uuid] = link.give_satellite_cfg()

    def get_satellites_links_for_scheduler(self, pollers, reactionners, brokers):
        """Build the satellites configuration to provide to a scheduler.

        Only the satellites of this realm's own level are considered.

        :param pollers: pollers, indexed by the identifiers stored in the realm
        :param reactionners: reactionners, indexed the same way
        :param brokers: brokers, indexed the same way
        :return: dict with 'pollers', 'reactionners' and 'brokers' entries, each
                 one mapping a satellite uuid to its ``give_satellite_cfg()``
        :rtype: dict
        """
        cfg = {
            'pollers': {},
            'reactionners': {},
            'brokers': {},
        }

        links_by_type = (
            ('pollers', pollers),
            ('reactionners', reactionners),
            ('brokers', brokers),
        )
        for sat_type, links in links_by_type:
            for sat_id in getattr(self, sat_type):
                link = links[sat_id]
                cfg[sat_type][link.uuid] = link.give_satellite_cfg()

        return cfg
class SatelliteLink(Item):
    """SatelliteLink is a common class for the links between the Arbiter
    and the other satellites. Used by the Dispatcher object.

    The link keeps, in ``arb_satmap``, the address/port to use for reaching
    the satellite and, in ``con``, the HTTP client used to talk to it.
    """
    # _id = 0 each Class will have it's own id

    properties = Item.properties.copy()
    properties.update({
        'address':
            StringProp(default='localhost', fill_brok=['full_status']),
        'timeout':
            IntegerProp(default=3, fill_brok=['full_status']),
        'data_timeout':
            IntegerProp(default=120, fill_brok=['full_status']),
        'check_interval':
            IntegerProp(default=60, fill_brok=['full_status']),
        'max_check_attempts':
            IntegerProp(default=3, fill_brok=['full_status']),
        'spare':
            BoolProp(default=False, fill_brok=['full_status']),
        'manage_sub_realms':
            BoolProp(default=True, fill_brok=['full_status']),
        'manage_arbiters':
            BoolProp(default=False, fill_brok=['full_status'], to_send=True),
        'modules':
            ListProp(default=[''], to_send=True, split_on_coma=True),
        'polling_interval':
            IntegerProp(default=1, fill_brok=['full_status'], to_send=True),
        'use_timezone':
            StringProp(default='NOTSET', to_send=True),
        'realm':
            StringProp(default='', fill_brok=['full_status'],
                       brok_transformation=get_obj_name_two_args_and_void),
        'satellitemap':
            DictProp(default={}, elts_prop=AddrProp, to_send=True, override=True),
        'use_ssl':
            BoolProp(default=False, fill_brok=['full_status']),
        'hard_ssl_name_check':
            BoolProp(default=True, fill_brok=['full_status']),
        'passive':
            BoolProp(default=False, fill_brok=['full_status'], to_send=True),
    })

    # NOTE(review): several entries below are declared as StringProp although
    # their default is a list, an int or a dict - confirm this is intended.
    running_properties = Item.running_properties.copy()
    running_properties.update({
        'con':
            StringProp(default=None),
        'alive':
            BoolProp(default=True, fill_brok=['full_status']),
        'broks':
            StringProp(default=[]),

        # the number of failed attempt
        'attempt':
            StringProp(default=0, fill_brok=['full_status']),

        # can be network ask or not (dead or check in timeout or error)
        'reachable':
            BoolProp(default=False, fill_brok=['full_status']),
        'last_check':
            IntegerProp(default=0, fill_brok=['full_status']),
        'managed_confs':
            StringProp(default={}),
    })

    def __init__(self, *args, **kwargs):
        """Initialize the link and its default Arbiter satellite map.

        ``arb_satmap`` starts with address '0.0.0.0' and port 0, then takes
        the link ``address`` and ``port`` when those attributes exist.
        """
        super(SatelliteLink, self).__init__(*args, **kwargs)

        self.arb_satmap = {'address': '0.0.0.0', 'port': 0}
        if hasattr(self, 'address'):
            self.arb_satmap['address'] = self.address
        if hasattr(self, 'port'):
            try:
                self.arb_satmap['port'] = int(self.port)
            except (TypeError, ValueError):
                # Best effort: a port that cannot be converted to an integer
                # is ignored and the default port (0) is kept.
                pass

    def get_name(self):
        """Get the name of the link based on its type

        If *mytype*_name is an attribute then returns self.*mytype*_name,
        otherwise returns "Unnamed *mytype*".
        Example: self.poller_name or "Unnamed poller"

        :return: String corresponding to the link name
        :rtype: str
        """
        my_type = self.get_my_type()
        return getattr(self, "{0}_name".format(my_type), "Unnamed {0}".format(my_type))

    def set_arbiter_satellitemap(self, satellitemap):
        """Set ``arb_satmap``, the satellite map in the current context:

        - A SatelliteLink is owned by an Arbiter
        - the satellitemap attribute of a SatelliteLink is the map defined
          IN THE satellite configuration, but for creating connections we
          need to have the satellitemap of the Arbiter

        The map is rebuilt from the link address, port, use_ssl and
        hard_ssl_name_check, then updated with the provided entries.

        :param satellitemap: entries overriding the ones of the link
        :type satellitemap: dict
        :return: None
        """
        self.arb_satmap = {
            'address': self.address,
            'port': self.port,
            'use_ssl': self.use_ssl,
            'hard_ssl_name_check': self.hard_ssl_name_check,
        }
        self.arb_satmap.update(satellitemap)

    def create_connection(self):
        """Initialize the HTTP connection with a satellite (con attribute)
        and set the uri attribute from this connection

        :return: None
        """
        self.con = HTTPClient(address=self.arb_satmap['address'],
                              port=self.arb_satmap['port'],
                              timeout=self.timeout,
                              data_timeout=self.data_timeout,
                              use_ssl=self.use_ssl,
                              strong_ssl=self.hard_ssl_name_check)
        self.uri = self.con.uri

    def put_conf(self, conf):
        """Send the conf (serialized) to the satellite
        HTTP request to the satellite (POST / put_conf)

        The connection is created when it does not exist yet. The satellite
        is pinged before the configuration is posted. When the request fails
        with one of the HTTPEXCEPTIONS, the connection is dropped (``con`` is
        reset to None) and the error is logged.

        :param conf: The conf to send (data depend on the satellite)
        :type conf:
        :return: True if the configuration was sent, else False
        :rtype: bool
        """
        if self.con is None:
            self.create_connection()

        # Maybe the connection was not ok, bail out
        if not self.con:
            return False

        try:
            self.con.get('ping')
            self.con.post('put_conf', {'conf': conf}, wait='long')
        except HTTPEXCEPTIONS as exp:
            self.con = None
            logger.error("Failed sending configuration for %s: %s",
                         self.get_name(), str(exp))
            return False

        # Report the success through the logger, not on the daemon stdout
        logger.debug("Configuration sent to %s", self.get_name())
        return True
class Realm(Itemgroup):
    """Realm class is used to implement realm.

    It is basically a set of Host or Service assigned to a specific set of
    Scheduler/Poller (other daemon are optional). The satellites lists used
    here (``self.pollers``, ``self.reactionners``, ...) and the entries of
    ``higher_realms`` are used as objects by the counting and filling methods.
    """
    _id = 1  # zero is always a little bit special... like in database
    my_type = 'realm'

    properties = Itemgroup.properties.copy()
    properties.update({
        '_id':
            IntegerProp(default=0, fill_brok=['full_status']),
        'realm_name':
            StringProp(fill_brok=['full_status']),
        # No status_broker_name because it put hosts, not host_name
        'realm_members':
            ListProp(default=[], split_on_coma=True),
        'higher_realms':
            ListProp(default=[], split_on_coma=True),
        'default':
            BoolProp(default=False),
        'broker_complete_links':
            BoolProp(default=False),
        # 'alias': {'required': True, 'fill_brok': ['full_status']},
        # 'notes': {'required': False, 'default':'', 'fill_brok': ['full_status']},
        # 'notes_url': {'required': False, 'default':'', 'fill_brok': ['full_status']},
        # 'action_url': {'required': False, 'default':'', 'fill_brok': ['full_status']},
    })

    running_properties = Item.running_properties.copy()
    running_properties.update({
        'serialized_confs':
            DictProp(default={}),
    })

    macros = {
        'REALMNAME': 'realm_name',
        'REALMMEMBERS': 'members',
    }

    potential_pollers = []
    potential_reactionners = []
    potential_brokers = []
    potential_receivers = []

    def get_name(self):
        """Accessor to realm_name attribute

        :return: realm name
        :rtype: str
        """
        return self.realm_name

    def get_realms(self):
        """Get list of members of this realm

        TODO: Duplicate of get_realm_members

        :return: the ``realm_members`` attribute, as is
        :rtype: list
        """
        return self.realm_members

    def add_string_member(self, member):
        """Append an entry to the realm_members attribute

        TODO : Clean this self.members != self.realm_members?

        :param member: entry to append (appended as a single element)
        :type member:
        :return: None
        """
        self.realm_members.append(member)

    def get_realm_members(self):
        """Get the stripped list of members of this realm

        :return: stripped entries of ``realm_members``, or an empty list when
                 the attribute does not exist
        :rtype: list
        """
        # TODO: consistency: a Realm instance should always have its realm_members
        # defined, and already decoded/parsed to its final type:
        # a list of strings (being the names of the members)
        if hasattr(self, 'realm_members'):
            return [r.strip() for r in self.realm_members]
        return []

    def get_realms_by_explosion(self, realms):
        """Get all members of this realm including members of sub-realms

        TODO: Clean this function that silently edit realm_members.

        :param realms: realms list, used to look for a specific one
        :type realms: alignak.objects.realm.Realms
        :return: the ``members`` attribute, or an empty list when it does
                 not exist
        :rtype: list
        """
        # First we tag the realm so it will not be exploded again
        # if one of its sons already called it
        self.already_explode = True

        # Now the recursive part
        # rec_tag is set to False for every realm we explode
        # so if True here, it must be a loop in the realms
        # calls... not GOOD!
        if self.rec_tag:
            err = "Error: we've got a loop in realm definition %s" % self.get_name()
            self.configuration_errors.append(err)
            return getattr(self, 'members', [])

        # Ok, not a loop, we tag it and continue
        self.rec_tag = True

        # get_realm_members returns a new list, so appending to realm_members
        # in the loop does not disturb the iteration
        for p_mbr in self.get_realm_members():
            realm = realms.find_by_name(p_mbr.strip())
            if realm is None:
                continue
            value = realm.get_realms_by_explosion(realms)
            if value:
                self.add_string_member(value)

        return getattr(self, 'members', [])

    def get_all_subs_satellites_by_type(self, sat_type):
        """Get all satellites of the wanted type in this realm recursively

        TODO: Make this generic

        :param sat_type: name of the attribute holding the satellites
                         (the callers in this class pass 'pollers',
                         'reactionners' or 'receivers')
        :type sat_type: str
        :return: shallow copy of this realm's satellites followed by those
                 returned by each entry of ``realm_members``
        :rtype: list
        """
        res = copy.copy(getattr(self, sat_type))
        for member in self.realm_members:
            res.extend(member.get_all_subs_satellites_by_type(sat_type))
        return res

    def _count_satellites(self, sat_type):
        """Set nb_*sat_type* to the number of usable satellites of this type.

        Counted are the non-spare satellites of this realm, plus the non-spare
        satellites of the higher realms that manage the sub-realms.

        :param sat_type: plural satellite type ('pollers', 'reactionners', ...)
        :type sat_type: str
        :return: None
        """
        count = sum(1 for satellite in getattr(self, sat_type)
                    if not satellite.spare)
        for realm in self.higher_realms:
            count += sum(1 for satellite in getattr(realm, sat_type)
                         if not satellite.spare and satellite.manage_sub_realms)
        setattr(self, 'nb_%s' % sat_type, count)

    def count_reactionners(self):
        """Set the number of reactionners in this realm (nb_reactionners).

        :return: None
        """
        self._count_satellites('reactionners')

    def count_pollers(self):
        """Set the number of pollers in this realm (nb_pollers).

        :return: None
        """
        self._count_satellites('pollers')

    def count_brokers(self):
        """Set the number of brokers in this realm (nb_brokers).

        :return: None
        """
        self._count_satellites('brokers')

    def count_receivers(self):
        """Set the number of receivers in this realm (nb_receivers).

        :return: None
        """
        self._count_satellites('receivers')

    def get_satellites_by_type(self, s_type):
        """Generic function to access one of the satellite attribute
        ie : self.pollers, self.reactionners ...

        :param s_type: satellite type wanted (singular, an 's' is appended)
        :type s_type: str
        :return: self.*type*s, or an empty list when there is no such attribute
        :rtype: list
        """
        if hasattr(self, s_type + 's'):
            return getattr(self, s_type + 's')

        logger.debug("[realm] do not have this kind of satellites: %s", s_type)
        return []

    # Historical misspelled name, kept so that existing callers still work
    get_satellties_by_type = get_satellites_by_type

    def fill_potential_satellites_by_type(self, sat_type):
        """Edit potential_*sat_type* attribute to get potential satellite from upper level realms

        The attribute is reset to a new list containing the satellites of this
        realm, followed by the satellites of the higher realms that manage
        the sub-realms.

        :param sat_type: plural satellite type ('pollers', 'reactionners', ...)
        :type sat_type: str
        :return: None
        """
        potential = []
        setattr(self, 'potential_%s' % sat_type, potential)

        potential.extend(getattr(self, sat_type))
        for realm in self.higher_realms:
            potential.extend(satellite for satellite in getattr(realm, sat_type)
                             if satellite.manage_sub_realms)

    def get_potential_satellites_by_type(self, s_type):
        """Generic function to access one of the potential satellite attribute
        ie : self.potential_pollers, self.potential_reactionners ...

        :param s_type: satellite type wanted (singular, an 's' is appended)
        :type s_type: str
        :return: self.potential_*type*s, or an empty list when there is no
                 such attribute
        :rtype: list
        """
        if hasattr(self, 'potential_' + s_type + 's'):
            return getattr(self, 'potential_' + s_type + 's')

        logger.debug("[realm] do not have this kind of satellites: %s", s_type)
        return []

    def get_nb_of_must_have_satellites(self, s_type):
        """Generic function to access one of the number satellite attribute
        ie : self.nb_pollers, self.nb_reactionners ...

        :param s_type: satellite type wanted (singular, an 's' is appended)
        :type s_type: str
        :return: self.nb_*type*s, or 0 when there is no such attribute
        :rtype: int
        """
        if hasattr(self, 'nb_' + s_type + 's'):
            return getattr(self, 'nb_' + s_type + 's')

        logger.debug("[realm] do not have this kind of satellites: %s", s_type)
        return 0

    # Fill dict of realms for managing the satellites confs
    def prepare_for_satellites_conf(self):
        """Init the following attributes::

        * to_satellites (with *satellite type* keys)
        * to_satellites_need_dispatch (with *satellite type* keys)
        * to_satellites_managed_by (with *satellite type* keys)
        * nb_*satellite type*s
        * self.potential_*satellite type*s

        (satellite type are reactionner, poller, broker and receiver)

        A summary line is then logged at the info level.

        :return: None
        """
        self.to_satellites = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        self.to_satellites_need_dispatch = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        self.to_satellites_managed_by = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        self.count_reactionners()
        self.fill_potential_satellites_by_type('reactionners')
        self.count_pollers()
        self.fill_potential_satellites_by_type('pollers')
        self.count_brokers()
        self.fill_potential_satellites_by_type('brokers')
        self.count_receivers()
        self.fill_potential_satellites_by_type('receivers')

        line = "%s: (in/potential) (schedulers:%d) (pollers:%d/%d)" \
               " (reactionners:%d/%d) (brokers:%d/%d) (receivers:%d/%d)" % \
            (self.get_name(),
             len(self.schedulers),
             self.nb_pollers, len(self.potential_pollers),
             self.nb_reactionners, len(self.potential_reactionners),
             self.nb_brokers, len(self.potential_brokers),
             self.nb_receivers, len(self.potential_receivers)
             )
        logger.info(line)

    def fill_broker_with_poller_reactionner_links(self, broker):
        """Fill brokerlink object with satellite data

        ``broker.cfg`` gets fresh 'pollers', 'reactionners' and 'receivers'
        dictionaries, keyed by satellite ``_id`` and holding the result of
        each satellite's ``give_satellite_cfg()``. The satellites of the
        sub-realms are added when ``broker.manage_sub_realms`` is set.

        :param broker: broker link we want to fill
        :type broker: alignak.objects.brokerlink.Brokerlink
        :return: None
        """
        # TODO: find a better name...
        # TODO: and if he goes active?
        sat_types = ('pollers', 'reactionners', 'receivers')

        # First we create/void these links
        for sat_type in sat_types:
            broker.cfg[sat_type] = {}

        # First our own level
        for sat_type in sat_types:
            for satellite in getattr(self, sat_type):
                broker.cfg[sat_type][satellite._id] = satellite.give_satellite_cfg()

        # Then the sub-realms, if the broker must manage them
        if broker.manage_sub_realms:
            for sat_type in sat_types:
                for satellite in self.get_all_subs_satellites_by_type(sat_type):
                    broker.cfg[sat_type][satellite._id] = satellite.give_satellite_cfg()

    def get_satellites_links_for_scheduler(self):
        """Get a configuration dict with pollers and reactionners data

        Only the satellites of this realm's own level are considered.

        :return: dict containing pollers and reactionners config (key is satellite id)
        :rtype: dict
        """
        # First we create/void these links
        cfg = {'pollers': {}, 'reactionners': {}}

        # Our own level
        for sat_type in ('pollers', 'reactionners'):
            for satellite in getattr(self, sat_type):
                cfg[sat_type][satellite._id] = satellite.give_satellite_cfg()

        return cfg
class SatelliteLink(Item): # pylint: disable=too-many-instance-attributes """SatelliteLink is a common Class for links between Arbiter and other satellites. Used by the Dispatcher object. """ # Next value used for auto generated instance_id _next_id = 1 # All the class properties that are 'to_send' are stored in the 'global' # configuration to be pushed to the satellite when the configuration is dispatched properties = Item.properties.copy() properties.update({ 'instance_id': StringProp(to_send=True), # When this property is set, the Arbiter will launch the corresponding daemon 'alignak_launched': BoolProp(default=False, fill_brok=['full_status'], to_send=True), # This property is set by the Arbiter when it detects that this daemon # is needed but not declared in the configuration 'missing_daemon': BoolProp(default=False, fill_brok=['full_status']), # Sent to the satellites and used to check the managed configuration # Those are not to_send=True because they are updated by the configuration Dispatcher # and set when the daemon receives its configuration 'managed_conf_id': StringProp(default=u''), 'push_flavor': StringProp(default=u''), 'hash': StringProp(default=u''), # A satellite link has the type/name of the daemon it is related to 'type': StringProp(default=u'', fill_brok=['full_status'], to_send=True), 'name': StringProp(default=u'', fill_brok=['full_status'], to_send=True), # Listening interface and address used by the other daemons 'host': StringProp(default=u'0.0.0.0', to_send=True), 'address': StringProp(default=u'127.0.0.1', fill_brok=['full_status'], to_send=True), 'active': BoolProp(default=True, fill_brok=['full_status'], to_send=True), 'short_timeout': IntegerProp(default=3, fill_brok=['full_status'], to_send=True), 'long_timeout': IntegerProp(default=120, fill_brok=['full_status'], to_send=True), # the delay (seconds) between two ping retries 'ping_period': IntegerProp(default=5), # The maximum number of retries before setting the daemon as dead 
'max_check_attempts': IntegerProp(default=3, fill_brok=['full_status']), # For a spare daemon link 'spare': BoolProp(default=False, fill_brok=['full_status'], to_send=True), 'spare_check_interval': IntegerProp(default=5, fill_brok=['full_status']), 'spare_max_check_attempts': IntegerProp(default=3, fill_brok=['full_status']), 'manage_sub_realms': BoolProp(default=True, fill_brok=['full_status'], to_send=True), 'manage_arbiters': BoolProp(default=False, fill_brok=['full_status'], to_send=True), 'modules': ListProp(default=[''], split_on_comma=True), 'polling_interval': IntegerProp(default=5, fill_brok=['full_status'], to_send=True), 'use_timezone': StringProp(default=u'NOTSET', to_send=True), 'realm': StringProp(default=u'', fill_brok=['full_status'], brok_transformation=get_obj_name_two_args_and_void), 'realm_name': StringProp(default=u''), 'satellite_map': DictProp(default={}, elts_prop=AddrProp, to_send=True, override=True), 'use_ssl': BoolProp(default=False, fill_brok=['full_status'], to_send=True), 'hard_ssl_name_check': BoolProp(default=True, fill_brok=['full_status'], to_send=True), 'passive': BoolProp(default=False, fill_brok=['full_status'], to_send=True), }) running_properties = Item.running_properties.copy() running_properties.update({ 'con': StringProp(default=None), 'uri': StringProp(default=None), 'reachable': # Can be reached - assumed True as default ;) BoolProp(default=False, fill_brok=['full_status']), 'alive': # Is alive (attached process s launched...) 
BoolProp(default=False, fill_brok=['full_status']), 'valid': # Is valid (the daemon is the expected one) BoolProp(default=False, fill_brok=['full_status']), 'need_conf': # The daemon needs to receive a configuration BoolProp(default=True, fill_brok=['full_status']), 'have_conf': # The daemon has received a configuration BoolProp(default=False, fill_brok=['full_status']), 'stopping': # The daemon is requested to stop BoolProp(default=False, fill_brok=['full_status']), 'running_id': # The running identifier of my related daemon FloatProp(default=0, fill_brok=['full_status']), # the number of poll attempt from the arbiter dispatcher 'attempt': IntegerProp(default=0, fill_brok=['full_status']), # the last connection attempt timestamp 'last_connection': IntegerProp(default=0, fill_brok=['full_status']), # the number of failed attempt for the connection 'connection_attempt': IntegerProp(default=0, fill_brok=['full_status']), 'last_check': IntegerProp(default=0, fill_brok=['full_status']), 'cfg_managed': DictProp(default=None), 'cfg_to_manage': DictProp(default={}), 'configuration_sent': BoolProp(default=False), 'statistics': DictProp(default={}), }) def __init__(self, params=None, parsing=True): """Initialize a SatelliteLink If parsing is True, we are initializing from a configuration, else we are initializing from a copy of another satellite link data. This is used when the daemons receive their configuration from the arbiter. When initializing from an arbiter configuration, an instance_id property must exist else a LinkError exception is raised! 
If a satellite_map property exists in the provided parameters, it will update the default existing one """ super(SatelliteLink, self).__init__(params, parsing) logger.debug("Initialize a %s, params: %s", self.__class__.__name__, params) # My interface context self.broks = [] self.actions = {} self.wait_homerun = {} self.pushed_commands = [] self.init_running_properties() if parsing: # Create a new satellite link identifier self.instance_id = u'%s_%d' % (self.__class__.__name__, self.__class__._next_id) self.__class__._next_id += 1 elif 'instance_id' not in params: raise LinkError("When not parsing a configuration, " "an instance_id must exist in the provided parameters") self.fill_default() # Hack for ascending compatibility with Shinken configuration try: # We received a configuration with a 'name' property... if self.name: setattr(self, "%s_name" % self.type, self.name) else: # We received a configuration without a 'name' property... old form! if getattr(self, "%s_name" % self.type, None): setattr(self, 'name', getattr(self, "%s_name" % self.type)) else: self.name = "Unnamed %s" % self.type setattr(self, "%s_name" % self.type, self.name) except KeyError: setattr(self, 'name', getattr(self, "%s_name" % self.type)) # Initialize our satellite map, and update if required self.set_arbiter_satellite_map(params.get('satellite_map', {})) self.cfg = { 'self_conf': {}, 'schedulers': {}, 'arbiters': {} } # Create the daemon connection self.create_connection() def __repr__(self): # pragma: no cover return '<%s - %s/%s, %s//%s:%s, rid: %s, spare: %s, realm: %s, sub-realms: %s, ' \ 'managing: %s (%s) />' \ % (self.instance_id, self.type, self.name, self.scheme, self.address, self.port, self.running_id, self.spare, self.realm, self.manage_sub_realms, self.managed_conf_id, self.push_flavor) __str__ = __repr__ @property def scheme(self): """Daemon interface scheme :return: http or https if the daemon uses SSL :rtype: str """ _scheme = 'http' if self.use_ssl: _scheme = 'https' 
return _scheme

    @staticmethod
    def get_a_satellite_link(sat_type, sat_dict):
        """Get a SatelliteLink object for a given satellite type and a dictionary

        The class is looked up with ``get_alignak_class`` from the dotted name
        ``alignak.objects.<sat_type>link.<Sat_type>Link`` and instantiated with
        ``parsing=False``.

        :param sat_type: type of satellite
        :type sat_type: str
        :param sat_dict: satellite configuration data
        :type sat_dict: dict
        :return: an instance of the link class matching the satellite type
        """
        cls = get_alignak_class('alignak.objects.%slink.%sLink'
                                % (sat_type, sat_type.capitalize()))
        return cls(params=sat_dict, parsing=False)

    def get_livestate(self):
        """Get the SatelliteLink live state.

        The live state is a tuple information containing a state identifier and a message, where:
            state is:
            - 0 for an up and running satellite
            - 1 if the satellite is not reachable
            - 2 if the satellite is dead
            - 3 else (not active)

        :return: tuple (state identifier, message)
        :rtype: tuple
        """
        livestate = 0
        if self.active:
            # Reachability is tested before liveness: an unreachable link is reported
            # as 1 even if it is also not alive
            if not self.reachable:
                livestate = 1
            elif not self.alive:
                livestate = 2
        else:
            livestate = 3

        # The state identifier is also the index of the message in the list
        livestate_output = "%s/%s is %s" % (self.type, self.name, [
            "up and running.",
            "warning because not reachable.",
            "critical because not responding.",
            "not active by configuration."
        ][livestate])

        return (livestate, livestate_output)

    def set_arbiter_satellite_map(self, satellite_map=None):
        """
            satellite_map is the satellites map in current context:
                - A SatelliteLink is owned by an Arbiter
                - satellite_map attribute of a SatelliteLink is the map defined
                IN THE satellite configuration but for creating connections,
                we need to have the satellites map from the Arbiter point of view

        The map is first built from the link own address, port, use_ssl and
        hard_ssl_name_check attributes, then updated with the provided map (if any).

        :param satellite_map: values overriding the link own connection parameters
        :type satellite_map: dict | None
        :return: None
        """
        self.satellite_map = {
            'address': self.address, 'port': self.port,
            'use_ssl': self.use_ssl, 'hard_ssl_name_check': self.hard_ssl_name_check
        }
        if satellite_map:
            self.satellite_map.update(satellite_map)

    def get_and_clear_context(self):
        """Get and clean all of our broks, actions, external commands and homerun

        :return: tuple (broks, actions, wait_homerun, pushed_commands) as they were
                 before being reset
        :rtype: tuple
        """
        res = (self.broks, self.actions, self.wait_homerun, self.pushed_commands)
        self.broks = []
        self.actions = {}
        self.wait_homerun = {}
        self.pushed_commands = []
        return res

    def get_and_clear_broks(self):
        """Get and clean all of our broks

        :return: list of all broks of the satellite link
        :rtype: list
        """
        res = self.broks
        self.broks = []
        return res

    def prepare_for_conf(self):
        """Initialize the pushed configuration dictionary
        with the inner properties that are to be propagated to the satellite link.

        The dictionary is stored in the ``cfg`` attribute with the keys
        'self_conf', 'schedulers' and 'arbiters'.

        :return: None
        """
        logger.debug("- preparing: %s", self)
        self.cfg = {
            'self_conf': self.give_satellite_cfg(),
            'schedulers': {},
            'arbiters': {}
        }
        logger.debug("- prepared: %s", self.cfg)

    def give_satellite_cfg(self):
        """Get the default information for a satellite.

        Overridden by the specific satellites links

        :return: dictionary of information common to all the links
        :rtype: dict
        """
        # All the satellite link class properties that are 'to_send' are stored in a
        # dictionary to be pushed to the satellite when the configuration is dispatched
        res = {}
        properties = self.__class__.properties
        for prop, entry in list(properties.items()):
            # Properties that are not set on this instance are left out
            if hasattr(self, prop) and entry.to_send:
                res[prop] = getattr(self, prop)
        return res

    def give_satellite_json(self):
        """Get the json information for a satellite.

        This to provide information that will be exposed by a daemon on its HTTP interface.

        The result contains the live state, its message and a fixed list of link
        attributes. An attribute that is not set is reported as 'not_yet_defined'.

        :return: dictionary of information common to all the links
        :rtype: dict
        """
        daemon_properties = ['type', 'name', 'uri', 'spare', 'configuration_sent',
                             'realm_name', 'manage_sub_realms',
                             'active', 'reachable', 'alive', 'passive',
                             'last_check', 'polling_interval', 'max_check_attempts']

        (livestate, livestate_output) = self.get_livestate()
        res = {
            "livestate": livestate,
            "livestate_output": livestate_output
        }
        for sat_prop in daemon_properties:
            res[sat_prop] = getattr(self, sat_prop, 'not_yet_defined')
        return res

    def manages(self, cfg_part):
        """Tell if the satellite is managing this configuration part

        The managed configuration is formed as a dictionary indexed on the link instance_id:
         {
            u'SchedulerLink_1': {
                u'hash': u'4d08630a3483e1eac7898e7a721bd5d7768c8320',
                u'push_flavor': u'4d08630a3483e1eac7898e7a721bd5d7768c8320',
                u'managed_conf_id': [u'Config_1']
            }
        }

        Note that the managed configuration is a string array rather than a simple string...
        no special reason for this, probably due to the serialization when the configuration
        is pushed :/

        :param cfg_part: configuration part as prepared by the Dispatcher
        :type cfg_part: Conf
        :return: True if the satellite manages this configuration
        :rtype: bool
        """
        logger.debug("Do I (%s/%s) manage: %s, my managed configuration(s): %s",
                     self.type, self.name, cfg_part, self.cfg_managed)

        # If we do not yet manage a configuration
        if not self.cfg_managed:
            logger.info("I (%s/%s) do not manage (yet) any configuration!", self.type, self.name)
            return False

        # Check in the schedulers list configurations
        # NOTE(review): the else clause is read as belonging to the for loop (no managed
        # configuration matched) - confirm against the upstream layout
        for managed_cfg in list(self.cfg_managed.values()):
            # A managed configuration matches when both its identifier and its
            # push flavor are the ones of the configuration part
            if managed_cfg['managed_conf_id'] == cfg_part.instance_id \
                    and managed_cfg['push_flavor'] == cfg_part.push_flavor:
                logger.debug("I do manage this configuration: %s", cfg_part)
                break
        else:
            logger.warning("I (%s/%s) do not manage this configuration: %s",
                           self.type, self.name, cfg_part)
            return False

        return True

    def create_connection(self):
        """Initialize HTTP connection with a satellite (con attribute) and
        set its uri attribute

        This is called on the satellite link initialization

        :raises LinkError: if the HTTP client creation raised an HTTPClientException;
                           the link is set as dead before raising
        :return: None
        """
        # Create the HTTP client for the connection
        try:
            self.con = HTTPClient(address=self.satellite_map['address'],
                                  port=self.satellite_map['port'],
                                  short_timeout=self.short_timeout,
                                  long_timeout=self.long_timeout,
                                  use_ssl=self.satellite_map['use_ssl'],
                                  strong_ssl=self.satellite_map['hard_ssl_name_check'])
            self.uri = self.con.uri
        except HTTPClientException as exp:
            # logger.error("Error with '%s' when creating client: %s", self.name, str(exp))
            # Set the satellite as dead
            self.set_dead()
            raise LinkError("Error with '%s' when creating client: %s" % (self.name, str(exp)))

    def set_alive(self):
        """Set alive, reachable, and reset attempts.
        If we change state, raise a status brok update

        alive, means the daemon is present in the system
        reachable, means that the HTTP connection is valid

        With this function we confirm that the daemon is reachable and, thus, we assume it
        is alive!

        :return: None
        """
        was_alive = self.alive
        self.alive = True
        self.reachable = True
        self.attempt = 0

        # We came from dead to alive! We must propagate the good news
        if not was_alive:
            logger.info("Setting %s satellite as alive :)", self.name)
            self.broks.append(self.get_update_status_brok())

    def set_dead(self):
        """Set the satellite into dead state:
        If we change state, raise a status brok update

        The connection (con attribute) is reset to None.

        :return: None
        """
        was_alive = self.alive
        self.alive = False
        self.reachable = False
        self.attempt = 0
        # We will have to create a new connection...
        self.con = None

        # We are dead now! We must propagate the sad news...
        # (nothing is raised if we are stopping)
        if was_alive and not self.stopping:
            logger.warning("Setting the satellite %s as dead :(", self.name)
            self.broks.append(self.get_update_status_brok())

    def add_failed_check_attempt(self, reason=''):
        """Set the daemon as unreachable and add a failed attempt
        if we reach the maximum attempts, set the daemon as dead

        :param reason: the reason of adding an attempt (stack trace sometimes)
        :type reason: str
        :return: None
        """
        self.reachable = False
        self.attempt = self.attempt + 1

        logger.debug("Failed attempt for %s (%d/%d), reason: %s",
                     self.name, self.attempt, self.max_check_attempts, reason)
        # Don't need to warn again and again if the satellite is already dead
        # Only warn when it is alive
        if self.alive:
            if not self.stopping:
                logger.warning("Add failed attempt for %s (%d/%d) - %s",
                               self.name, self.attempt, self.max_check_attempts, reason)
            else:
                logger.info("Stopping... failed attempt for %s (%d/%d) - also probably stopping",
                            self.name, self.attempt, self.max_check_attempts)

        # If we reached the maximum attempts, set the daemon as dead
        if self.attempt >= self.max_check_attempts:
            if not self.stopping:
                logger.warning("Set %s as dead, too much failed attempts (%d), last problem is: %s",
                               self.name, self.max_check_attempts, reason)
            else:
                logger.info("Stopping... set %s as dead, too much failed attempts (%d)",
                            self.name, self.max_check_attempts)
            self.set_dead()

    def valid_connection(*outer_args, **outer_kwargs):
        # pylint: disable=unused-argument, no-method-argument
        """Check if the daemon connection is established and valid

        Decorator factory. The decorated method raises a LinkError, before the
        wrapped function is called, if the link (first positional argument) has no
        connection (con attribute) or no running identifier (running_id attribute).
        """
        def decorator(func):  # pylint: disable=missing-docstring
            def decorated(*args, **kwargs):  # pylint: disable=missing-docstring
                # outer_args and outer_kwargs are the decorator arguments
                # args and kwargs are the decorated function arguments
                link = args[0]
                if not link.con:
                    raise LinkError("The connection is not created for %s" % link.name)
                if not link.running_id:
                    raise LinkError("The connection is not initialized for %s" % link.name)

                return func(*args, **kwargs)
            return decorated
        return decorator

    def communicate(*outer_args, **outer_kwargs):
        # pylint: disable=unused-argument, no-method-argument
        """Check if the daemon connection is authorized and valid

        Decorator factory. The decorated method returns:
        - None, without calling the wrapped function, if the link is not alive
        - the wrapped function result if no exception is raised
        - False on a LinkError (including a not reachable link) or an
          HTTPClientTimeoutException; a failed attempt is added
        - None on an HTTPClientConnectionException (the link is set as dead) or on
          another HTTPClientException (a failed attempt is added)

        An HTTPClientDataException makes the process exit.
        """
        def decorator(func):  # pylint: disable=missing-docstring
            def decorated(*args, **kwargs):  # pylint: disable=missing-docstring
                # outer_args and outer_kwargs are the decorator arguments
                # args and kwargs are the decorated function arguments
                fn_name = func.__name__
                link = args[0]
                if not link.alive:
                    logger.warning("%s is not alive for %s", link.name, fn_name)
                    return None

                try:
                    # Raised inside the try so that it is handled as a failed attempt
                    if not link.reachable:
                        raise LinkError("The %s %s is not reachable" % (link.type, link.name))

                    logger.debug("[%s] Calling: %s, %s, %s", link.name, fn_name, args, kwargs)
                    return func(*args, **kwargs)
                except HTTPClientConnectionException as exp:
                    # A Connection error is raised when the daemon connection cannot be established
                    # No way with the configuration parameters!
                    if not link.stopping:
                        logger.warning("A daemon (%s/%s) that we must be related with "
                                       "cannot be connected: %s", link.type, link.name, exp)
                    else:
                        logger.info("Stopping... daemon (%s/%s) cannot be connected. "
                                    "It is also probably stopping or yet stopped.",
                                    link.type, link.name)
                    link.set_dead()
                except (LinkError, HTTPClientTimeoutException) as exp:
                    link.add_failed_check_attempt("Connection timeout "
                                                  "with '%s': %s" % (fn_name, str(exp)))
                    return False
                except HTTPClientDataException as exp:
                    # A Data error is raised when the daemon HTTP response is not 200!
                    # No way with the communication if some problems exist in the daemon interface!
                    # Abort all
                    err = "Some daemons that we must be related with " \
                          "have some interface problems. Sorry, I bail out"
                    logger.error(err)
                    os.sys.exit(err)
                except HTTPClientException as exp:
                    link.add_failed_check_attempt("Error with '%s': %s" % (fn_name, str(exp)))

                return None

            return decorated
        return decorator

    @communicate()
    def get_running_id(self):
        """Send a HTTP request to the satellite (GET 'identity')
        Used to get the daemon running identifier that allows to know if the daemon got restarted

        This is called on connection initialization or re-connection

        If the daemon is not reachable, this function will raise an exception and the caller
        will receive a False as return

        When the running identifier changed, the link context is cleared
        (see get_and_clear_context). The link is then set as alive.

        :return: Boolean indicating if the running id was received
        :type: bool
        """
        former_running_id = self.running_id

        logger.info(" get the running identifier for %s %s.", self.type, self.name)
        # An exception is raised in this function if the daemon is not reachable
        self.running_id = self.con.get('identity')
        # The response may be a dictionary that contains the identifier
        if isinstance(self.running_id, dict):
            self.running_id = self.running_id['running_id']

        # No former identifier: the received one is not considered as a change
        if former_running_id == 0:
            if self.running_id:
                logger.info(" -> got: %s.", self.running_id)
                former_running_id = self.running_id

        # If the daemon has just started or has been restarted: it has a new running_id.
        if former_running_id != self.running_id:
            if former_running_id:
                logger.info(" -> The %s %s running identifier changed: %s. "
                            "The daemon was certainly restarted!",
                            self.type, self.name, self.running_id)
            # So we clear all verifications, they are obsolete now.
            logger.info("The running id of the %s %s changed (%s), "
                        "we must clear its context.",
                        self.type, self.name, self.running_id)
            (_, _, _, _) = self.get_and_clear_context()

        # Set the daemon as alive
        self.set_alive()

        return True

    @valid_connection()
    @communicate()
    def stop_request(self, stop_now=False):
        """Send a stop request to the daemon

        :param stop_now: stop now or go to stop wait mode
        :type stop_now: bool
        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("Sending stop request to %s, stop now: %s", self.name, stop_now)

        # The boolean is sent as a '1' / '0' string
        res = self.con.get('stop_request', {'stop_now': '1' if stop_now else '0'})
        return res

    @valid_connection()
    @communicate()
    def update_infos(self, forced=False, test=False):
        """Update satellite info each self.polling_interval seconds
        so we smooth arbiter actions for just useful actions.

        Raise a satellite update status Brok

        If forced is True, then ignore the ping period. This is used when the configuration
        has not yet been dispatched to the Arbiter satellites.

        If test is True, do not really ping the daemon (useful for the unit tests only)

        :param forced: ignore the ping smoothing
        :type forced: bool
        :param test: passed through to get_conf
        :type test: bool
        :return:
        None if the last request is too recent,
        False if a timeout was raised during the request,
        else the managed configurations dictionary
        """
        logger.debug("Update informations, forced: %s", forced)

        # First look if it's not too early to ping
        now = time.time()
        if not forced and self.last_check and self.last_check + self.polling_interval > now:
            logger.debug("Too early to ping %s, ping period is %ds!, last check: %d, now: %d",
                         self.name, self.polling_interval, self.last_check, now)
            return None

        self.get_conf(test=test)

        # Update the daemon last check timestamp
        self.last_check = time.time()

        # Update the state of this element
        self.broks.append(self.get_update_status_brok())

        return self.cfg_managed

    @valid_connection()
    @communicate()
    def get_daemon_stats(self, details=False):
        """Send a HTTP request to the satellite (GET 'stats', with '?details=1'
        appended when details is True)

        :param details: request the detailed statistics
        :type details: bool
        :return: Daemon statistics
        :rtype: dict
        """
        logger.debug("Get daemon statistics for %s, %s %s", self.name, self.alive, self.reachable)
        return self.con.get('stats%s' % ('?details=1' if details else ''))

    @valid_connection()
    @communicate()
    def get_initial_broks(self, broker_name):
        """Send a HTTP request to the satellite (GET '_initial_broks')

        Used to build the initial broks for a broker connecting to a scheduler

        :param broker_name: the concerned broker name
        :type broker_name: str
        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("Getting initial broks for %s, %s %s", self.name, self.alive, self.reachable)
        return self.con.get('_initial_broks', {'broker_name': broker_name}, wait=True)

    @valid_connection()
    @communicate()
    def wait_new_conf(self):
        """Send a HTTP request to the satellite (GET '_wait_new_conf')

        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("Wait new configuration for %s, %s %s",
                     self.name, self.alive, self.reachable)
        return self.con.get('_wait_new_conf')

    @valid_connection()
    @communicate()
    def put_conf(self, configuration, test=False):
        """Send the configuration to the satellite
        HTTP request to the satellite (POST '_push_configuration')

        If test is True, store the configuration internally (in the
        unit_test_pushed_configuration attribute), do not send anything and return True

        :param configuration: The conf to send (data depend on the satellite)
        :type configuration:
        :param test: unit tests mode
        :type test: bool
        :return: True in test mode, else the daemon response, or see @communicate on failure
        """
        logger.debug("Sending configuration to %s, %s %s", self.name, self.alive, self.reachable)
        # ----------
        if test:
            setattr(self, 'unit_test_pushed_configuration', configuration)
            # print("*** unit tests - sent configuration %s: %s" % (self.name, configuration))
            return True
        # ----------

        return self.con.post('_push_configuration', {'conf': configuration}, wait=True)

    @valid_connection()
    @communicate()
    def has_a_conf(self, magic_hash=None):  # pragma: no cover
        """Send a HTTP request to the satellite (GET '_have_conf')
        Used to know if the satellite has a conf

        The response is stored in the have_conf attribute.

        :param magic_hash: Config hash. Only used for HA arbiter communication
        :type magic_hash: int
        :return: Boolean indicating if the satellite has a (specific) configuration
        :type: bool
        """
        logger.debug("Have a configuration for %s, %s %s",
                     self.name, self.alive, self.reachable)
        self.have_conf = self.con.get('_have_conf', {'magic_hash': magic_hash})
        return self.have_conf

    @valid_connection()
    @communicate()
    def get_conf(self, test=False):
        """Send a HTTP request to the satellite (GET 'managed_configurations')
        and update the cfg_managed attribute with the new information

        the managed configurations are a dictionary which keys are the scheduler link instance id
        and the values are a dictionary with the hash, push_flavor and managed_conf_id

        The have_conf attribute is set to True if the managed configurations are
        not an empty dictionary.

        If test is True, the managed configurations are built from the unit test
        internally stored configuration (see put_conf) and no request is sent

        Returns False if a timeout is raised

        :param test: unit tests mode
        :type test: bool
        :return: see @communicate, or the managed configuration
        """
        logger.debug("Get managed configuration for %s, %s %s",
                     self.name, self.alive, self.reachable)
        # ----------
        if test:
            self.cfg_managed = {}
            self.have_conf = True
            logger.debug("Get managed configuration test ...")
            if getattr(self, 'unit_test_pushed_configuration', None) is not None:
                # Note this is a dict not a SatelliteLink object !
                for scheduler_link in self.unit_test_pushed_configuration['schedulers'].values():
                    self.cfg_managed[scheduler_link['instance_id']] = {
                        'hash': scheduler_link['hash'],
                        'push_flavor': scheduler_link['push_flavor'],
                        'managed_conf_id': scheduler_link['managed_conf_id']
                    }
            # print("*** unit tests - get managed configuration %s: %s"
            #       % (self.name, self.cfg_managed))
        # ----------
        else:
            self.cfg_managed = self.con.get('managed_configurations')
            logger.debug("My (%s) fresh managed configuration: %s", self.name, self.cfg_managed)

        # In test mode this overrides the True set above when nothing was pushed
        self.have_conf = (self.cfg_managed != {})

        return self.cfg_managed

    @valid_connection()
    @communicate()
    def push_broks(self, broks):
        """Send a HTTP request to the satellite (POST '_push_broks')
        Send broks to the satellite

        :param broks: Brok list to send
        :type broks: list
        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("[%s] Pushing %d broks", self.name, len(broks))
        return self.con.post('_push_broks', {'broks': broks}, wait=True)

    @valid_connection()
    @communicate()
    def push_actions(self, actions, scheduler_instance_id):
        """Post the actions to execute to the satellite (POST '_push_actions').
        Indeed, a scheduler post its checks to a poller and its actions to a reactionner.

        :param actions: Action list to send
        :type actions: list
        :param scheduler_instance_id: Scheduler instance identifier
        :type scheduler_instance_id: uuid
        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("Pushing %d actions from %s", len(actions), scheduler_instance_id)
        return self.con.post('_push_actions', {'actions': actions,
                                               'scheduler_instance_id': scheduler_instance_id},
                             wait=True)

    @valid_connection()
    @communicate()
    def push_results(self, results, scheduler_name):
        """Send a HTTP request to the satellite (POST 'put_results')
        Send actions results to the satellite

        :param results: Results list to send
        :type results: list
        :param scheduler_name: Scheduler name, sent as the 'from' parameter
        :type scheduler_name: uuid
        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("Pushing %d results", len(results))
        result = self.con.post('put_results', {'results': results, 'from': scheduler_name},
                               wait=True)
        return result

    @valid_connection()
    @communicate()
    def push_external_commands(self, commands):
        """Send a HTTP request to the satellite (POST '_run_external_commands')
        to send the external commands to the satellite

        :param commands: external commands list to send
        :type commands: list
        :return: the daemon response, or see @communicate on failure
        """
        logger.debug("Pushing %d external commands", len(commands))
        return self.con.post('_run_external_commands', {'cmds': commands}, wait=True)

    @valid_connection()
    @communicate()
    def get_external_commands(self):
        """Send a HTTP request to the satellite (GET '_external_commands') to
        get the external commands from the satellite.

        The response is un-serialized before being returned.

        :return: the un-serialized daemon response, or see @communicate on failure
        """
        res = self.con.get('_external_commands', wait=False)
        logger.debug("Got %d external commands from %s: %s", len(res), self.name, res)
        return unserialize(res, True)

    @valid_connection()
    @communicate()
    def get_broks(self, broker_name):
        """Send a HTTP request to the satellite (GET '_broks')
        Get broks from the satellite.
        Un-serialize data received.

        :param broker_name: the concerned broker name, sent as the 'broker_name' parameter
        :type broker_name: str
        :return: the un-serialized daemon response, or see @communicate on failure
        """
        res = self.con.get('_broks', {'broker_name': broker_name}, wait=False)
        logger.debug("Got broks from %s: %s", self.name, res)
        return unserialize(res, True)

    @valid_connection()
    @communicate()
    def get_events(self):
        """Send a HTTP request to the satellite (GET '_events')
        Get monitoring events from the satellite.
        Un-serialize data received.

        :return: the un-serialized daemon response, or see @communicate on failure
        """
        res = self.con.get('_events', wait=False)
        logger.debug("Got events from %s: %s", self.name, res)
        return unserialize(res, True)

    @valid_connection()
    def get_results(self, scheduler_instance_id):
        """Send a HTTP request to the satellite (GET '_results')
        Get actions results from satellite (only passive satellites expose this method).

        This method is not decorated with @communicate: the exceptions raised by the
        request are not handled here.

        :param scheduler_instance_id: scheduler instance identifier
        :type scheduler_instance_id: str
        :return: the daemon response
        """
        res = self.con.get('_results', {'scheduler_instance_id': scheduler_instance_id}, wait=True)
        logger.debug("Got %d results from %s: %s", len(res), self.name, res)
        return res

    @valid_connection()
    def get_actions(self, params):
        """Send a HTTP request to the satellite (GET '_checks')
        Get actions from the scheduler.
        Un-serialize data received.

        This method is not decorated with @communicate: the exceptions raised by the
        request are not handled here.

        :param params: the request parameters
        :type params: str
        :return: the un-serialized daemon response
        """
        res = self.con.get('_checks', params, wait=True)
        logger.debug("Got checks to execute from %s: %s", self.name, res)
        return unserialize(res, True)
class Realm(Itemgroup):
    """Realm class is used to implement realm.
    It is basically a group of Hosts assigned to a specific Scheduler/Poller
    (other daemon are optional)

    A realm may have sub-realms (realm_members). Once set_level got called, the
    all_sub_members / all_sub_members_names attributes contain all the realms
    that are below this one in the hierarchy (children and grand-children).
    """
    my_type = 'realm'
    members_property = "members"
    group_members_property = "realm_members"

    properties = Itemgroup.properties.copy()
    properties.update({
        'realm_name':
            StringProp(default=u'', fill_brok=['full_status']),
        'alias':
            StringProp(default=u'', fill_brok=['full_status']),
        'realm_members':
            ListProp(default=[], split_on_comma=True),
        'group_members':
            ListProp(default=[], split_on_comma=True),
        'higher_realms':
            ListProp(default=[], split_on_comma=True),
        'default':
            BoolProp(default=False)
    })

    running_properties = Itemgroup.running_properties.copy()
    running_properties.update({
        # Indicate if some only passively or actively checks host exist in the realm
        'passively_checked_hosts':
            BoolProp(default=None),
        'actively_checked_hosts':
            BoolProp(default=None),
        # Those lists contain only the uuid of the satellite link, not the whole object!
        'arbiters':
            ListProp(default=[]),
        'schedulers':
            ListProp(default=[]),
        'brokers':
            ListProp(default=[]),
        'pollers':
            ListProp(default=[]),
        'reactionners':
            ListProp(default=[]),
        'receivers':
            ListProp(default=[]),
        'potential_schedulers':
            ListProp(default=[]),
        'potential_brokers':
            ListProp(default=[]),
        'potential_pollers':
            ListProp(default=[]),
        'potential_reactionners':
            ListProp(default=[]),
        'potential_receivers':
            ListProp(default=[]),
        # Once configuration is prepared, the count of the hosts in the realm
        'hosts_count':
            IntegerProp(default=0),
        'packs':
            DictProp(default={}),
        'parts':
            DictProp(default={}),
        # Realm level in the realms hierarchy
        'level':
            IntegerProp(default=-1),
        # All the sub realms (children and grand-children)
        'all_sub_members':
            ListProp(default=[]),
        'all_sub_members_names':
            ListProp(default=[]),
    })

    macros = {
        'REALMNAME': 'realm_name',
        'REALMDEFAULT': 'default',
        'REALMMEMBERS': 'members',
        'REALMREALM_MEMBERS': 'realm_members',
        'REALMGROUP_MEMBERS': 'group_members',
        'REALMHOSTS_COUNT': 'hosts_count',
    }

    def __init__(self, params=None, parsing=True):
        """Create the realm, fill its default properties and initialize the
        satellites related attributes.

        :param params: the realm parameters, given to the parent class
        :type params: dict | None
        :param parsing: given to the parent class
        :type parsing: bool
        """
        super(Realm, self).__init__(params, parsing)

        self.fill_default()

        # Define a packs list for the configuration preparation
        self.packs = []
        # Once the configuration got prepared, packs becomes a dictionary!
        # packs is a dictionary indexed with the configuration part
        # number and containing the list of hosts

        # List of satellites related to the realm
        self.to_satellites = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        # List of satellites that need a configuration dispatch
        self.to_satellites_need_dispatch = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        # List of satellites with their managed configuration
        self.to_satellites_managed_by = {
            'reactionner': {},
            'poller': {},
            'broker': {},
            'receiver': {}
        }

        # Attributes depending of the satellite type
        for sat_type in ['arbiter', 'scheduler', 'reactionner',
                         'poller', 'broker', 'receiver']:
            # Minimum is to have one satellite
            setattr(self, "nb_%ss" % sat_type, 0)
            setattr(self, 'potential_%ss' % sat_type, [])

    def __repr__(self):
        res = '<%r %r (%d)' % (self.__class__.__name__, self.get_name(), self.level)
        if self.realm_members:
            res = res + ', %d sub-realms: %r' \
                % (len(self.realm_members), ', '.join([str(s) for s in self.realm_members]))
        if self.all_sub_members_names:
            res = res + ', %d all sub-realms: %r' \
                % (len(self.all_sub_members_names),
                   ', '.join([str(s) for s in self.all_sub_members_names]))
        if self.hosts_count:
            res = res + ', %d hosts' % self.hosts_count
        if getattr(self, 'parts', None):
            res = res + ', %d parts' % len(self.parts)
        if getattr(self, 'packs', None):
            res = res + ', %d packs' % len(self.packs)
        return res + '/>'
    __str__ = __repr__

    @property
    def name(self):
        """Get the realm name"""
        return self.get_name()

    def get_name(self):
        """Accessor to realm_name attribute

        :return: realm name, 'unset' if the realm has no realm_name attribute
        :rtype: str
        """
        return getattr(self, 'realm_name', 'unset')

    def add_group_members(self, members):
        """Add a new group member to the groups list

        :param members: member name, or list of members names
        :type members: str | list
        :return: None
        """
        if not isinstance(members, list):
            members = [members]

        if not getattr(self, 'group_members', None):
            # Store a copy: keeping the caller's list would make the next call
            # extend the caller's own object
            self.group_members = list(members)
        else:
            self.group_members.extend(members)

    def prepare_satellites(self, satellites):
        """Update the following attributes of a realm::

        * nb_*satellite type*s
        * self.potential_*satellite type*s

        (satellite types are scheduler, reactionner, poller, broker and receiver)

        For each satellite type, the satellites declared in the realm are searched in
        the provided satellites. The search stops on the first one that is active and
        not spare: the count for this type is incremented. An error is raised for each
        declared satellite that is found but is not active or is a spare one.

        :param satellites: dict of SatelliteLink objects
        :type satellites: dict
        :return: None
        """
        for sat_type in ["scheduler", "reactionner", "poller", "broker", "receiver"]:
            # We get potential TYPE at realm level first
            for sat_link_uuid in getattr(self, "%ss" % sat_type):
                if sat_link_uuid not in satellites:
                    continue
                sat_link = satellites[sat_link_uuid]

                # Found our declared satellite in the provided satellites
                if sat_link.active and not sat_link.spare:
                    # Generic increment : realm.nb_TYPE += 1
                    setattr(self, "nb_%ss" % sat_type, getattr(self, "nb_%ss" % sat_type) + 1)
                    break
                else:
                    self.add_error("Realm %s, satellite %s declared in the realm is not found "
                                   "in the allowed satellites!" % (self.name, sat_link.name))
                    logger.error("Satellite %s declared in the realm %s not found "
                                 "in the allowed satellites!", sat_link.name, self.name)

        logger.info(" Realm %s: (in/potential) (schedulers:%d/%d) (pollers:%d/%d) "
                    "(reactionners:%d/%d) (brokers:%d/%d) (receivers:%d/%d)", self.name,
                    self.nb_schedulers, len(self.potential_schedulers),
                    self.nb_pollers, len(self.potential_pollers),
                    self.nb_reactionners, len(self.potential_reactionners),
                    self.nb_brokers, len(self.potential_brokers),
                    self.nb_receivers, len(self.potential_receivers))

    def get_realms_by_explosion(self, realms):
        """Parse the realm members (recursively) to detect the unknown members and
        the loops in the realms hierarchy

        The realm members list is sorted by name. An unknown member is declared with
        add_unknown_members. If a loop is detected, an error is raised on the realm
        where the loop got detected and the all_sub_members and realm_members of
        the realms on the path are emptied.

        :param realms: realms list, used to look for a specific one
        :type realms: alignak.objects.realm.Realms
        :return: the all_sub_members list, None if a loop is detected
        :rtype: list | None
        """
        # If rec_tag is already set, then we detected a loop in the realms hierarchy!
        if getattr(self, 'rec_tag', False):
            self.add_error("Error: there is a loop in the realm definition %s" % self.get_name())
            return None

        # Ok, not in a loop, we tag the realm and parse its members
        self.rec_tag = True

        # Order realm members list by name
        self.realm_members = sorted(self.realm_members)
        for member in self.realm_members:
            realm = realms.find_by_name(member)
            if not realm:
                self.add_unknown_members(member)
                continue

            # Only used to know if a loop got detected below this member
            children = realm.get_realms_by_explosion(realms)
            if children is None:
                # We got a loop in our children definition
                self.all_sub_members = []
                self.realm_members = []
                return None

        # Return the list of all unique members
        return self.all_sub_members

    def set_level(self, level, realms):
        """Set the realm level in the realms hierarchy

        The level is also set (level + 1) for the realm members, recursively. The
        all_sub_members (uuid) and all_sub_members_names lists are rebuilt with all
        the realms found below this one: children and grand-children.

        :param level: the realm level
        :type level: int
        :param realms: realms list, used to look for a specific one
        :type realms: alignak.objects.realm.Realms
        :return: the names of all the sub realms
        :rtype: list
        """
        self.level = level
        if not self.level:
            logger.info("- %s", self.get_name())
        else:
            logger.info(" %s %s", '+' * self.level, self.get_name())
        self.all_sub_members = []
        self.all_sub_members_names = []
        for child in sorted(self.realm_members):
            child = realms.find_by_name(child)
            if not child:
                continue

            self.all_sub_members.append(child.uuid)
            self.all_sub_members_names.append(child.get_name())
            grand_children = child.set_level(self.level + 1, realms)
            for grand_child in grand_children:
                # Do not declare twice a realm that is reachable through several paths
                if grand_child in self.all_sub_members_names:
                    continue
                grand_child = realms.find_by_name(grand_child)
                if grand_child:
                    self.all_sub_members_names.append(grand_child.get_name())
                    self.all_sub_members.append(grand_child.uuid)
        return self.all_sub_members_names

    def get_all_subs_satellites_by_type(self, sat_type, realms):
        """Get all satellites of the wanted type in this realm recursively

        Each satellite is returned only once: the realm own satellites first, then
        the sub realms ones, in their first appearance order.

        :param sat_type: name of the satellites list attribute (pollers, reactionners ...)
        :type sat_type: str
        :param realms: all realms, indexed with the realm uuid
        :type realms: list of realm object
        :return: list of satellite in this realm
        :rtype: list
        """
        res = copy.copy(getattr(self, sat_type))
        for member in self.all_sub_members:
            res.extend(realms[member].get_all_subs_satellites_by_type(sat_type, realms))

        # all_sub_members contains the children AND the grand-children, and each child
        # also reports its own sub realms: the satellites of the deeper realms are
        # collected several times. Only keep the first occurrence of each one.
        seen = set()
        unique = []
        for sat_id in res:
            if sat_id in seen:
                continue
            seen.add(sat_id)
            unique.append(sat_id)
        return unique

    def get_satellites_by_type(self, s_type):
        """Generic function to access one of the satellite attribute
        ie : self.pollers, self.reactionners ...

        :param s_type: satellite type wanted
        :type s_type: str
        :return: self.*type*s, an empty list if the attribute does not exist
        :rtype: list
        """
        if hasattr(self, s_type + 's'):
            return getattr(self, s_type + 's')

        logger.debug("[realm %s] do not have this kind of satellites: %s", self.name, s_type)
        return []

    def get_potential_satellites_by_type(self, satellites, s_type):
        """Get the satellites links of the wanted type that may be used by the realm

        The satellites declared in the realm (self.*type*s) are returned if some are
        found in the provided satellites, else the potential ones
        (self.potential_*type*s) are returned.

        :param satellites: list of SatelliteLink objects
        :type satellites: SatelliteLink list
        :param s_type: satellite type wanted
        :type s_type: str
        :return: the matching satellites links, an empty list if the realm has no
                 potential_*type*s attribute
        :rtype: list
        """
        if not hasattr(self, 'potential_' + s_type + 's'):
            logger.debug("[realm %s] do not have this kind of satellites: %s", self.name, s_type)
            return []

        matching_satellites = []
        for sat_link in satellites:
            if sat_link.uuid in getattr(self, s_type + 's'):
                matching_satellites.append(sat_link)
        if matching_satellites:
            logger.debug("- found %ss: %s", s_type, matching_satellites)
            return matching_satellites

        for sat_link in satellites:
            if sat_link.uuid in getattr(self, 'potential_' + s_type + 's'):
                # Do not limit to one satellite!
                matching_satellites.append(sat_link)

        logger.debug("- potential %ss: %s", s_type, matching_satellites)
        return matching_satellites

    def get_nb_of_must_have_satellites(self, s_type):
        """Generic function to access one of the number satellite attribute
        ie : self.nb_pollers, self.nb_reactionners ...

        :param s_type: satellite type wanted
        :type s_type: str
        :return: self.nb_*type*s, 0 if the attribute does not exist
        :rtype: int
        """
        if hasattr(self, 'nb_' + s_type + 's'):
            return getattr(self, 'nb_' + s_type + 's')

        logger.debug("[realm %s] do not have this kind of satellites: %s", self.name, s_type)
        return 0

    def get_links_for_a_broker(self, pollers, reactionners, receivers, realms,
                               manage_sub_realms=False):
        """Get a configuration dictionary with pollers, reactionners and receivers links
        for a broker

        :param pollers: pollers, indexed with the satellite identifier
        :type pollers:
        :param reactionners: reactionners, indexed with the satellite identifier
        :type reactionners:
        :param receivers: receivers, indexed with the satellite identifier
        :type receivers:
        :param realms: realms
        :type realms:
        :param manage_sub_realms: True if the broker manages sub realms
        :type manage_sub_realms: bool
        :return: dict containing pollers, reactionners and receivers links (key is satellite id)
        :rtype: dict
        """
        # Create void satellite links
        cfg = {
            'pollers': {},
            'reactionners': {},
            'receivers': {},
        }

        # Our self.daemons are only identifiers... that we use to fill the satellite links
        for poller_id in self.pollers:
            poller = pollers[poller_id]
            cfg['pollers'][poller.uuid] = poller.give_satellite_cfg()

        for reactionner_id in self.reactionners:
            reactionner = reactionners[reactionner_id]
            cfg['reactionners'][reactionner.uuid] = reactionner.give_satellite_cfg()

        for receiver_id in self.receivers:
            receiver = receivers[receiver_id]
            cfg['receivers'][receiver.uuid] = receiver.give_satellite_cfg()

        # If the broker manages sub realms, fill the satellite links...
        if manage_sub_realms:
            # Now pollers
            for poller_id in self.get_all_subs_satellites_by_type('pollers', realms):
                poller = pollers[poller_id]
                cfg['pollers'][poller.uuid] = poller.give_satellite_cfg()

            # Now reactionners
            for reactionner_id in self.get_all_subs_satellites_by_type('reactionners', realms):
                reactionner = reactionners[reactionner_id]
                cfg['reactionners'][reactionner.uuid] = reactionner.give_satellite_cfg()

            # Now receivers
            for receiver_id in self.get_all_subs_satellites_by_type('receivers', realms):
                receiver = receivers[receiver_id]
                cfg['receivers'][receiver.uuid] = receiver.give_satellite_cfg()

        return cfg

    def get_links_for_a_scheduler(self, pollers, reactionners, brokers):
        """Get a configuration dictionary with pollers, reactionners and brokers links
        for a scheduler

        Any exception raised while the links are built is logged and the links found
        until then are returned.

        :param pollers: pollers
        :type pollers:
        :param reactionners: reactionners
        :type reactionners:
        :param brokers: brokers
        :type brokers:
        :return: dict containing pollers, reactionners and brokers links (key is satellite id)
        :rtype: dict
        """
        # Create void satellite links
        cfg = {
            'pollers': {},
            'reactionners': {},
            'brokers': {},
        }

        # Our self.daemons are only identifiers... that we use to fill the satellite links.
        # The parsed lists contain identifiers (self.*type*s) followed by link objects
        # (the potential satellites): an identifier is replaced with its link object.
        try:
            for poller in self.pollers + self.get_potential_satellites_by_type(pollers, "poller"):
                if poller in pollers:
                    poller = pollers[poller]
                cfg['pollers'][poller.uuid] = poller.give_satellite_cfg()

            for reactionner in self.reactionners + self.get_potential_satellites_by_type(
                    reactionners, "reactionner"):
                if reactionner in reactionners:
                    reactionner = reactionners[reactionner]
                cfg['reactionners'][reactionner.uuid] = reactionner.give_satellite_cfg()

            for broker in self.brokers + self.get_potential_satellites_by_type(brokers, "broker"):
                if broker in brokers:
                    broker = brokers[broker]
                cfg['brokers'][broker.uuid] = broker.give_satellite_cfg()
        except Exception as exp:  # pylint: disable=broad-except
            logger.exception("realm.get_links_for_a_scheduler: %s", exp)

        return cfg
class ActionBase(AlignakObject):
    """Common base for actions and checks.

    This abstract class is used just for having a common id for both
    actions and checks. It holds the command to run, the environment to run
    it with, and the logic that polls the launched process and splits its
    output into `output`, `long_output` and `perf_data`.

    The real process handling is delegated to `execute__` and `kill__`, which
    do nothing in this class (OS specific part).
    """
    # Handle on the launched process, None when no process is attached
    process = None

    properties = {
        'is_a': StringProp(default=''),
        'type': StringProp(default=''),
        'creation_time': FloatProp(default=0.0),
        '_in_timeout': BoolProp(default=False),
        'status': StringProp(default='scheduled'),
        'exit_status': IntegerProp(default=3),
        'output': StringProp(default='', fill_brok=['full_status']),
        't_to_go': FloatProp(default=0.0),
        'check_time': IntegerProp(default=0),
        'execution_time': FloatProp(default=0.0),
        'u_time': FloatProp(default=0.0),
        's_time': FloatProp(default=0.0),
        'reactionner_tag': StringProp(default='None'),
        'env': DictProp(default={}),
        'module_type': StringProp(default='fork', fill_brok=['full_status']),
        'worker_id': StringProp(default='none'),
        'command': StringProp(),
        'timeout': IntegerProp(default=10),
        'ref': StringProp(default=''),
    }

    def __init__(self, params=None, parsing=True):
        """Build the action from `params` and fill the missing defaults.

        :param params: initial attributes of the action
        :type params: dict | None
        :param parsing: forwarded as is to the parent class constructor
        :type parsing: bool
        """
        super(ActionBase, self).__init__(params, parsing=parsing)

        # Set a creation time only if not provided
        if not params or 'creation_time' not in params:
            self.creation_time = time.time()
        # Set actions log only if not provided
        if not params or 'log_actions' not in params:
            self.log_actions = 'TEST_LOG_ACTIONS' in os.environ

        # Fill default parameters
        self.fill_default()

    def _log_action(self, message, *args):
        """Log an action event when actions logging is enabled.

        The message is emitted at WARNING level when the TEST_LOG_ACTIONS
        environment variable is set to 'WARNING', else at INFO level.
        Nothing is logged when `self.log_actions` is false.

        :param message: logging format string
        :type message: str
        :param args: arguments of the format string
        :return: None
        """
        if not self.log_actions:
            return
        # log_actions may have been provided in the constructor parameters,
        # so the environment variable is not guaranteed to exist: do not
        # index os.environ directly
        if os.environ.get('TEST_LOG_ACTIONS') == 'WARNING':
            logger.warning(message, *args)
        else:
            logger.info(message, *args)

    def set_type_active(self):
        """Dummy function, only useful for checks"""
        pass

    def set_type_passive(self):
        """Dummy function, only useful for checks"""
        pass

    def get_local_environnement(self):
        """
        Mix the environment and the environment variables into a new local
        environment dictionary

        Note: We cannot just update the global os.environ because this
        would effect all other checks.

        :return: local environment variables
        :rtype: dict
        """
        # Do not use copy.copy() here, as the resulting copy still
        # changes the real environment (it is still a os._Environment
        # instance).
        local_env = os.environ.copy()
        for local_var in self.env:
            local_env[local_var] = self.env[local_var].encode('utf8')
        return local_env

    def execute(self):
        """Start this action command. The command will be executed in a subprocess.

        :return: None or str 'toomanyopenfiles'
        :rtype: None | str
        """
        self.status = 'launched'
        self.check_time = time.time()
        # Initial polling delay, doubled by check_finished at each poll
        self.wait_time = 0.0001
        self.last_poll = self.check_time

        # Get a local env variables with our additional values
        self.local_env = self.get_local_environnement()

        # Initialize stdout and stderr. we will read them in small parts
        # if the fcntl is available
        self.stdoutdata = ''
        self.stderrdata = ''

        logger.debug("Launch command: '%s', ref: %s", self.command, self.ref)
        self._log_action("Launch command: '%s'", self.command)

        return self.execute__()  # OS specific part

    def get_outputs(self, out, max_plugins_output_length):
        """Get outputs from single output (split perfdata etc).
        Edit output, perf_data and long_output attributes.

        The first line is split on every pipe: the part before the first
        pipe is the output and the part between the first and the second
        pipe is the performance data. For the next lines, everything before
        the first pipe met is long output and everything after it (next
        lines included) is performance data.

        :param out: output data of a check
        :type out: str
        :param max_plugins_output_length: max plugin data length
        :type max_plugins_output_length: int
        :return: None
        """
        placeholder = '___PROTECT_PIPE___'

        def clean(text):
            """Strip a chunk and restore the pipes that were escaped"""
            return text.strip().replace(placeholder, '|')

        # Squeeze all output after max_plugins_output_length
        out = out[:max_plugins_output_length]
        # manage escaped pipes: they must not be seen as separators
        out = out.replace(r'\|', placeholder)
        # Then cuts by lines
        lines = out.split('\n')

        # First line before | is output, strip it
        first_line = lines[0].split('|')
        self.output = clean(first_line[0])
        try:
            self.output = self.output.decode('utf8', 'ignore')
        except (UnicodeEncodeError, AttributeError):
            # Already a text string (or no decode method at all): keep as is
            pass

        # After | it is perfdata, strip it. Init perfdata as empty
        self.perf_data = ''
        if len(first_line) > 1:
            self.perf_data = clean(first_line[1])

        # Now manage others lines. Before the | it's long_output
        # And after it's all perf_data, \n joined
        long_output = []
        in_perfdata = False
        for line in lines[1:]:
            if in_perfdata:
                # if already in perfdata, direct append
                self.perf_data += ' ' + clean(line)
                continue
            # not already in perf_data, search for the | part :)
            parts = line.split('|', 1)
            # The first part will always be long_output
            long_output.append(clean(parts[0]))
            if len(parts) > 1:
                in_perfdata = True
                self.perf_data += ' ' + clean(parts[1])

        # long_output is all non output and performance data, joined with \n
        self.long_output = '\n'.join(long_output)
        # Get sure the performance data are stripped
        self.perf_data = self.perf_data.strip()

        logger.debug("Command result for '%s': %d, %s",
                     self.command, self.exit_status, self.output)
        self._log_action("Check result for '%s': %d, %s",
                         self.command, self.exit_status, self.output)
        if self.perf_data:
            self._log_action("Performance data for '%s': %s",
                             self.command, self.perf_data)

    def check_finished(self, max_plugins_output_length):
        """Handle action if it is finished (get stdout, stderr, exit code...)

        When the process is still running, the polling delay is increased
        and the process is killed if it exceeded its timeout (status becomes
        'timeout'). When the process exited, its outputs are parsed and the
        status becomes 'done'.

        :param max_plugins_output_length: max plugin data length
        :type max_plugins_output_length: int
        :return: None
        """
        # We must wait, but checks are variable in time
        # so we do not wait the same for an little check
        # than a long ping. So we do like TCP: slow start with *2
        # but do not wait more than 0.5s.
        self.last_poll = time.time()

        # Reference CPU times, used to compute the time spent by the children
        _, _, child_utime, child_stime, _ = os.times()

        if self.process.poll() is None:
            logger.debug("Process pid=%d is still alive", self.process.pid)
            # polling every 1/2 s ... for a timeout in seconds, this is enough
            self.wait_time = min(self.wait_time * 2, 0.5)
            now = time.time()

            # If the fcntl is available (unix) we try to read in a
            # asynchronous mode, so we won't block the PIPE at 64K buffer
            # (deadlock...)
            if fcntl:
                self.stdoutdata += no_block_read(self.process.stdout)
                self.stderrdata += no_block_read(self.process.stderr)

            if (now - self.check_time) <= self.timeout:
                # Still running and still in time: come back later
                return

            logger.warning("Process pid=%d spent too much time: %d s",
                           self.process.pid, now - self.check_time)
            self.kill__()
            self.status = 'timeout'
            self.execution_time = now - self.check_time
            self.exit_status = 3
            # Do not keep a pointer to the process
            # todo: ???
            del self.process
            # Get the user and system time
            _, _, n_child_utime, n_child_stime, _ = os.times()
            self.u_time = n_child_utime - child_utime
            self.s_time = n_child_stime - child_stime
            self._log_action("Action '%s' exited on timeout (%d s)",
                             self.command, self.timeout)
            return

        logger.debug("Process pid=%d exited with %d",
                     self.process.pid, self.process.returncode)

        if not fcntl:
            # Get standards outputs from the communicate function if we do
            # not have the fcntl module (Windows, and maybe some special
            # unix like AIX)
            (self.stdoutdata, self.stderrdata) = self.process.communicate()
        else:
            # The command was too quick and finished even before we can
            # poll it first. So finish the read.
            self.stdoutdata += no_block_read(self.process.stdout)
            self.stderrdata += no_block_read(self.process.stderr)

        self.exit_status = self.process.returncode
        self._log_action("Action '%s' exited with return code %d",
                         self.command, self.exit_status)

        # we should not keep the process now
        # todo: ???
        del self.process

        # check for bad syntax in command line:
        bad_syntax = (
            'sh: -c: line 0: unexpected EOF while looking for matching' in self.stderrdata
            or ('sh: -c:' in self.stderrdata and ': Syntax' in self.stderrdata)
            or 'Syntax error: Unterminated quoted string' in self.stderrdata
        )
        if bad_syntax:
            logger.warning("Return bad syntax in command line!")
            # Very, very ugly. But subprocess._handle_exitstatus does
            # not see a difference between a regular "exit 1" and a
            # bailing out shell. Strange, because strace clearly shows
            # a difference. (exit_group(1) vs. exit_group(257))
            self.stdoutdata = self.stdoutdata + self.stderrdata
            self.exit_status = 3

        if self.exit_status not in VALID_EXIT_STATUS:
            self.exit_status = 3

        # Use the error output when the standard output is empty
        if not self.stdoutdata.strip():
            self.stdoutdata = self.stderrdata

        # Now grep what we want in the output
        self.get_outputs(self.stdoutdata, max_plugins_output_length)

        # We can clean the useless properties now
        del self.stdoutdata
        del self.stderrdata

        self.status = 'done'
        self.execution_time = time.time() - self.check_time

        # Also get the system and user times
        _, _, n_child_utime, n_child_stime, _ = os.times()
        self.u_time = n_child_utime - child_utime
        self.s_time = n_child_stime - child_stime

    def copy_shell__(self, new_i):
        """Copy all attributes listed in 'ONLY_COPY_PROP' from `self` to `new_i`.

        :param new_i: object that receives the copied attributes
        :type new_i: object
        :return: `new_i`, with the properties added
        :rtype: object
        """
        for prop in ONLY_COPY_PROP:
            setattr(new_i, prop, getattr(self, prop))
        return new_i

    def got_shell_characters(self):
        """Check if the command_attribute (command line) has shell characters
        Shell characters are : '!', '$', '^', '&', '*', '(', ')', '~', '[', ']',
                               '|', '{', '}', ';', '<', '>', '?', '`'

        :return: True if one shell character is found, False otherwise
        :rtype: bool
        """
        return any(character in self.command for character in SHELLCHARS)

    def execute__(self, force_shell=False):
        """Execute action in a subprocess

        Does nothing in this base class.

        :param force_shell: not used in this base class
        :type force_shell: bool
        :return: None
        """
        pass

    def kill__(self):
        """Kill the action and close fds

        Does nothing in this base class.

        :return: None
        """
        pass
class ActionBase(AlignakObject):  # pylint: disable=too-many-instance-attributes
    """
    This abstract class is used to have a common base for both actions (event handlers and
    notifications) and checks.

    The Action may be an internal one if it does not require to use a Worker process to run
    the action because the Scheduler is able to resolve the action by itself.

    This class is specialized according to the running OS. Currently, only Linux/Unix like OSes
    are tested. The process handling itself (`_execute` and `_kill`) does nothing here.
    """
    # Handle on the launched process, None when no process is attached
    process = None

    properties = {
        'is_a': StringProp(default=u''),
        'type': StringProp(default=u''),
        'internal': BoolProp(default=False),
        'creation_time': FloatProp(default=0.0),
        '_is_orphan': BoolProp(default=False),
        '_in_timeout': BoolProp(default=False),
        'status': StringProp(default=ACT_STATUS_SCHEDULED),
        'exit_status': IntegerProp(default=3),
        'output': StringProp(default=u'', fill_brok=['full_status']),
        'long_output': StringProp(default=u'', fill_brok=['full_status']),
        'perf_data': StringProp(default=u'', fill_brok=['full_status']),
        't_to_go': FloatProp(default=0.0),
        'check_time': IntegerProp(default=0),
        'last_poll': IntegerProp(default=0),
        'execution_time': FloatProp(default=0.0),
        'wait_time': FloatProp(default=0.001),
        'u_time': FloatProp(default=0.0),
        's_time': FloatProp(default=0.0),
        'reactionner_tag': StringProp(default=u'None'),
        'env': DictProp(default={}),
        'module_type': StringProp(default=u'fork', fill_brok=['full_status']),
        'my_worker': StringProp(default=u'none'),
        'command': StringProp(default=''),
        'timeout': IntegerProp(default=10),
        'ref': StringProp(default=u'unset'),
        'ref_type': StringProp(default=u'unset'),
        'my_scheduler': StringProp(default=u'unassigned'),
    }

    def __init__(self, params=None, parsing=False):
        """Build the action from `params` and fill the missing defaults.

        :param params: initial attributes of the action
        :type params: dict | None
        :param parsing: forwarded as is to the parent class constructor
        :type parsing: bool
        """
        super(ActionBase, self).__init__(params, parsing=parsing)

        # Set a creation time only if not provided
        if not params or 'creation_time' not in params:
            self.creation_time = time.time()
        # Set actions log only if not provided
        if not params or 'log_actions' not in params:
            self.log_actions = 'ALIGNAK_LOG_ACTIONS' in os.environ

        # Fill default parameters
        self.fill_default()

    def _log_action(self, message, *args):
        """Log an action event when actions logging is enabled.

        The message is emitted at WARNING level when the ALIGNAK_LOG_ACTIONS
        environment variable is set to 'WARNING', else at INFO level.
        Nothing is logged when `self.log_actions` is false.

        :param message: logging format string
        :type message: str
        :param args: arguments of the format string
        :return: None
        """
        if not self.log_actions:
            return
        # log_actions may have been provided in the constructor parameters,
        # so the environment variable is not guaranteed to exist: do not
        # index os.environ directly
        if os.environ.get('ALIGNAK_LOG_ACTIONS') == 'WARNING':
            logger.warning(message, *args)
        else:
            logger.info(message, *args)

    @staticmethod
    def _stream_to_text(data):
        """Convert what was read from a process stream into a text string.

        :param data: data read from the process stdout / stderr. May be None
                     (`communicate()` returns None for a stream that is not
                     captured) or empty when nothing was read
        :type data: bytes | str | None
        :return: decoded data, '' when there is nothing
        :rtype: str
        """
        if not data:
            return ''
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: keep what can be decoded rather than failing
            # the whole action because of its output
            return data.decode("utf-8", "replace")
        except (AttributeError, UnicodeEncodeError):
            # Already a text string
            return data

    def _parse_and_drop_outputs(self, max_plugins_output_length):
        """Parse the collected outputs then forget the raw data.

        :param max_plugins_output_length: max plugin data length
        :type max_plugins_output_length: int
        :return: None
        """
        # Replace stdout with stderr if stdout is empty
        self.stdoutdata = self.stdoutdata.strip()
        if not self.stdoutdata:
            self.stdoutdata = self.stderrdata

        # Now grep what we want in the output
        self.get_outputs(self.stdoutdata, max_plugins_output_length)

        # We can clean the useless properties now
        del self.stdoutdata
        del self.stderrdata

    def is_launchable(self, timestamp):
        """Check if this action can be launched based on current time

        :param timestamp: time to compare
        :type timestamp: int
        :return: True if timestamp >= self.t_to_go, False otherwise
                 (always False when self.t_to_go is None)
        :rtype: bool
        """
        if self.t_to_go is None:
            return False
        return timestamp >= self.t_to_go

    def get_local_environnement(self):
        """
        Mix the environment and the environment variables into a new local
        environment dictionary

        Note: We cannot just update the global os.environ because this
        would effect all other checks.

        :return: local environment variables
        :rtype: dict
        """
        # Do not use copy.copy() here, as the resulting copy still
        # changes the real environment (it is still a os._Environment
        # instance).
        local_env = os.environ.copy()
        for local_var in self.env:
            local_env[local_var] = self.env[local_var]
        return local_env

    def execute(self):
        """Start this action command in a subprocess.

        :raise: ActionError
            'toomanyopenfiles' if too many opened files on the system
            'no_process_launched' if arguments parsing failed
            'process_launch_failed': if the process launch failed

        :return: reference to the started process
        :rtype: psutil.Process
        """
        self.status = ACT_STATUS_LAUNCHED
        self.check_time = time.time()
        # Initial polling delay, doubled by check_finished at each poll
        self.wait_time = 0.0001
        self.last_poll = self.check_time

        # Get a local env variables with our additional values
        self.local_env = self.get_local_environnement()

        # Initialize stdout and stderr.
        self.stdoutdata = ''
        self.stderrdata = ''

        logger.debug("Launch command: '%s', ref: %s, timeout: %s",
                     self.command, self.ref, self.timeout)
        self._log_action("Launch command: '%s'", self.command)

        return self._execute()  # OS specific part

    def get_outputs(self, out, max_plugins_output_length):
        """Get check outputs from single output (split perfdata etc).

        Updates output, perf_data and long_output attributes.

        The first line is split on every pipe: the part before the first
        pipe is the output and the part between the first and the second
        pipe is the performance data. For the next lines, everything before
        the first pipe met is long output and everything after it (next
        lines included) is performance data.

        :param out: output data of a check
        :type out: str
        :param max_plugins_output_length: max plugin data length
        :type max_plugins_output_length: int
        :return: None
        """
        placeholder = '___PROTECT_PIPE___'

        def clean(text):
            """Strip a chunk and restore the pipes that were escaped"""
            return text.strip().replace(placeholder, '|')

        # Squeeze all output after max_plugins_output_length
        out = out[:max_plugins_output_length]
        # manage escaped pipes: they must not be seen as separators
        out = out.replace(r'\|', placeholder)
        # Then cuts by lines
        lines = out.split('\n')

        # First line before | is output, strip it
        first_line = lines[0].split('|')
        self.output = clean(first_line[0])
        try:
            self.output = self.output.decode('utf8', 'ignore')
        except UnicodeEncodeError:
            pass
        except AttributeError:
            # No decode method: already a text string
            pass

        # After | it is perfdata, strip it. Init perfdata as empty
        self.perf_data = ''
        if len(first_line) > 1:
            self.perf_data = clean(first_line[1])

        # Now manage others lines. Before the | it's long_output
        # And after it's all perf_data, \n joined
        long_output = []
        in_perfdata = False
        for line in lines[1:]:
            if in_perfdata:
                # if already in perfdata, direct append
                self.perf_data += ' ' + clean(line)
                continue
            # not already in perf_data, search for the | part :)
            parts = line.split('|', 1)
            # The first part will always be long_output
            long_output.append(clean(parts[0]))
            if len(parts) > 1:
                in_perfdata = True
                self.perf_data += ' ' + clean(parts[1])

        # long_output is all non output and performance data, joined with \n
        self.long_output = '\n'.join(long_output)
        # Get sure the performance data are stripped
        self.perf_data = self.perf_data.strip()

        logger.debug("Command result for '%s': %d, %s",
                     self.command, self.exit_status, self.output)
        self._log_action("Check result for '%s': %d, %s",
                         self.command, self.exit_status, self.output)
        if self.perf_data:
            self._log_action("Performance data for '%s': %s",
                             self.command, self.perf_data)

    def check_finished(self, max_plugins_output_length):
        # pylint: disable=too-many-branches
        """Handle action if it is finished (get stdout, stderr, exit code...)

        When the process is still running, the polling delay is increased
        and the process is killed if it exceeded its timeout (status becomes
        ACT_STATUS_TIMEOUT). When the process exited, the status becomes
        ACT_STATUS_DONE. In both cases the collected outputs are parsed.

        :param max_plugins_output_length: max plugin data length
        :type max_plugins_output_length: int
        :return: None
        """
        self.last_poll = time.time()

        # Reference CPU times, used to compute the time spent by the children
        _, _, child_utime, child_stime, _ = os.times()

        # Not yet finished...
        if self.process.poll() is None:
            # We must wait, but checks are variable in time so we do not wait the same
            # for a little check or a long ping. So we do like TCP: slow start with a very
            # short time (0.0001 s) increased *2 but do not wait more than 0.5 s.
            self.wait_time = min(self.wait_time * 2, 0.5)
            now = time.time()
            # This log is really spamming... uncomment if you really need this information :)
            # logger.debug("%s - Process pid=%d is still alive", now, self.process.pid)

            # Get standard outputs in non blocking mode from the process streams.
            # Each stream is converted on its own so that an empty or undecodable
            # stdout never makes us lose what was read on stderr.
            stdout = no_block_read(self.process.stdout)
            stderr = no_block_read(self.process.stderr)
            self.stdoutdata += self._stream_to_text(stdout)
            self.stderrdata += self._stream_to_text(stderr)

            if (now - self.check_time) <= self.timeout:
                # Still running and still in time: come back later
                return

            logger.warning("Process pid=%d spent too much time: %.2f seconds",
                           self.process.pid, now - self.check_time)
            self._in_timeout = True
            self._kill()
            self.status = ACT_STATUS_TIMEOUT
            self.execution_time = now - self.check_time
            self.exit_status = 3

            self._log_action("Action '%s' exited on timeout (%d s)",
                             self.command, self.timeout)

            # Do not keep the process object
            del self.process

            self._parse_and_drop_outputs(max_plugins_output_length)

            # Get the user and system time
            _, _, n_child_utime, n_child_stime, _ = os.times()
            self.u_time = n_child_utime - child_utime
            self.s_time = n_child_stime - child_stime
            return

        logger.debug("Process pid=%d exited with %d",
                     self.process.pid, self.process.returncode)

        if fcntl:
            # Get standard outputs in non blocking mode from the process streams
            stdout = no_block_read(self.process.stdout)
            stderr = no_block_read(self.process.stderr)
        else:
            # Get standard outputs from the communicate function
            (stdout, stderr) = self.process.communicate()

        self.stdoutdata += self._stream_to_text(stdout)
        self.stderrdata += self._stream_to_text(stderr)

        self.exit_status = self.process.returncode
        self._log_action("Action '%s' exited with code %d",
                         self.command, self.exit_status)

        # We do not need the process now
        del self.process

        # check for bad syntax in command line:
        bad_syntax = (
            'sh: -c: line 0: unexpected EOF' in self.stderrdata
            or ('sh: -c: ' in self.stderrdata and ': Syntax' in self.stderrdata)
            or 'Syntax error: Unterminated quoted string' in self.stderrdata
        )
        if bad_syntax:
            logger.warning("Bad syntax in command line!")
            # Very, very ugly. But subprocess._handle_exitstatus does
            # not see a difference between a regular "exit 1" and a
            # bailing out shell. Strange, because strace clearly shows
            # a difference. (exit_group(1) vs. exit_group(257))
            self.stdoutdata = self.stdoutdata + self.stderrdata
            self.exit_status = 3

        # Make sure that exit code is a valid exit code
        if self.exit_status not in VALID_EXIT_STATUS:
            self.exit_status = 3

        self._parse_and_drop_outputs(max_plugins_output_length)

        self.status = ACT_STATUS_DONE
        self.execution_time = time.time() - self.check_time

        # Also get the system and user times
        _, _, n_child_utime, n_child_stime, _ = os.times()
        self.u_time = n_child_utime - child_utime
        self.s_time = n_child_stime - child_stime

    def copy_shell__(self, new_i):
        """Copy all attributes listed in 'ONLY_COPY_PROP' from `self` to `new_i`.

        :param new_i: object that receives the copied attributes
        :type new_i: object
        :return: `new_i`, with the properties added
        :rtype: object
        """
        for prop in ONLY_COPY_PROP:
            setattr(new_i, prop, getattr(self, prop))
        return new_i

    def got_shell_characters(self):
        """Check if the command_attribute (command line) has shell characters
        Shell characters are : '!', '$', '^', '&', '*', '(', ')', '~', '[', ']',
                               '|', '{', '}', ';', '<', '>', '?', '`'

        :return: True if one shell character is found, False otherwise
        :rtype: bool
        """
        return any(c in SHELLCHARS for c in self.command)

    def _execute(self, force_shell=False):
        """Execute action in a subprocess

        Does nothing in this base class.

        :param force_shell: not used in this base class
        :type force_shell: bool
        :return: None
        """
        pass

    def _kill(self):
        """Kill the action and close fds

        Does nothing in this base class.

        :return: None
        """
        pass
class MacroModulation(Item):
    """Configuration item describing a macro modulation.

    A MacroModulation is defined to change critical and warning level in
    some periods (like the night). The macros it carries are stored in the
    `customs` running property and it applies during `modulation_period`.
    """
    my_type = 'macromodulation'

    # Start from the parent definitions and add our own
    properties = dict(Item.properties)
    properties['macromodulation_name'] = StringProp(fill_brok=['full_status'])
    properties['modulation_period'] = StringProp(brok_transformation=to_name_if_possible,
                                                 fill_brok=['full_status'])

    running_properties = dict(Item.running_properties)
    running_properties['customs'] = DictProp(default={}, fill_brok=['full_status'])

    special_properties = ('modulation_period', )

    macros = {}

    def get_name(self):
        """Accessor to the macromodulation name

        :return: the macromodulation_name attribute, 'Unnamed' if it is not set
        :rtype: str
        """
        return getattr(self, 'macromodulation_name', 'Unnamed')

    def is_active(self, timperiods):
        """Tell whether this modulation applies at the current time

        The modulation is active when no timeperiod is found for
        `modulation_period`, or when the current time is valid for the
        timeperiod found.

        :param timperiods: container indexed with the modulation_period value
        :return: True if the modulation is currently active, otherwise False
        :rtype: bool
        """
        now = int(time.time())
        period = timperiods[self.modulation_period]
        if period and not period.is_time_valid(now):
            return False
        return True

    def is_correct(self):
        """Check the configuration of this object ::

        * a missing modulation_period is set to None (means 24x7)
        * at least one macro must be defined in customs
        * the parent class is_correct checker must succeed

        :return: True if the configuration is correct, otherwise False
        :rtype: bool
        """
        # Ok just put None as modulation_period, means 24x7
        if not hasattr(self, 'modulation_period'):
            self.modulation_period = None

        has_macros = bool(getattr(self, 'customs', None))
        if not has_macros:
            self.add_error("[macromodulation::%s] contains no macro definition"
                           % self.get_name())

        # The parent checker is always run, even when an error was already found
        parent_state = super(MacroModulation, self).is_correct()
        return parent_state and has_macros
class SatelliteLink(Item): """SatelliteLink is a common Class for links between Arbiter and other satellites. Used by the Dispatcher object. """ properties = Item.properties.copy() properties.update({ 'address': StringProp(default='localhost', fill_brok=['full_status']), 'timeout': IntegerProp(default=3, fill_brok=['full_status']), 'data_timeout': IntegerProp(default=120, fill_brok=['full_status']), 'check_interval': IntegerProp(default=60, fill_brok=['full_status']), 'max_check_attempts': IntegerProp(default=3, fill_brok=['full_status']), 'spare': BoolProp(default=False, fill_brok=['full_status']), 'manage_sub_realms': BoolProp(default=False, fill_brok=['full_status']), 'manage_arbiters': BoolProp(default=False, fill_brok=['full_status'], to_send=True), 'modules': ListProp(default=[''], to_send=True, split_on_coma=True), 'polling_interval': IntegerProp(default=1, fill_brok=['full_status'], to_send=True), 'use_timezone': StringProp(default='NOTSET', to_send=True), 'realm': StringProp(default='', fill_brok=['full_status'], brok_transformation=get_obj_name_two_args_and_void), 'realm_name': StringProp(default=''), 'satellitemap': DictProp(default={}, elts_prop=AddrProp, to_send=True, override=True), 'use_ssl': BoolProp(default=False, fill_brok=['full_status']), 'hard_ssl_name_check': BoolProp(default=True, fill_brok=['full_status']), 'passive': BoolProp(default=False, fill_brok=['full_status'], to_send=True), }) running_properties = Item.running_properties.copy() running_properties.update({ 'con': StringProp(default=None), 'alive': BoolProp(default=True, fill_brok=['full_status']), 'broks': StringProp(default=[]), # the number of poll attempt from the arbiter dispatcher 'attempt': IntegerProp(default=0, fill_brok=['full_status']), # the last connection attempt timestamp 'last_connection': IntegerProp(default=0, fill_brok=['full_status']), # the number of failed attempt for the connection 'connection_attempt': IntegerProp(default=0, fill_brok=['full_status']), # the 
number of failed attempt for the connection 'max_failed_connections': IntegerProp(default=3, fill_brok=['full_status']), # can be network ask or not (dead or check in timeout or error) 'reachable': BoolProp(default=True, fill_brok=['full_status']), 'last_check': IntegerProp(default=0, fill_brok=['full_status']), 'managed_confs': DictProp(default={}), 'is_sent': BoolProp(default=False), }) def __init__(self, *args, **kwargs): super(SatelliteLink, self).__init__(*args, **kwargs) self.fill_default() self.arb_satmap = {'address': '0.0.0.0', 'port': 0} if hasattr(self, 'address'): self.arb_satmap['address'] = self.address if hasattr(self, 'port'): try: self.arb_satmap['port'] = int(self.port) except ValueError: # pragma: no cover, simple protection logger.error("Satellite port must be an integer: %s", self.port) # Create the link connection if not self.con: self.create_connection() def get_name(self): """Get the name of the link based on its type if *mytype*_name is an attribute then returns self.*mytype*_name. 
otherwise returns "Unnamed *mytype*" Example : self.poller_name or "Unnamed poller" :return: String corresponding to the link name :rtype: str """ return getattr(self, "{0}_name".format(self.get_my_type()), "Unnamed {0}".format(self.get_my_type())) def set_arbiter_satellitemap(self, satellitemap): """ arb_satmap is the satellitemap in current context: - A SatelliteLink is owned by an Arbiter - satellitemap attribute of SatelliteLink is the map defined IN THE satellite configuration but for creating connections, we need the have the satellitemap of the Arbiter :return: None """ self.arb_satmap = { 'address': self.address, 'port': self.port, 'use_ssl': self.use_ssl, 'hard_ssl_name_check': self.hard_ssl_name_check } self.arb_satmap.update(satellitemap) def create_connection(self): """Initialize HTTP connection with a satellite (con attribute) and set uri attribute :return: None """ self.con = None # Create the HTTP client for the connection try: self.con = HTTPClient(address=self.arb_satmap['address'], port=self.arb_satmap['port'], timeout=self.timeout, data_timeout=self.data_timeout, use_ssl=self.use_ssl, strong_ssl=self.hard_ssl_name_check) self.uri = self.con.uri # Set the satellite as alive self.set_alive() except HTTPClientException as exp: logger.error("Error with '%s' when creating client: %s", self.get_name(), str(exp)) # Set the satellite as dead self.set_dead() def put_conf(self, conf): """Send the conf (serialized) to the satellite HTTP request to the satellite (POST / put_conf) :param conf: The conf to send (data depend on the satellite) :type conf: :return: None """ if not self.reachable: logger.warning("Not reachable for put_conf: %s", self.get_name()) return False try: self.con.post('put_conf', {'conf': conf}, wait='long') return True except HTTPClientConnectionException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection error when sending configuration: %s", self.get_name(), str(exp)) 
self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection timeout when sending configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection logger.error("[%s] Error when sending configuration: %s", self.get_name(), str(exp)) self.con = None except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error("[%s] put_conf - Connection does not exist!", self.get_name()) return False def get_all_broks(self): """Get and clean all of our broks :return: list of all broks in the satellite :rtype: list """ res = self.broks self.broks = [] return res def set_alive(self): """Set alive, reachable, and reset attempts. If we change state, raise a status brok update :return: None """ was_alive = self.alive self.alive = True self.reachable = True self.attempt = 0 # We came from dead to alive! We must propagate the good news if not was_alive: logger.warning("Setting the satellite %s as alive :)", self.get_name()) brok = self.get_update_status_brok() self.broks.append(brok) def set_dead(self): """Set the satellite into dead state: * Alive -> False * con -> None Create an update Brok :return:None """ was_alive = self.alive self.alive = False self.reachable = False self.con = None # We are dead now! ! 
We must propagate the sad news if was_alive: logger.warning("Setting the satellite %s as dead :(", self.get_name()) brok = self.get_update_status_brok() self.broks.append(brok) def add_failed_check_attempt(self, reason=''): """Go in reachable=False and add a failed attempt if we reach the max, go dead :param reason: the reason of adding an attempts (stack trace sometimes) :type reason: str :return: None """ self.reachable = False self.attempt += 1 self.attempt = min(self.attempt, self.max_check_attempts) logger.info("Failed attempt to %s (%d/%d), reason: %s", self.get_name(), self.attempt, self.max_check_attempts, reason) # Don't need to warn again and again if the satellite is already dead # Only warn when it is alive if self.alive: logger.warning("Add failed attempt to %s (%d/%d), reason: %s", self.get_name(), self.attempt, self.max_check_attempts, reason) # check when we just go HARD (dead) if self.attempt == self.max_check_attempts: self.set_dead() def update_infos(self, now): """Update satellite info each self.check_interval seconds so we smooth arbiter actions for just useful actions. 
Create update Brok :return: None """ # First look if it's not too early to ping if (now - self.last_check) < self.check_interval: return False self.last_check = now # We ping and update the managed list self.ping() if not self.alive: logger.info("Not alive for ping: %s", self.get_name()) return False if self.attempt > 0: logger.info("Not responding to ping: %s (%d / %d)", self.get_name(), self.attempt, self.max_check_attempts) return False self.update_managed_conf() # Update the state of this element brok = self.get_update_status_brok() self.broks.append(brok) def known_conf_managed_push(self, cfg_id, push_flavor): """The elements just got a new conf_id, we put it in our list because maybe the satellite is too busy to answer now :param cfg_id: config id :type cfg_id: int :param push_flavor: push_flavor we pushed earlier to the satellite :type push_flavor: int :return: None """ self.managed_confs[cfg_id] = push_flavor def ping(self): """Send a HTTP request to the satellite (GET /ping) Add failed attempt if an error occurs Otherwise, set alive this satellite :return: None """ if self.con is None: self.create_connection() # If the connection failed to initialize, bail out if self.con is None: self.add_failed_check_attempt('no connection exist on ping') return logger.debug("Pinging %s", self.get_name()) try: res = self.con.get('ping') # Should return us pong string if res == 'pong': self.set_alive() return True # This sould never happen! Except is the source code got modified! logger.warning("[%s] I responded '%s' to ping! 
WTF is it?", self.get_name(), res) self.add_failed_check_attempt('pinog / NOT pong') except HTTPClientConnectionException as exp: # pragma: no cover, simple protection logger.warning("[%s] Connection error when pinging: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning("[%s] Connection timeout when pinging: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: logger.error("[%s] Error when pinging: %s", self.get_name(), str(exp)) # todo: raise an error and set daemon as dead? # any other error than conenction or timeout is really a bad situation !!! self.add_failed_check_attempt(reason=str(exp)) except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error("[%s] ping - Connection does not exist!", self.get_name()) return False def wait_new_conf(self): # pragma: no cover, no more used """Send a HTTP request to the satellite (GET /wait_new_conf) TODO: is it still useful, wait_new_conf is implemented in the HTTP interface of each daemon :return: True if wait new conf, otherwise False :rtype: bool """ if not self.reachable: logger.warning("Not reachable for wait_new_conf: %s", self.get_name()) return False try: logger.warning("Arbiter wants me to wait for a new configuration") self.con.get('wait_new_conf') return True except HTTPClientConnectionException as exp: logger.warning( "[%s] Connection error when waiting new configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection timeout when waiting new configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection 
logger.error("[%s] Error when waiting new configuration: %s", self.get_name(), str(exp)) except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error("[%s] wait_new_conf - Connection does not exist!", self.get_name()) return False def have_conf(self, magic_hash=None): """Send a HTTP request to the satellite (GET /have_conf) Used to know if the satellite has a conf :param magic_hash: Config hash. Only used for HA arbiter communication :type magic_hash: int :return: Boolean indicating if the satellite has a (specific) configuration :type: bool """ if not self.reachable: logger.warning("Not reachable for have_conf: %s", self.get_name()) return False try: return self.con.get('have_conf', {'magic_hash': magic_hash}) except HTTPClientConnectionException as exp: logger.warning( "[%s] Connection error when testing if has configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection timeout when testing if has configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection logger.error("[%s] Error when testing if has configuration: %s", self.get_name(), str(exp)) except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error("[%s] have_conf - Connection does not exist! 
- %s", self.get_name(), exp) return False def remove_from_conf(self, sched_id): # pragma: no cover, no more used """Send a HTTP request to the satellite (GET /remove_from_conf) Tell a satellite to remove a scheduler from conf TODO: is it still useful, remove_from_conf is implemented in the HTTP interface of each daemon :param sched_id: scheduler id to remove :type sched_id: int :return: True on success, False on failure, None if can't connect :rtype: bool | None TODO: Return False instead of None """ if not self.reachable: logger.warning("Not reachable for remove_from_conf: %s", self.get_name()) return try: self.con.get('remove_from_conf', {'sched_id': sched_id}) # todo: do not handle the result to confirm? return True except HTTPClientConnectionException as exp: logger.warning( "[%s] Connection error when removing from configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection timeout when removing from configuration: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection logger.error("[%s] Error when removing from configuration: %s", self.get_name(), str(exp)) except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error("[%s] remove_from_conf - Connection does not exist!", self.get_name()) return False def update_managed_conf(self): """Send a HTTP request to the satellite (GET /what_i_managed) and update managed_conf attribute with dict (cleaned) Set to {} on failure :return: None """ self.managed_confs = {} if not self.reachable: logger.warning("Not reachable for update_managed_conf: %s", self.get_name()) return try: res = self.con.get('what_i_managed') self.managed_confs = res # self.managed_confs = unserialize(str(res)) return True except 
HTTPClientConnectionException as exp: logger.warning( "[%s] Connection error when getting what I manage: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection timeout when getting what I manage: %s", self.get_name(), str(exp)) logger.debug("Connection: %s", self.con.__dict__) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection logger.warning("Error to the %s '%s' when getting what I manage", self.my_type, self.get_name()) logger.exception("Raised exception: %s", exp) except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error( "[%s] update_managed_conf - Connection does not exist!", self.get_name()) return False def do_i_manage(self, cfg_id, push_flavor): """Tell if the satellite is managing cfg_id with push_flavor :param cfg_id: config id :type cfg_id: int :param push_flavor: flavor id, random it generated at parsing :type push_flavor: int :return: True if the satellite has push_flavor in managed_confs[cfg_id] :rtype: bool """ if self.managed_confs: logger.debug("My managed configurations:") for conf in self.managed_confs: logger.debug("- %s", conf) else: logger.debug("No managed configuration!") # If not even the cfg_id in the managed_conf, bail out if cfg_id not in self.managed_confs: logger.warning("I (%s) do not manage this configuration: %s", self, cfg_id) return False # maybe it's in but with a false push_flavor. 
check it :) return self.managed_confs[cfg_id] == push_flavor def push_broks(self, broks): """Send a HTTP request to the satellite (GET /ping) and THEN Send a HTTP request to the satellite (POST /push_broks) Send broks to the satellite The first ping ensure the satellite is there to avoid a big timeout :param broks: Brok list to send :type broks: list :return: True on success, False on failure :rtype: bool """ if not self.reachable: logger.warning("Not reachable for push_broks: %s", self.get_name()) return False try: self.con.post('push_broks', {'broks': broks}, wait='long') return True except HTTPClientConnectionException as exp: logger.warning("[%s] Connection error when pushing broks: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning("[%s] Connection timeout when pushing broks: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection logger.error("[%s] Error when pushing broks: %s", self.get_name(), str(exp)) except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error("[%s] push_broks - Connection does not exist!", self.get_name()) return False def get_external_commands(self): """Send a HTTP request to the satellite (GET /ping) and THEN send a HTTP request to the satellite (GET /get_external_commands) Get external commands from satellite. Un-serialize data received. 
:return: External Command list on success, [] on failure :rtype: list """ if not self.reachable: logger.warning("Not reachable for get_external_commands: %s", self.get_name()) return [] try: res = self.con.get('get_external_commands', wait='long') tab = unserialize(str(res)) # Protect against bad return if not isinstance(tab, list): self.con = None return [] return tab except HTTPClientConnectionException as exp: logger.warning( "[%s] Connection error when getting external commands: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) self.set_dead() except HTTPClientTimeoutException as exp: # pragma: no cover, simple protection logger.warning( "[%s] Connection timeout when getting external commands: %s", self.get_name(), str(exp)) self.add_failed_check_attempt(reason=str(exp)) except HTTPClientException as exp: # pragma: no cover, simple protection logger.error("[%s] Error when getting external commands: %s", self.get_name(), str(exp)) self.con = None except AttributeError as exp: # pragma: no cover, simple protection # Connection is not created logger.error( "[%s] get_external_commands - Connection does not exist!", self.get_name()) except AlignakClassLookupException as exp: # pragma: no cover, simple protection logger.error('Cannot un-serialize external commands received: %s', exp) return [] def prepare_for_conf(self): """Init cfg dict attribute with __class__.properties and extra __class__ attribute (like __init__ could do with an object) :return: None """ self.cfg = {'global': {}, 'schedulers': {}, 'arbiters': {}} properties = self.__class__.properties for prop, entry in properties.items(): if entry.to_send: self.cfg['global'][prop] = getattr(self, prop) cls = self.__class__ # Also add global values self.cfg['global']['statsd_host'] = cls.statsd_host self.cfg['global']['statsd_port'] = cls.statsd_port self.cfg['global']['statsd_prefix'] = cls.statsd_prefix self.cfg['global']['statsd_enabled'] = cls.statsd_enabled def 
add_global_conf_parameters(self, params): """Add some extra params in cfg dict attribute. Some attributes are in the global configuration :param params: dict to update cfg with :type params: dict :return: None """ for prop in params: self.cfg['global'][prop] = params[prop] def get_my_type(self): """Get the satellite type. Accessor to __class__.mytype ie : poller, scheduler, receiver, broker, arbiter or reactionner :return: Satellite type :rtype: str """ return self.__class__.my_type def give_satellite_cfg(self): """Get a configuration for this satellite. Not used by Scheduler and Arbiter (overridden) :return: Configuration for satellite :rtype: dict """ return { 'port': self.port, 'address': self.address, 'name': self.get_name(), 'instance_id': self.uuid, 'use_ssl': self.use_ssl, 'hard_ssl_name_check': self.hard_ssl_name_check, 'timeout': self.timeout, 'data_timeout': self.data_timeout, 'max_check_attempts': self.max_check_attempts, 'active': True, 'passive': self.passive, 'poller_tags': getattr(self, 'poller_tags', []), 'reactionner_tags': getattr(self, 'reactionner_tags', []) }
class Contact(Item):
    """Contact class implements monitoring concepts for contact.
    For example it defines host_notification_period, service_notification_period etc.

    A contact decides whether it wants a notification by first looking at its own
    enabled flags and downtimes, then by asking each of its notification ways.
    """
    my_type = 'contact'

    properties = Item.properties.copy()
    properties.update({
        'contact_name':
            StringProp(fill_brok=['full_status']),
        'alias':
            StringProp(default=u'', fill_brok=['full_status']),
        'contactgroups':
            ListProp(default=[], fill_brok=['full_status']),
        'host_notifications_enabled':
            BoolProp(default=True, fill_brok=['full_status']),
        'service_notifications_enabled':
            BoolProp(default=True, fill_brok=['full_status']),
        'host_notification_period':
            StringProp(default='', fill_brok=['full_status']),
        'service_notification_period':
            StringProp(default='', fill_brok=['full_status']),
        'host_notification_options':
            ListProp(default=[''], fill_brok=['full_status'], split_on_comma=True),
        'service_notification_options':
            ListProp(default=[''], fill_brok=['full_status'], split_on_comma=True),
        # To be consistent with notificationway object attributes
        'host_notification_commands':
            ListProp(default=[], fill_brok=['full_status']),
        'service_notification_commands':
            ListProp(default=[], fill_brok=['full_status']),
        'min_business_impact':
            IntegerProp(default=0, fill_brok=['full_status']),
        'email':
            StringProp(default=u'none', fill_brok=['full_status']),
        'pager':
            StringProp(default=u'none', fill_brok=['full_status']),
        'address1':
            StringProp(default=u'none', fill_brok=['full_status']),
        'address2':
            StringProp(default=u'none', fill_brok=['full_status']),
        'address3':
            StringProp(default=u'none', fill_brok=['full_status']),
        'address4':
            StringProp(default=u'none', fill_brok=['full_status']),
        'address5':
            StringProp(default=u'none', fill_brok=['full_status']),
        'address6':
            StringProp(default=u'none', fill_brok=['full_status']),
        'can_submit_commands':
            BoolProp(default=False, fill_brok=['full_status']),
        'is_admin':
            BoolProp(default=False, fill_brok=['full_status']),
        'expert':
            BoolProp(default=False, fill_brok=['full_status']),
        'retain_status_information':
            BoolProp(default=True, fill_brok=['full_status']),
        'notificationways':
            ListProp(default=[], fill_brok=['full_status']),
        'password':
            StringProp(default=u'NOPASSWORDSET', fill_brok=['full_status']),
    })

    running_properties = Item.running_properties.copy()
    running_properties.update({
        'modified_attributes':
            IntegerProp(default=0, fill_brok=['full_status'], retention=True),
        'modified_host_attributes':
            IntegerProp(default=0, fill_brok=['full_status'], retention=True),
        'modified_service_attributes':
            IntegerProp(default=0, fill_brok=['full_status'], retention=True),
        'in_scheduled_downtime':
            BoolProp(default=False, fill_brok=['full_status', 'check_result'],
                     retention=True),
        'broks':
            ListProp(default=[]),  # and here broks raised
        'customs':
            DictProp(default={}, fill_brok=['full_status']),
    })

    # This tab is used to transform old parameters name into new ones
    # so from Nagios2 format, to Nagios3 ones.
    # Or Alignak deprecated names like criticity
    old_properties = {
        'min_criticity': 'min_business_impact',
    }

    macros = {
        'CONTACTNAME': 'contact_name',
        'CONTACTALIAS': 'alias',
        'CONTACTEMAIL': 'email',
        'CONTACTPAGER': 'pager',
        'CONTACTADDRESS1': 'address1',
        'CONTACTADDRESS2': 'address2',
        'CONTACTADDRESS3': 'address3',
        'CONTACTADDRESS4': 'address4',
        'CONTACTADDRESS5': 'address5',
        'CONTACTADDRESS6': 'address6',
        'CONTACTGROUPNAME': 'get_groupname',
        'CONTACTGROUPNAMES': 'get_groupnames'
    }

    # Properties that must be present when the contact has no notification way
    # (checked in is_correct)
    special_properties = (
        'service_notification_commands', 'host_notification_commands',
        'service_notification_period', 'host_notification_period',
        'service_notification_options', 'host_notification_options',
        'contact_name'
    )

    simple_way_parameters = (
        'service_notification_period', 'host_notification_period',
        'service_notification_options', 'host_notification_options',
        'service_notification_commands', 'host_notification_commands',
        'min_business_impact'
    )

    def __init__(self, params=None, parsing=True):
        """Build the contact, re-creating CommandCall objects when the
        notification commands are provided as serialized dictionaries.

        :param params: contact parameters. Note that the notification commands
            entries that are re-created here are removed from this dictionary
        :type params: dict | None
        :param parsing: forwarded to CommandCall and to the parent constructor
        :type parsing: bool
        """
        if params is None:
            params = {}

        # At deserialization, thoses are dict
        # TODO: Separate parsing instance from recreated ones
        for prop in ('service_notification_commands', 'host_notification_commands'):
            value = params.get(prop)
            if isinstance(value, list) and value and isinstance(value[0], dict):
                # We recreate the objects
                setattr(self, prop,
                        [CommandCall(elem, parsing=parsing) for elem in value])
                # And remove prop, to prevent from being overridden
                del params[prop]
        super(Contact, self).__init__(params, parsing=parsing)

    def __str__(self):  # pragma: no cover
        return '<Contact %s, uuid=%s, use: %s />' \
               % (self.get_name(), self.uuid, getattr(self, 'use', None))
    __repr__ = __str__

    def serialize(self):
        """Serialize the contact, replacing the notification commands with
        their own serialized form (None is kept as None).

        :return: the parent serialization updated with the notification commands
        :rtype: dict
        """
        res = super(Contact, self).serialize()

        for prop in ('service_notification_commands', 'host_notification_commands'):
            commands = getattr(self, prop)
            if commands is None:
                res[prop] = None
            else:
                res[prop] = [elem.serialize() for elem in commands]

        return res

    def get_name(self):
        """Get contact name

        Templates are named "tpl-<name>"; 'unnamed' is used when the relevant
        name attribute is missing.

        :return: contact name
        :rtype: str
        """
        if self.is_tpl():
            return "tpl-%s" % (getattr(self, 'name', 'unnamed'))
        return getattr(self, 'contact_name', 'unnamed')

    def get_groupname(self):
        """
        Get the first group name whose contact belongs to

        :return: group name, 'Unknown' if the contact has no group
        :rtype: str
        """
        if self.contactgroups:
            return self.contactgroups[0]
        return 'Unknown'

    def get_groupnames(self):
        """
        Get all the groups name whose contact belongs to

        :return: comma separated list of the groups names,
            'Unknown' if the contact has no group
        :rtype: str
        """
        if self.contactgroups:
            return ', '.join(self.contactgroups)
        return 'Unknown'

    def _update_scheduled_downtime(self):
        """Refresh in_scheduled_downtime from the contact downtimes

        The downtimes container is accepted either as a mapping (id -> downtime)
        or as a plain iterable of downtimes, so that host and service
        notifications are filtered exactly the same way.

        :return: True if at least one downtime is in effect, otherwise False
        :rtype: bool
        """
        downtimes = self.downtimes
        if isinstance(downtimes, dict):
            downtimes = downtimes.values()
        self.in_scheduled_downtime = any(downtime.is_in_effect for downtime in downtimes)
        return self.in_scheduled_downtime

    def _want_notification(self, enabled, checker_name, notifways, timeperiods,
                           timestamp, state, n_type, business_impact, cmd):
        """Common implementation of the host / service notification filter

        :param enabled: the contact notifications enabled flag for this object type
        :type enabled: bool
        :param checker_name: name of the notification way method to call
        :type checker_name: str
        :return: True if the contact wants the notification, otherwise False
        :rtype: bool
        """
        if not enabled:
            return False

        # If we are in downtime, we do not want notification
        if self._update_scheduled_downtime():
            return False

        # Now the rest is for sub notificationways. If one is OK, we are ok
        # We will filter in another phase
        return any(
            getattr(notifways[notifway_id], checker_name)(
                timeperiods, timestamp, state, n_type, business_impact, cmd)
            for notifway_id in self.notificationways
        )

    def want_service_notification(self, notifways, timeperiods,
                                  timestamp, state, n_type, business_impact, cmd=None):
        """Check if notification options match the state of the service

        :param notifways: notification ways container, indexed by the identifiers
            stored in the contact notificationways
        :param timeperiods: forwarded as is to the notification ways
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state ("WARNING", "CRITICAL" ..)
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of this service
        :type business_impact: int
        :param cmd: command launched to notify the contact
        :type cmd: str
        :return: True if contact wants notification, otherwise False
        :rtype: bool
        """
        return self._want_notification(
            self.service_notifications_enabled, 'want_service_notification',
            notifways, timeperiods, timestamp, state, n_type, business_impact, cmd)

    def want_host_notification(self, notifways, timeperiods, timestamp, state, n_type,
                               business_impact, cmd=None):
        """Check if notification options match the state of the host

        :param notifways: notification ways container, indexed by the identifiers
            stored in the contact notificationways
        :param timeperiods: forwarded as is to the notification ways
        :param timestamp: time we want to notify the contact (usually now)
        :type timestamp: int
        :param state: host or service state ("UP", "DOWN" ..)
        :type state: str
        :param n_type: type of notification ("PROBLEM", "RECOVERY" ..)
        :type n_type: str
        :param business_impact: impact of this host
        :type business_impact: int
        :param cmd: command launch to notify the contact
        :type cmd: str
        :return: True if contact wants notification, otherwise False
        :rtype: bool
        """
        return self._want_notification(
            self.host_notifications_enabled, 'want_host_notification',
            notifways, timeperiods, timestamp, state, n_type, business_impact, cmd)

    def get_notification_commands(self, notifways, n_type, command_name=False):
        """Get notification commands for object type

        As a side effect, the contact <n_type>_notification_commands attribute
        is replaced with the result (or with the commands names).

        :param notifways: list of alignak.objects.NotificationWay objects
        :type notifways: NotificationWays
        :param n_type: object type (host or service)
        :type n_type: string
        :param command_name: True to update the inner property with the name of the command,
                             False to update with the Command objects list
        :type command_name: bool
        :return: command list
        :rtype: list[alignak.objects.command.Command]
        """
        res = []

        for notifway_id in self.notificationways:
            notifway = notifways[notifway_id]
            res.extend(notifway.get_notification_commands(n_type))

        # Update inner notification commands property with command name or command
        if command_name:
            setattr(self, n_type + '_notification_commands', [c.get_name() for c in res])
        else:
            setattr(self, n_type + '_notification_commands', res)

        return res

    def is_correct(self):
        """Check if this object configuration is correct ::

        * Check our own specific properties
        * Call our parent class is_correct checker

        :return: True if the configuration is correct, otherwise False
        :rtype: bool
        """
        state = True
        cls = self.__class__

        # Internal checks before executing inherited function...

        # There is a case where there is no nw: when there is not special_prop defined
        # at all!!
        if self.notificationways == []:
            for prop in self.special_properties:
                if not hasattr(self, prop):
                    msg = "[contact::%s] %s property is missing" % (self.get_name(), prop)
                    self.add_error(msg)
                    state = False

        if not hasattr(self, 'contact_name'):
            if hasattr(self, 'alias'):
                # Use the alias if we miss the contact_name
                self.contact_name = self.alias

        # The contact may have neither a contact_name nor an alias: do not crash
        # on the characters check in this case
        contact_name = getattr(self, 'contact_name', '')
        for char in cls.illegal_object_name_chars:
            if char not in contact_name:
                continue

            msg = "[contact::%s] %s character not allowed in contact_name" \
                  % (self.get_name(), char)
            self.add_error(msg)
            state = False

        return super(Contact, self).is_correct() and state

    def raise_enter_downtime_log_entry(self):
        """Raise CONTACT DOWNTIME ALERT entry (info level)
        Format is : "CONTACT DOWNTIME ALERT: *get_name()*;STARTED;
                      Contact has entered a period of scheduled downtime"
        Example : "CONTACT DOWNTIME ALERT: test_contact;STARTED;
                    Contact has entered a period of scheduled downtime"

        :return: None
        """
        brok = make_monitoring_log(
            'info', "CONTACT DOWNTIME ALERT: %s;STARTED; "
                    "Contact has entered a period of scheduled downtime" % self.get_name()
        )
        self.broks.append(brok)

    def raise_exit_downtime_log_entry(self):
        """Raise CONTACT DOWNTIME ALERT entry (info level)
        Format is : "CONTACT DOWNTIME ALERT: *get_name()*;STOPPED;
                      Contact has exited from a period of scheduled downtime"
        Example : "CONTACT DOWNTIME ALERT: test_contact;STOPPED;
                    Contact has exited from a period of scheduled downtime"

        :return: None
        """
        brok = make_monitoring_log(
            'info', "CONTACT DOWNTIME ALERT: %s;STOPPED; "
                    "Contact has exited from a period of scheduled downtime" % self.get_name()
        )
        self.broks.append(brok)

    def raise_cancel_downtime_log_entry(self):
        """Raise CONTACT DOWNTIME ALERT entry (info level)
        Format is : "CONTACT DOWNTIME ALERT: *get_name()*;CANCELLED;
                      Scheduled downtime for contact has been cancelled."
        Example : "CONTACT DOWNTIME ALERT: test_contact;CANCELLED;
                    Scheduled downtime for contact has been cancelled."

        :return: None
        """
        brok = make_monitoring_log(
            'info', "CONTACT DOWNTIME ALERT: %s;CANCELLED; "
                    "Scheduled downtime for contact has been cancelled." % self.get_name()
        )
        self.broks.append(brok)
class SchedulerLink(SatelliteLink):
    """
    Class to manage the scheduler information
    """

    # Ok we lie a little here because we are a mere link in fact
    my_type = 'scheduler'

    properties = SatelliteLink.properties.copy()
    properties.update({
        'scheduler_name':
            StringProp(fill_brok=['full_status']),
        'port':
            IntegerProp(default=7768, fill_brok=['full_status']),
        'weight':
            IntegerProp(default=1, fill_brok=['full_status']),
        'skip_initial_broks':
            BoolProp(default=False, fill_brok=['full_status']),
        'accept_passive_unknown_check_results':
            BoolProp(default=False, fill_brok=['full_status']),
    })

    running_properties = SatelliteLink.running_properties.copy()
    running_properties.update({
        'conf':
            StringProp(default=None),
        'conf_package':
            DictProp(default={}),
        # NOTE(review): the two next properties are declared as StringProp but their
        # defaults are a boolean and a list - confirm whether BoolProp / ListProp
        # were intended before changing them
        'need_conf':
            StringProp(default=True),
        'external_commands':
            StringProp(default=[]),
        'push_flavor':
            IntegerProp(default=0),
    })

    def run_external_commands(self, commands):  # pragma: no cover, seems not to be used anywhere
        """
        Run external commands

        Send a HTTP request to the scheduler (POST /run_external_commands)

        :param commands: external commands to send, posted as the 'cmds' parameter
        :type commands: list
        :return: True if the commands got posted, otherwise False (scheduler not alive,
            no connection available or HTTP error)
        :rtype: bool

        TODO: this function seems to be used by the arbiter when it needs to make its
        schedulers run external commands. Currently, it is not used, but will it be?
        """
        logger.debug("[%s] run_external_commands", self.get_name())

        if self.con is None:
            self.create_connection()
        if not self.alive:
            return False
        # If the connection failed to initialize, bail out rather than
        # raising an AttributeError on the post
        if self.con is None:
            self.add_failed_check_attempt(
                reason='no connection exist on run_external_commands')
            return False

        logger.debug("[%s] Sending %d commands", self.get_name(), len(commands))

        try:
            self.con.post('run_external_commands', {'cmds': commands})
        except HTTPClientConnectionException as exp:
            logger.warning("[%s] Connection error when sending run_external_commands",
                           self.get_name())
            self.add_failed_check_attempt(reason=str(exp))
            self.set_dead()
        except HTTPClientTimeoutException as exp:
            logger.warning("[%s] Connection timeout when sending run_external_commands: %s",
                           self.get_name(), str(exp))
            self.add_failed_check_attempt(reason=str(exp))
        except HTTPClientException as exp:  # pragma: no cover, simple protection
            logger.error("[%s] Error when sending run_external_commands: %s",
                         self.get_name(), str(exp))
            # Drop the connection, it will be created again on the next call
            self.con = None
        else:
            return True

        return False

    def register_to_my_realm(self):  # pragma: no cover, seems not to be used anywhere
        """
        Add this scheduler to the schedulers list of its realm

        :return: None
        """
        self.realm.schedulers.append(self)

    def give_satellite_cfg(self):
        """
        Get configuration of the scheduler satellite

        The scheduler is declared active only when it got a configuration
        (its conf attribute is not None).

        :return: dictionary of scheduler information
        :rtype: dict
        """
        return {
            'port': self.port,
            'address': self.address,
            'name': self.get_name(),
            'instance_id': self.uuid,
            'active': self.conf is not None,
            'push_flavor': self.push_flavor,
            'timeout': self.timeout,
            'data_timeout': self.data_timeout,
            'max_check_attempts': self.max_check_attempts,
            'use_ssl': self.use_ssl,
            'hard_ssl_name_check': self.hard_ssl_name_check
        }

    def get_override_configuration(self):
        """
        Some parameters can give as 'overridden parameters' like use_timezone
        so they will be mixed (in the scheduler) with the standard conf sent by the arbiter

        Only the class properties which entry has a true 'override' attribute are returned.

        :return: dictionary of properties
        :rtype: dict
        """
        return {
            prop: getattr(self, prop)
            for prop, entry in self.__class__.properties.items()
            if entry.override
        }