Пример #1
0
    def __init__(self, mod_conf, host, port, socket, allowed_hosts, database_file, max_logs_age, pnp_path, debug=None, debug_queries=False):
        BaseModule.__init__(self, mod_conf)
        self.host = host
        self.port = port
        self.socket = socket
        self.allowed_hosts = allowed_hosts
        self.database_file = database_file
        self.max_logs_age = max_logs_age
        self.pnp_path = pnp_path
        self.debug = debug
        self.debug_queries = debug_queries

        #Our datas
        self.configs = {}
        self.hosts = SortedDict()
        self.services = SortedDict()
        self.contacts = SortedDict()
        self.hostgroups = SortedDict()
        self.servicegroups = SortedDict()
        self.contactgroups = SortedDict()
        self.timeperiods = SortedDict()
        self.commands = SortedDict()
        #Now satellites
        self.schedulers = SortedDict()
        self.pollers = SortedDict()
        self.reactionners = SortedDict()
        self.brokers = SortedDict()
        self.service_id_cache = {}

        self.instance_ids = []

        self.number_of_objects = 0
        self.last_need_data_send = time.time()
Пример #2
0
class Livestatus_broker(BaseModule):
    def __init__(self, mod_conf, host, port, socket, allowed_hosts, database_file, max_logs_age, pnp_path, debug=None, debug_queries=False):
        BaseModule.__init__(self, mod_conf)
        self.host = host
        self.port = port
        self.socket = socket
        self.allowed_hosts = allowed_hosts
        self.database_file = database_file
        self.max_logs_age = max_logs_age
        self.pnp_path = pnp_path
        self.debug = debug
        self.debug_queries = debug_queries

        #Our datas
        self.configs = {}
        self.hosts = SortedDict()
        self.services = SortedDict()
        self.contacts = SortedDict()
        self.hostgroups = SortedDict()
        self.servicegroups = SortedDict()
        self.contactgroups = SortedDict()
        self.timeperiods = SortedDict()
        self.commands = SortedDict()
        #Now satellites
        self.schedulers = SortedDict()
        self.pollers = SortedDict()
        self.reactionners = SortedDict()
        self.brokers = SortedDict()
        self.service_id_cache = {}

        self.instance_ids = []

        self.number_of_objects = 0
        self.last_need_data_send = time.time()


    #Called by Broker so we can do init stuff
    def init(self):
        print "Initialisation of the livestatus broker"

        #to_queue is where we get broks from Broker
        #self.to_q = self.properties['to_queue']

        #from_quue is where we push back objects like
        #external commands to the broker
        #self.from_q = self.properties['from_queue']

        self.prepare_log_db()
        self.prepare_pnp_path()


        self.livestatus = LiveStatus(self.configs, self.hosts, self.services, self.contacts, self.hostgroups, self.servicegroups, self.contactgroups, self.timeperiods, self.commands, self.schedulers, self.pollers, self.reactionners, self.brokers, self.dbconn, self.pnp_path, self.from_q)

        m = MacroResolver()
        m.output_macros = ['HOSTOUTPUT', 'HOSTPERFDATA', 'HOSTACKAUTHOR', 'HOSTACKCOMMENT', 'SERVICEOUTPUT', 'SERVICEPERFDATA', 'SERVICEACKAUTHOR', 'SERVICEACKCOMMENT']


    def manage_program_status_brok(self, b):
        data = b.data
        c_id = data['instance_id']
        print "Creating config:", c_id, data
        c = Config()
        for prop in data:
            setattr(c, prop, data[prop])
        #print "CFG:", c
        self.configs[0] = c
        # And we save that we got data from this instance_id
        self.instance_ids.append(c_id)

        # We should clean all previously added hosts and services
        inst_id = data['instance_id']
        to_del = []
        to_del_srv = []
        for h in self.hosts.values():
            # If the host was in this instance, del it
            if h.instance_id == inst_id:
                to_del.append(h.host_name)

                
        for s in self.services.values():
            if s.instance_id == inst_id:
                to_del_srv.append(s.host_name + s.service_description)

        # Now clean hostgroups too
        for hg in self.hostgroups.values():
            print "Len before exclude", len(hg.members)
            # Exclude from members the hosts with this inst_id
            hg.members = [h for h in hg.members if h.instance_id != inst_id]
            print "Len after", len(hg.members)

        # Now clean service groups
        for sg in self.servicegroups.values():
            sg.members = [s for s in sg.members if s.instance_id != inst_id]

        # Ok, really clean the hosts
        for i in to_del:
            try:
                del self.hosts[i]
            except KeyError: # maybe it was not inserted in a good way, pass it
                pass

        # And services
        for i in to_del_srv:
            try:
                del self.services[i]
            except KeyError: # maybe it was not inserted in a good way, pass it
                pass



    def manage_update_program_status_brok(self, b):
        data = b.data
        c_id = data['instance_id']

        if c_id not in self.instance_ids:
            # Do not ask data too quickly, very dangerous
            # one a minute
            if time.time() - self.last_need_data_send > 60:
                print "I ask the broker for instance id data :", c_id
                msg = Message(id=0, type='NeedData', data={'full_instance_id' : c_id})
                self.from_q.put(msg)
                self.last_need_data_send = time.time()
            return

        # We have only one config here, with id 0
        c = self.configs[0]
        self.update_element(c, data)
    

    def set_schedulingitem_values(self, i):
        i.check_period = self.get_timeperiod(i.check_period)
        i.notification_period = self.get_timeperiod(i.notification_period)
        i.contacts = self.get_contacts(i.contacts)
        i.rebuild_ref()
        #Escalations is not use for status_dat
        del i.escalations
        

    def manage_initial_host_status_brok(self, b):
        data = b.data
        
        host_name = data['host_name']
        inst_id = data['instance_id']
        #print "Creating host:", h_id, b
        h = Host({})
        self.update_element(h, data)        
        self.set_schedulingitem_values(h)
        
        h.service_ids = []
        h.services = []
        h.instance_id = inst_id
        # We need to rebuild Downtime and Comment relationship
        for dtc in h.downtimes + h.comments:
            dtc.ref = h
        self.hosts[host_name] = h
        self.number_of_objects += 1


    #In fact, an update of a host is like a check return
    def manage_update_host_status_brok(self, b):
        self.manage_host_check_result_brok(b)
        data = b.data
        host_name = data['host_name']
        #In the status, we've got duplicated item, we must relink thems
        try:
            h = self.hosts[host_name]
        except KeyError:
            print "Warning : the host %s is unknown!" % host_name
            return
        self.update_element(h, data)
        self.set_schedulingitem_values(h)
        for dtc in h.downtimes + h.comments:
            dtc.ref = h
        self.livestatus.count_event('host_checks')


    def manage_initial_hostgroup_status_brok(self, b):
        data = b.data
        hostgroup_name = data['hostgroup_name']
        members = data['members']
        del data['members']
        
        # Maybe we already got this hostgroup. If so, use the existing object
        # because in different instance, we will ahve the same group with different
        # elements
        try:
            hg = self.hostgroups[hostgroup_name]
        except KeyError:
            # If we got none, create a new one
            #print "Creating hostgroup:", hg_id, data
            hg = Hostgroup()
            # Set by default members to a void list
            setattr(hg, 'members', [])

        self.update_element(hg, data)

        for (h_id, h_name) in members:
            if h_name in self.hosts:
                hg.members.append(self.hosts[h_name])
                # Should got uniq value, do uniq this list
                hg.members = list(set(hg.members))

        #print "HG:", hg
        self.hostgroups[hostgroup_name] = hg
        self.number_of_objects += 1


    def manage_initial_service_status_brok(self, b):
        data = b.data
        s_id = data['id']
        host_name = data['host_name']
        service_description = data['service_description']
        inst_id = data['instance_id']
        
        #print "Creating Service:", s_id, data
        s = Service({})
        s.instance_id = inst_id

        self.update_element(s, data)
        self.set_schedulingitem_values(s)
        
        try:
            h = self.hosts[host_name]
            # Reconstruct the connection between hosts and services
            h.services.append(s)
            # There is already a s.host_name, but a reference to the h object can be useful too
            s.host = h
        except Exception:
            return
        for dtc in s.downtimes + s.comments:
            dtc.ref = s
        self.services[host_name+service_description] = s
        self.number_of_objects += 1
        # We need this for manage_initial_servicegroup_status_brok where it
        # will speed things up dramatically
        self.service_id_cache[s.id] = s


    #In fact, an update of a service is like a check return
    def manage_update_service_status_brok(self, b):
        self.manage_service_check_result_brok(b)
        data = b.data
        host_name = data['host_name']
        service_description = data['service_description']
        #In the status, we've got duplicated item, we must relink thems
        try:
            s = self.services[host_name+service_description]
        except KeyError:
            print "Warning : the service %s/%s is unknown!" % (host_name, service_description)
            return
        self.update_element(s, data)
        self.set_schedulingitem_values(s)
        for dtc in s.downtimes + s.comments:
            dtc.ref = s
        self.livestatus.count_event('service_checks')
   


    def manage_initial_servicegroup_status_brok(self, b):
        data = b.data
        sg_id = data['id']
        servicegroup_name = data['servicegroup_name']
        members = data['members']
        del data['members']

        # Like for hostgroups, maybe we already got this
        # service group from another instance, need to
        # factorize all
        try:
            sg = self.servicegroups[servicegroup_name]
        except KeyError:
            #print "Creating servicegroup:", sg_id, data
            sg = Servicegroup()
            # By default set members as a void list
            setattr(sg, 'members', [])

        self.update_element(sg, data)

        for (s_id, s_name) in members:
            # A direct lookup by s_host_name+s_name is not possible
            # because we don't have the host_name in members, only ids.
            try:
                sg.members.append(self.service_id_cache[s_id])
            except Exception:
                pass

        sg.members = list(set(sg.members))
        self.servicegroups[servicegroup_name] = sg
        self.number_of_objects += 1


    def manage_initial_contact_status_brok(self, b):
        data = b.data
        contact_name = data['contact_name']
        #print "Creating Contact:", c_id, data
        c = Contact({})
        self.update_element(c, data)
        #print "C:", c
        self.contacts[contact_name] = c
        self.number_of_objects += 1


    def manage_initial_contactgroup_status_brok(self, b):
        data = b.data
        contactgroup_name = data['contactgroup_name']
        members = data['members']
        del data['members']
        #print "Creating contactgroup:", cg_id, data
        cg = Contactgroup()
        self.update_element(cg, data)
        setattr(cg, 'members', [])
        for (c_id, c_name) in members:
            if c_name in self.contacts:
                cg.members.append(self.contacts[c_name])
        #print "CG:", cg
        self.contactgroups[contactgroup_name] = cg
        self.number_of_objects += 1


    def manage_initial_timeperiod_status_brok(self, b):
        data = b.data
        timeperiod_name = data['timeperiod_name']
        #print "Creating Timeperiod:", tp_id, data
        tp = Timeperiod({})
        self.update_element(tp, data)
        #print "TP:", tp
        self.timeperiods[timeperiod_name] = tp
        self.number_of_objects += 1


    def manage_initial_command_status_brok(self, b):
        data = b.data
        command_name = data['command_name']
        #print "Creating Command:", c_id, data
        c = Command({})
        self.update_element(c, data)
        #print "CMD:", c
        self.commands[command_name] = c
        self.number_of_objects += 1


    def manage_initial_scheduler_status_brok(self, b):
        data = b.data
        scheduler_name = data['scheduler_name']
        print "Creating Scheduler:", scheduler_name, data
        sched = SchedulerLink({})
        print "Created a new scheduler", sched
        self.update_element(sched, data)
        print "Updated scheduler"
        #print "CMD:", c
        self.schedulers[scheduler_name] = sched
        print "scheduler added"
        #print "MONCUL: Add a new scheduler ", sched
        self.number_of_objects += 1


    def manage_update_scheduler_status_brok(self, b):
        data = b.data
        scheduler_name = data['scheduler_name']
        try:
            s = self.schedulers[scheduler_name]
            self.update_element(s, data)
            #print "S:", s
        except Exception:
            pass


    def manage_initial_poller_status_brok(self, b):
        data = b.data
        poller_name = data['poller_name']
        print "Creating Poller:", poller_name, data
        poller = PollerLink({})
        print "Created a new poller", poller
        self.update_element(poller, data)
        print "Updated poller"
        #print "CMD:", c
        self.pollers[poller_name] = poller
        print "poller added"
        #print "MONCUL: Add a new scheduler ", sched
        self.number_of_objects += 1


    def manage_update_poller_status_brok(self, b):
        data = b.data
        poller_name = data['poller_name']
        try:
            s = self.pollers[poller_name]
            self.update_element(s, data)
        except Exception:
            pass


    def manage_initial_reactionner_status_brok(self, b):
        data = b.data
        reactionner_name = data['reactionner_name']
        print "Creating Reactionner:", reactionner_name, data
        reac = ReactionnerLink({})
        print "Created a new reactionner", reac
        self.update_element(reac, data)
        print "Updated reactionner"
        #print "CMD:", c
        self.reactionners[reactionner_name] = reac
        print "reactionner added"
        #print "MONCUL: Add a new scheduler ", sched
        self.number_of_objects += 1


    def manage_update_reactionner_status_brok(self, b):
        data = b.data
        reactionner_name = data['reactionner_name']
        try:
            s = self.reactionners[reactionner_name]
            self.update_element(s, data)
        except Exception:
            pass


    def manage_initial_broker_status_brok(self, b):
        data = b.data
        broker_name = data['broker_name']
        print "Creating Broker:", broker_name, data
        broker = BrokerLink({})
        print "Created a new broker", broker
        self.update_element(broker, data)
        print "Updated broker"
        #print "CMD:", c
        self.brokers[broker_name] = broker
        print "broker added"
        #print "MONCUL: Add a new scheduler ", sched
        self.number_of_objects += 1


    def manage_update_broker_status_brok(self, b):
        data = b.data
        broker_name = data['broker_name']
        try:
            s = self.brokers[broker_name]
            self.update_element(s, data)
        except Exception:
            pass


    #A service check have just arrived, we UPDATE data info with this
    def manage_service_check_result_brok(self, b):
        data = b.data
        host_name = data['host_name']
        service_description = data['service_description']
        try:
            s = self.services[host_name+service_description]
            self.update_element(s, data)
        except Exception:
            pass


    #A service check update have just arrived, we UPDATE data info with this
    def manage_service_next_schedule_brok(self, b):
        self.manage_service_check_result_brok(b)


    def manage_host_check_result_brok(self, b):
        data = b.data
        host_name = data['host_name']
        try:
            h = self.hosts[host_name]
            self.update_element(h, data)
        except Exception:
            pass


    # this brok should arrive within a second after the host_check_result_brok
    def manage_host_next_schedule_brok(self, b):
        self.manage_host_check_result_brok(b)


    #A log brok will be written into a database
    def manage_log_brok(self, b):
        data = b.data

        line = data['log'].encode('UTF-8').rstrip()

        # split line and make sql insert
        #print "LOG--->", line
        # [1278280765] SERVICE ALERT: test_host_0
        # split leerzeichen
        if line[0] != '[' and line[11] != ']':
            pass
            print "INVALID"
            # invalid
        else:
            service_states = { 'OK' : 0, 'WARNING' : 1, 'CRITICAL' : 2, 'UNKNOWN' : 3, 'RECOVERY' : 0 }
            host_states = { 'UP' : 0, 'DOWN' : 1, 'UNREACHABLE' : 2, 'UNKNOWN': 3, 'RECOVERY' : 0 }

            # 'attempt', 'class', 'command_name', 'comment', 'contact_name', 'host_name', 'lineno', 'message',
            # 'options', 'plugin_output', 'service_description', 'state', 'state_type', 'time', 'type',
            # 0:info, 1:state, 2:program, 3:notification, 4:passive, 5:command

            # lineno, message?, plugin_output?
            logobject = LOGOBJECT_INFO
            logclass = LOGCLASS_INVALID
            attempt, state = [0] * 2
            command_name, comment, contact_name, host_name, message, options, plugin_output, service_description, state_type = [''] * 9
            time= line[1:11]
            #print "i start with a timestamp", time
            first_type_pos = line.find(' ') + 1
            last_type_pos = line.find(':')
            first_detail_pos = last_type_pos + 2
            type = line[first_type_pos:last_type_pos]
            options = line[first_detail_pos:]
            message = line
            if type == 'CURRENT SERVICE STATE':
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_STATE
                host_name, service_description, state, state_type, attempt, plugin_output = options.split(';', 5)
            elif type == 'INITIAL SERVICE STATE':
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_STATE
                host_name, service_description, state, state_type, attempt, plugin_output = options.split(';', 5)
            elif type == 'SERVICE ALERT':
                # SERVICE ALERT: srv-40;Service-9;CRITICAL;HARD;1;[Errno 2] No such file or directory
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_ALERT
                host_name, service_description, state, state_type, attempt, plugin_output = options.split(';', 5)
                state = service_states[state]
            elif type == 'SERVICE DOWNTIME ALERT':
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_ALERT
                host_name, service_description, state_type, comment = options.split(';', 3)
            elif type == 'SERVICE FLAPPING ALERT':
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_ALERT
                host_name, service_description, state_type, comment = options.split(';', 3)

            elif type == 'CURRENT HOST STATE':
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_STATE
                host_name, state, state_type, attempt, plugin_output = options.split(';', 4)
            elif type == 'INITIAL HOST STATE':
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_STATE
                host_name, state, state_type, attempt, plugin_output = options.split(';', 4)
            elif type == 'HOST ALERT':
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_ALERT
                host_name, state, state_type, attempt, plugin_output = options.split(';', 4)
                state = host_states[state]
            elif type == 'HOST DOWNTIME ALERT':
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_ALERT
                host_name, state_type, comment = options.split(';', 2)
            elif type == 'HOST FLAPPING ALERT':
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_ALERT
                host_name, state_type, comment = options.split(';', 2)

            elif type == 'SERVICE NOTIFICATION':
                # tust_cuntuct;test_host_0;test_ok_0;CRITICAL;notify-service;i am CRITICAL  <-- normal
                # SERVICE NOTIFICATION: test_contact;test_host_0;test_ok_0;DOWNTIMESTART (OK);notify-service;OK
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_NOTIFICATION
                contact_name, host_name, service_description, state_type, command_name, check_plugin_output = options.split(';', 5)
                if '(' in state_type: # downtime/flapping/etc-notifications take the type UNKNOWN
                    state_type = 'UNKNOWN'
                state = service_states[state_type]
            elif type == 'HOST NOTIFICATION':
                # tust_cuntuct;test_host_0;DOWN;notify-host;i am DOWN
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_NOTIFICATION
                contact_name, host_name, state_type, command_name, check_plugin_output = options.split(';', 4)
                if '(' in state_type:
                    state_type = 'UNKNOWN'
                state = host_states[state_type]

            elif type == 'PASSIVE SERVICE CHECK':
                logobject = LOGOBJECT_SERVICE
                logclass = LOGCLASS_PASSIVECHECK
                host_name, service_description, state, check_plugin_output = options.split(';', 3)
            elif type == 'PASSIVE HOST CHECK':
                logobject = LOGOBJECT_HOST
                logclass = LOGCLASS_PASSIVECHECK
                host_name, state, check_plugin_output = options.split(';', 2)

            elif type == 'SERVICE EVENT HANDLER':
                # SERVICE EVENT HANDLER: test_host_0;test_ok_0;CRITICAL;SOFT;1;eventhandler
                logobject = LOGOBJECT_SERVICE
                host_name, service_description, state, state_type, attempt, command_name = options.split(';', 5)
                state = service_states[state]
            elif type == 'HOST EVENT HANDLER':
                logobject = LOGOBJECT_HOST
                host_name, state, state_type, attempt, command_name = options.split(';', 4)
                state = host_states[state]

            elif type == 'EXTERNAL COMMAND':
                logobject = LOGOBJECT_INFO
                logclass = LOGCLASS_COMMAND
            elif type.startswith('starting...') or \
                 type.startswith('shutting down...') or \
                 type.startswith('Bailing out') or \
                 type.startswith('active mode...') or \
                 type.startswith('standby mode...'):
                logobject = LOGOBJECT_INFO
                logclass = LOGCLASS_PROGRAM
            else:
                pass
                #print "does not match"

            lineno = 0

            try:
                values = (logobject, attempt, logclass, command_name, comment, contact_name, host_name, lineno, message, options, plugin_output, service_description, state, state_type, time, type)
            except:
                print "Unexpected error:", sys.exc_info()[0]
            #print "LOG:", logobject, logclass, type, host_name, service_description, state, state_type, attempt, plugin_output, contact_name, comment, command_name
            #print "LOG:", values
            try:
                if logclass != LOGCLASS_INVALID:
                    if sqlite3.paramstyle == 'pyformat':
                        values = dict(zip([str(x) for x in xrange(len(values))], values))
                        self.dbcursor.execute('INSERT INTO LOGS VALUES(%(0)s, %(1)s, %(2)s, %(3)s, %(4)s, %(5)s, %(6)s, %(7)s, %(8)s, %(9)s, %(10)s, %(11)s, %(12)s, %(13)s, %(14)s, %(15)s)', values)
                    else:
                        self.dbcursor.execute('INSERT INTO LOGS VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', values)
                    self.dbconn.commit()
            except sqlite3.Error, e:
                print "An error occurred:", e.args[0]
                print "DATABASE ERROR!!!!!!!!!!!!!!!!!"
        self.livestatus.count_event('log_message')