def _get_status(self):
    """
    Get status from Sensu/Uchiwa

    Resets self.new_hosts, fetches all current events through
    self._get_all_events(), converts every event into a GenericService and
    hands it to self._insert_service_to_hosts().

    Returns:
        Result(error="") when every event was processed; otherwise a Result
        built from the (result, error) pair self.Error() produces for the
        exception that interrupted processing.
    """
    self.new_hosts = dict()

    try:
        events = self._get_all_events()
        for event in events:
            event_check = event['check']
            event_client = event['client']

            new_service = GenericService()
            new_service.event_id = event['id']
            new_service.host = event_client['name']
            new_service.name = event_check['name']

            # Uchiwa needs the 'dc' for re_check; Sensu does not
            new_service.site = event['dc'] if 'dc' in event else None

            # dict.get() never raises KeyError for an unmapped code, so the
            # 'UNKNOWN' fallback has to be passed as the default. A check
            # without any 'status' entry is reported as 'UNKNOWN' as well.
            try:
                status_code = event_check['status']
            except KeyError:
                new_service.status = 'UNKNOWN'
            else:
                new_service.status = self.SEVERITY_CODE_TEXT_MAP.get(
                    status_code, 'UNKNOWN')

            last_check_time = datetime.utcfromtimestamp(
                int(event['timestamp']))
            new_service.last_check = self._aslocaltimestr(last_check_time)
            new_service.duration = HumanReadableDurationFromTimestamp(
                int(event['last_state_change']))
            new_service.status_information = event_check['output']
            # needs a / with a number on either side to work
            new_service.attempt = str(event['occurrences']) + '/1'
            new_service.passiveonly = False
            new_service.notifications_disabled = event['silenced']
            new_service.flapping = False
            # a silenced event is shown as acknowledged, too
            new_service.acknowledged = event['silenced']
            new_service.scheduled_downtime = False

            self._insert_service_to_hosts(new_service)
    except Exception:
        # set checking flag back to False and report the failure
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(traceback.format_exc())
        return Result(result=result, error=error)

    return Result(error="")
def _get_status(self):
    """
    Get status from Zabbix Server

    Works in two phases:

    1. hosts    - every host returned by host.get whose 'available' flag is
                  not '0' and whose mapped status is not 'OK' is stored in
                  self.new_hosts as a GenericHost.
    2. services - every unacknowledged trigger with value 1 whose mapped
                  priority is not 'OK' is stored as a GenericService below
                  its host. A trigger named "Host is down <host>" marks the
                  host DOWN instead of creating a service.

    Returns:
        The initial Result() (or the result attached to a non-terminating
        ZabbixError) on success; a Result built from self.Error() when a
        Zabbix exception interrupts processing.
    """
    ret = Result()
    # create Nagios items dictionary with two lists for services and hosts
    # every list will contain a dictionary for every failed service/host
    # this dictionary is only temporary
    nagitems = {"services": [], "hosts": []}

    def last_application_name(items):
        """Return the name of the last application of the first item.

        Falls back to 'NO APP' when item.get returned nothing or the item
        has no applications, instead of raising IndexError.
        """
        if not items:
            return 'NO APP'
        applications = items[0].get('applications') or []
        if not applications:
            return 'NO APP'
        return applications[-1]['name']

    # Create URLs for the configured filters
    if self.zapi is None:
        self._login()

    try:
        try:
            hosts = self.zapi.host.get({
                "output": [
                    "host", "ip", "status", "available", "error",
                    "errors_from"
                ],
                "filter": {}
            })
        except (ZabbixError, ZabbixAPIException):
            # set checking flag back to False
            self.isChecking = False
            result, error = self.Error(sys.exc_info())
            return Result(result=result, error=error)

        for host in hosts:
            # if host is disabled on server safely ignore it
            if host['available'] == '0':
                continue

            n = {
                'host': host['host'],
                'status': self.statemap.get(host['status'], host['status']),
                'last_check': 'n/a',
                'duration': HumanReadableDurationFromTimestamp(
                    host['errors_from']),
                'status_information': host['error'],
                'attempt': '1/1',
                'site': '',
                'address': host['host'],
            }

            # Zabbix shows OK hosts too - kick 'em!
            if n['status'] == 'OK':
                continue

            # add dictionary full of information about this host item to nagitems
            nagitems["hosts"].append(n)

            # after collecting data in nagitems create objects from it
            # host objects contain service objects
            if n["host"] not in self.new_hosts:
                new_host = GenericHost()
                self.new_hosts[n["host"]] = new_host
                new_host.name = n["host"]
                new_host.status = n["status"]
                new_host.last_check = n["last_check"]
                new_host.duration = n["duration"]
                new_host.attempt = n["attempt"]
                new_host.status_information = n["status_information"]
                new_host.site = n["site"]
                new_host.address = n["address"]
    except ZabbixError:
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        return Result(result=result, error=error)

    # services
    services = []
    try:
        api_version = self.zapi.api_version()
    except ZabbixAPIException:
        # FIXME Is there a cleaner way to handle this? I just borrowed
        # this code from 80 lines ahead. -- AGV
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    # Compare the major version numerically: as plain strings '10.0' sorts
    # before '3.0'. If the major part is not numeric fall back to the
    # string comparison.
    major_version = str(api_version).split('.')[0]
    if major_version.isdigit():
        lookup_host_by_trigger = int(major_version) >= 3
    else:
        lookup_host_by_trigger = str(api_version) >= '3.0'

    try:
        try:
            hostgroup_ids = [
                x['groupid'] for x in self.zapi.hostgroup.get({
                    'output': 'extend',
                    'with_monitored_items': True
                }) if int(x['internal']) == 0
            ]

            zabbix_triggers = self.zapi.trigger.get({
                'sortfield': 'lastchange',
                'withLastEventUnacknowledged': True,
                'groupids': hostgroup_ids,
                "monitored": True,
                "filter": {
                    'value': 1
                }
            })

            triggers_list = [
                trigger.get('triggerid') for trigger in zabbix_triggers
            ]

            this_trigger = self.zapi.trigger.get({
                'triggerids': triggers_list,
                'expandDescription': True,
                'output': 'extend',
                'select_items': 'extend',  # thats for zabbix api 1.8
                'selectItems': 'extend',  # thats for zabbix api 2.0+
                'expandData': True
            })

            # the API answers with a dict keyed by trigger id or with a list;
            # the item id is passed the same way as before for each shape
            if isinstance(this_trigger, dict):
                found_triggers = list(this_trigger.values())
                wrap_item_id = True
            elif isinstance(this_trigger, list):
                found_triggers = this_trigger
                wrap_item_id = False
            else:
                found_triggers = []
                wrap_item_id = False

            for trigger in found_triggers:
                services.append(trigger)
                item_id = trigger['items'][0]['itemid']
                this_item = self.zapi.item.get({
                    'itemids': [item_id] if wrap_item_id else item_id,
                    'selectApplications': 'extend'
                })
                # use the last application name
                trigger['application'] = last_application_name(this_item)

        except ZabbixAPIException:
            # FIXME Is there a cleaner way to handle this? I just borrowed
            # this code from 80 lines ahead. -- AGV
            # set checking flag back to False
            self.isChecking = False
            result, error = self.Error(sys.exc_info())
            print(sys.exc_info())
            return Result(result=result, error=error)

        except ZabbixError as e:
            if e.terminate:
                return e.result
            # non-terminating error: keep going with whatever was collected
            # and hand the attached result back to the caller at the end
            ret = e.result

        for service in services:
            # Zabbix probably shows OK services too - kick 'em!
            # UPDATE Zabbix api 3.0 doesn't but I didn't tried with older
            # so I left it
            status = self.statemap.get(service['priority'],
                                       service['priority'])
            if status == 'OK':
                continue

            if not service['description'].endswith('...'):
                state = service['description']
            else:
                state = service['items'][0]['lastvalue']

            # newest check time of all items involved, never below 0
            lastcheck = max(
                [int(item['lastclock']) for item in service['items']] + [0])

            if len(service['comments']) == 0:
                srvc = service['application']
            else:
                srvc = self.nagiosify_service(service['comments'])

            n = {
                'service': srvc,
                'status': status,
                # 1/1 attempt looks at least like there has been any attempt
                'attempt': '1/1',
                'duration': HumanReadableDurationFromTimestamp(
                    service['lastchange']),
                'status_information': state,
                'passiveonly': 'no',
                'last_check': time.strftime("%d/%m/%Y %H:%M:%S",
                                            time.localtime(lastcheck)),
                'notifications': 'yes',
                'flapping': 'no',
                'site': '',
                'command': 'zabbix',
                'triggerid': service['triggerid'],
            }

            if lookup_host_by_trigger:
                n['host'] = self.zapi.host.get({
                    "output": ["host"],
                    "filter": {},
                    "triggerids": service['triggerid']
                })[0]['host']
            else:
                n['host'] = service['host']

            nagitems["services"].append(n)

            # after collecting data in nagitems create objects from it
            # host objects contain service objects
            if n["host"] not in self.new_hosts:
                new_host = GenericHost()
                self.new_hosts[n["host"]] = new_host
                new_host.name = n["host"]
                new_host.status = "UP"
                new_host.site = n["site"]
                new_host.address = n["host"]

            host_object = self.new_hosts[n["host"]]

            # if a service does not exist create its object
            if n["service"] not in host_object.services:
                # workaround for non-existing (or not found) host status flag
                if n["service"] == "Host is down %s" % (n["host"]):
                    host_object.status = "DOWN"
                    # also take duration from "service" aka trigger
                    host_object.duration = n["duration"]
                else:
                    new_service = GenericService()
                    host_object.services[n["service"]] = new_service
                    new_service.host = n["host"]
                    new_service.name = n["service"]
                    new_service.status = n["status"]
                    new_service.last_check = n["last_check"]
                    new_service.duration = n["duration"]
                    new_service.attempt = n["attempt"]
                    new_service.status_information = n["status_information"]
                    new_service.passiveonly = False
                    new_service.flapping = False
                    new_service.site = n["site"]
                    new_service.address = n["host"]
                    new_service.command = n["command"]
                    new_service.triggerid = n["triggerid"]

    except (ZabbixError, ZabbixAPIException):
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    return ret
def _get_status(self):
    """
    Get status from Zabbix Server

    For every problem reported by problem.get the raising trigger is looked
    up. Problems without a trigger are skipped. The trigger's first host is
    stored in self.new_hosts (keyed by host id) the first time it is seen,
    and every problem becomes a GenericService keyed by its event id.

    Returns:
        Result() on success, or a Result built from self.Error() when a
        ZabbixLightApiException is raised.
    """
    # =========================================
    # problems
    # =========================================
    try:
        # Are we logged in?
        if self.zlapi is None:
            self.zlapi = ZabbixLightApi(server_name=self.name,
                                        monitor_url=self.monitor_url,
                                        validate_certs=self.validate_certs)

        # zabbix could get an upgrade between checks, we need to check version each time
        self.zbx_version = self.zlapi.do_request("apiinfo.version", {},
                                                 no_auth=True)

        # check are we still logged in, if not, relogin
        if not self.zlapi.logged_in():
            self.zlapi.login(self.username, self.password)

        # Get all current problems (trigger based), no need to check
        # acknowledged problems if they are filtered out (load reduce)
        problem_query = {'recent': False}
        if conf.filter_acknowledged_hosts_services:
            problem_query['acknowledged'] = False
        problems = self.zlapi.do_request("problem.get", problem_query)

        for problem in problems:
            # get trigger which rose current problem
            triggers = self.zlapi.do_request(
                "trigger.get", {
                    'triggerids': problem['objectid'],
                    'monitored': True,
                    'active': True,
                    'skipDependent': True,
                    'selectHosts': [
                        'hostid', 'name', 'maintenance_status',
                        'available', 'error', 'errors_from',
                        'ipmi_available', 'ipmi_error', 'ipmi_errors_from',
                        'jmx_available', 'jmx_error', 'jmx_errors_from',
                        'snmp_available', 'snmp_error', 'snmp_errors_from'
                    ],
                    'selectItems': ['key_', 'lastclock']
                })

            # problems on disabled/maintenance/deleted hosts don't have triggers
            # have to do that because of how zabbix housekeeping service work
            # API reports past problems for hosts that no longer exist
            if not triggers:
                continue

            service_id = problem['eventid']
            trigger = triggers[0]
            host_info = trigger['hosts'][0]
            host_id = host_info['hostid']

            # new host to report, we only need to do that at first problem for that host
            if host_id not in self.new_hosts:
                new_host = GenericHost()
                self.new_hosts[host_id] = new_host
                new_host.name = host_info['name']

                # host has active maintenance period
                if host_info['maintenance_status'] == "1":
                    new_host.scheduled_downtime = True

                if parse_version(self.zbx_version) < parse_version("5.4.0"):
                    # old api shows host interfaces status in host object:
                    # agent, ipmi, jmx and snmp are checked in this order and
                    # the last unavailable one determines the shown details
                    for prefix in ('', 'ipmi_', 'jmx_', 'snmp_'):
                        if host_info.get(prefix + 'available', '0') == "2":
                            new_host.status = "DOWN"
                            new_host.status_information = host_info[
                                prefix + 'error']
                            new_host.duration = HumanReadableDurationFromTimestamp(
                                host_info[prefix + 'errors_from'])
                else:
                    # new api shows host interfaces status in hostinterfaces object
                    hostinterfaces = self.zlapi.do_request(
                        "hostinterface.get", {"hostids": host_id})
                    # check them all and mark host as DOWN on first not available interface
                    for hostinterface in hostinterfaces:
                        if hostinterface.get('available', '0') != "2":
                            continue
                        new_host.status = "DOWN"
                        new_host.status_information = hostinterface['error']
                        new_host.duration = HumanReadableDurationFromTimestamp(
                            hostinterface['errors_from'])
                        # we stop checking rest of interfaces
                        break

            # service to report
            new_service = GenericService()
            self.new_hosts[host_id].services[service_id] = new_service
            new_service.host = host_info['name']
            new_service.status = self.statemap.get(problem['severity'],
                                                   problem['severity'])
            new_service.duration = HumanReadableDurationFromTimestamp(
                problem['clock'])
            first_item = trigger['items'][0]
            new_service.name = first_item['key_']
            new_service.last_check = time.strftime(
                "%d/%m/%Y %H:%M:%S",
                time.localtime(int(first_item['lastclock'])))

            # we add opdata to status information just like in zabbix GUI
            if problem["opdata"] == "":
                new_service.status_information = problem['name']
            else:
                new_service.status_information = (
                    problem['name'] + " (" + problem["opdata"] + ")")

            # service is acknowledged
            if problem['acknowledged'] == "1":
                new_service.acknowledged = True

    except ZabbixLightApiException:
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        return Result(result=result, error=error)

    return Result()
def _get_status(self):
    """
    Get status from Zabbix Server

    Works in two phases:

    1. hosts    - every host returned by host.get is stored in
                  self.new_hosts (keyed by visible name, or technical host
                  name when the visible name is empty) so the service phase
                  can attach to it.
    2. services - every tripped trigger whose mapped priority is not 'OK'
                  becomes a GenericService (keyed by trigger id) below its
                  host.

    Returns:
        The initial Result() (or the result attached to a non-terminating
        ZabbixError) on success; a Result built from self.Error() when a
        Zabbix exception interrupts processing.
    """
    ret = Result()
    # create Nagios items dictionary with two lists for services and hosts
    # every list will contain a dictionary for every failed service/host
    # this dictionary is only temporary
    nagitems = {"services": [], "hosts": []}

    # Create URLs for the configured filters
    if self.zapi is None:
        self._login()

    try:
        # =========================================
        # Hosts Zabbix API data
        # =========================================
        try:
            hosts = self.zapi.host.get({
                "output": [
                    "hostid", "host", "name", "status",
                    "available", "error", "errors_from",
                    "snmp_available", "snmp_error", "snmp_errors_from",
                    "ipmi_available", "ipmi_error", "ipmi_errors_from",
                    "jmx_available", "jmx_error", "jmx_errors_from",
                    "maintenance_status", "maintenance_from"
                ],
                "selectInterfaces": ["ip"],
                "filter": {}
            })
        except (ZabbixError, ZabbixAPIException, APITimeout, Already_Exists):
            # set checking flag back to False
            self.isChecking = False
            result, error = self.Error(sys.exc_info())
            return Result(result=result, error=error)

        # get All Hosts.
        # 1. Store data in cache (to be used by events)
        # 2. We store as faulty two kinds of hosts incidences:
        #    - Disabled hosts
        #    - Hosts with issues trying to connect to agent/service
        #    - In maintenance
        # status = 1 -> Disabled
        # available ZBX: 0 -> No agents 1 -> available 2-> Agent access error
        # ipmi_available IPMI: 0 -> No agents 1 -> available 2-> Agent access error
        # maintenance_status = 1 In maintenance
        for host in hosts:
            # a host without any interface must not abort the whole run
            interfaces = host.get('interfaces') or []
            n = {
                'host': host['host'],
                'name': host['name'],
                'server': self.name,
                'status': 'UP',  # Host is OK by default
                'last_check': 'n/a',
                'duration': '',
                'attempt': 'N/A',
                'status_information': '',
                # status flags
                'passiveonly': False,
                'notifications_disabled': False,
                'flapping': False,
                'acknowledged': False,
                'scheduled_downtime': False,
                # Zabbix backend data
                'hostid': host['hostid'],
                'site': '',
                'address': interfaces[0]['ip'] if interfaces else '',
            }

            if host['maintenance_status'] == '1':
                n['scheduled_downtime'] = True

            if host['status'] == '1':
                # filter services and hosts by "filter_hosts_services_disabled_notifications"
                n['notifications_disabled'] = True
                # Filter only hosts by filter "Host & services with disabled checks"
                n['passiveonly'] = True

            # attempt to fix https://github.com/HenriWahl/Nagstamon/issues/535
            # if host['available'] == '0' and host['snmp_available'] == '0' and host['ipmi_available'] == '0' and host['jmx_available'] == '0':
            #     n['status'] = 'UNREACHABLE'
            #     n['status_information'] = 'Host agents in unknown state'
            #     n['duration'] = 'Unknown'

            # ipmi, snmp, jmx and agent are checked in this order; the last
            # unavailable one determines the details shown for the host
            for prefix in ('ipmi_', 'snmp_', 'jmx_', ''):
                if host[prefix + 'available'] == '2':
                    n['status'] = 'DOWN'
                    n['status_information'] = host[prefix + 'error']
                    n['duration'] = HumanReadableDurationFromTimestamp(
                        host[prefix + 'errors_from'])

            # Zabbix shows OK hosts too - kick 'em!
            if not n['status'] == 'UP':
                # add dictionary full of information about this host item to nagitems
                nagitems["hosts"].append(n)

            # after collecting data in nagitems create objects from it
            # host objects contain service objects
            key_host = n["name"] or n["host"]

            if key_host not in self.new_hosts:
                new_host = GenericHost()
                self.new_hosts[key_host] = new_host
                new_host.hostid = n["hostid"]
                new_host.host = n["host"]
                new_host.name = n["name"]
                new_host.status = n["status"]
                new_host.last_check = n["last_check"]
                new_host.duration = n["duration"]
                new_host.attempt = n["attempt"]
                new_host.status_information = n["status_information"]
                new_host.site = n["site"]
                new_host.address = n["address"]
                new_host.notifications_disabled = n["notifications_disabled"]
                new_host.scheduled_downtime = n["scheduled_downtime"]
                new_host.passiveonly = n["passiveonly"]
                new_host.acknowledged = n["acknowledged"]
                new_host.flapping = n["flapping"]

    except ZabbixError:
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        return Result(result=result, error=error)

    # =========================================
    # services
    # =========================================
    services = []

    try:
        # the version itself is not used below; the call is kept so an
        # unreachable API is still reported at this point
        api_version = self.zapi.api_version()
    except ZabbixAPIException:
        # FIXME Is there a cleaner way to handle this? I just borrowed
        # this code from 80 lines ahead. -- AGV
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    try:
        try:
            # Get a list of all issues (AKA tripped triggers)
            # Zabbix 3+ returns array of objects
            triggers = self.zapi.trigger.get({
                'only_true': True,
                'skipDependent': True,
                'monitored': True,
                'active': True,
                'output': 'extend',
                'expandDescription': True,
                'selectHosts': ['hostid', 'host', 'name'],
                'selectItems': ['itemid', 'name', 'key_', 'lastvalue', 'state', 'lastclock'],  # thats for zabbix api 2.0+
                'filter': {'value': 1},
            })

            # Do another query to find out which issues are Unacknowledged
            # Zabbix 3+ returns array of objects
            unack_triggers = self.zapi.trigger.get({
                'only_true': True,
                'skipDependent': True,
                'monitored': True,
                'active': True,
                'output': ['triggerid'],
                'expandDescription': True,
                'selectHosts': ['hostid'],
                'withLastEventUnacknowledged': True,
            })
            # a set keeps the per-trigger membership test cheap
            unack_trigger_ids = {u['triggerid'] for u in unack_triggers}

            for t in triggers:
                t['acknowledged'] = t['triggerid'] not in unack_trigger_ids

                # get Application name for the trigger
                this_item = self.zapi.item.get({
                    'itemids': [t['items'][0]['itemid']],
                    'output': ['itemid', 'hostid', 'name', 'lastvalue'],
                    'selectApplications': 'extend'
                })
                t['application'] = self.getLastApp(this_item)
                try:
                    t['lastvalue'] = this_item[0]['lastvalue']
                except IndexError:
                    self.Debug(server=self.get_name(),
                               debug="ItemID '%s' has no values" % t['items'][0]['itemid'],
                               head='WARNING')

                services.append(t)

        except ZabbixAPIException:
            # FIXME Is there a cleaner way to handle this? I just borrowed
            # this code from 80 lines ahead. -- AGV
            # set checking flag back to False
            self.isChecking = False
            result, error = self.Error(sys.exc_info())
            print(sys.exc_info())
            return Result(result=result, error=error)

        except ZabbixError as e:
            if e.terminate:
                return e.result
            # non-terminating error: keep going with whatever was collected
            # and hand the attached result back to the caller at the end
            ret = e.result

        for service in services:
            # Zabbix probably shows OK services too - kick 'em!
            # UPDATE Zabbix api 3.0 doesn't but I didn't tried with older
            # so I left it
            status = self.statemap.get(service['priority'], service['priority'])
            if status == 'OK':
                continue

            if not service['description'].endswith('...'):
                state = service['description']
            else:
                state = service['items'][0]['lastvalue']

            # A trigger can be triggered by multiple items
            # Get last checking date of any of the items involved
            lastcheck = max(
                [int(item['lastclock']) for item in service['items']] + [0])

            if self.use_description_name_service and \
                    len(service['comments']) != 0:
                srvc = self.nagiosify_service(service['comments'])
            else:
                srvc = service['application']

            trigger_host = service['hosts'][0]
            n = {
                'host': trigger_host['host'],
                'hostname': trigger_host['name'],
                'service': service['triggerid'],
                'server': self.name,
                'status': status,
                # Putting service in attempt column allow to see it in GUI
                'attempt': srvc,
                'duration': HumanReadableDurationFromTimestamp(service['lastchange']),
                'status_information': state,
                'last_check': time.strftime("%d/%m/%Y %H:%M:%S", time.localtime(lastcheck)),
                'site': '',
                'command': 'zabbix',
                # status flags
                'passiveonly': False,
                'notifications_disabled': False,
                'flapping': False,
                'acknowledged': service['acknowledged'],
                'scheduled_downtime': False,
                # Zabbix data
                'triggerid': service['triggerid'],
                'hostid': trigger_host['hostid'],
            }

            key = n["hostname"] or n["host"]

            # The host has to be looked up BEFORE any of its attributes is
            # read, otherwise a missing host raises a KeyError nobody catches.
            if key not in self.new_hosts:
                # This should never happen, because we've stored all hosts in new_hosts array
                print("================================")
                print("Host " + key + " not found in host cache")
                if conf.debug_mode is True:
                    self.Debug(server=self.get_name(), debug="Host not found [" + key + "]")
                # create a minimal host so the service is still reported
                placeholder = GenericHost()
                self.new_hosts[key] = placeholder
                placeholder.hostid = n["hostid"]
                placeholder.host = n["host"]
                placeholder.name = n["hostname"]
                placeholder.status = "UP"
                placeholder.site = n["site"]
                placeholder.address = n["host"]
                placeholder.scheduled_downtime = False

            host_object = self.new_hosts[key]

            if host_object.scheduled_downtime:
                n['scheduled_downtime'] = True

            nagitems["services"].append(n)

            # if a service does not exist create its object
            new_service = n["triggerid"]
            if new_service not in host_object.services:
                service_object = GenericService()
                host_object.services[new_service] = service_object
                service_object.host = key
                service_object.name = n["service"]
                service_object.status = n["status"]
                service_object.last_check = n["last_check"]
                service_object.duration = n["duration"]
                service_object.attempt = n["attempt"]
                service_object.status_information = n["status_information"]
                service_object.passiveonly = n["passiveonly"]
                service_object.notifications_disabled = n["notifications_disabled"]
                service_object.flapping = n["flapping"]
                service_object.acknowledged = n["acknowledged"]
                service_object.scheduled_downtime = n["scheduled_downtime"]
                service_object.site = n["site"]
                service_object.address = host_object.address
                service_object.command = n["command"]
                service_object.hostid = n["hostid"]
                service_object.triggerid = n["triggerid"]
                if conf.debug_mode is True:
                    self.Debug(server=self.get_name(),
                               debug="Adding new service[" + new_service + "] **" + n['service'] + "**")

    except (ZabbixError, ZabbixAPIException):
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    return ret
def _get_status(self):
    """
    Get status from Zabbix Server

    For every problem reported by problem.get the raising trigger is looked
    up. Problems without a trigger are skipped. The trigger's first host is
    stored in self.new_hosts (keyed by host id) the first time it is seen,
    and every problem becomes a GenericService keyed by its event id.

    Returns:
        Result() on success, or a Result built from self.Error() when a
        ZabbixAPIException is raised.
    """
    # Login to ZabbixAPI
    self._login()

    # =========================================
    # problems
    # =========================================
    try:
        # Get all current problems (trigger based)
        problems = self.zapi.problem.get({'recent': False})

        for problem in problems:
            # get trigger which rose current problem
            triggers = self.zapi.trigger.get({
                'triggerids': problem['objectid'],
                'monitored': True,
                'active': True,
                'skipDependent': True,
                'selectHosts': ['hostid', 'name', 'maintenance_status',
                                'available', 'error', 'errors_from',
                                'ipmi_available', 'ipmi_error', 'ipmi_errors_from',
                                'jmx_available', 'jmx_error', 'jmx_errors_from',
                                'snmp_available', 'snmp_error', 'snmp_errors_from'],
                'selectItems': ['key_', 'lastclock'],
            })

            # problems on disabled/maintenance/deleted hosts don't have triggers
            # have to do that because of how zabbix housekeeping service work
            # API reports past problems for hosts that no longer exist
            if not triggers:
                continue

            service_id = problem['eventid']
            trigger = triggers[0]
            host_info = trigger['hosts'][0]
            host_id = host_info['hostid']

            # new host to report, we only need to do that at first problem for that host
            if host_id not in self.new_hosts:
                new_host = GenericHost()
                self.new_hosts[host_id] = new_host
                new_host.name = host_info['name']

                # host has active maintenance period
                if host_info['maintenance_status'] == "1":
                    new_host.scheduled_downtime = True

                # agent, ipmi, jmx and snmp are checked in this order; the
                # last unavailable one determines the details shown
                for prefix in ('', 'ipmi_', 'jmx_', 'snmp_'):
                    if host_info[prefix + 'available'] == "2":
                        new_host.status = "DOWN"
                        new_host.status_information = host_info[prefix + 'error']
                        new_host.duration = HumanReadableDurationFromTimestamp(
                            host_info[prefix + 'errors_from'])

            # service to report
            new_service = GenericService()
            self.new_hosts[host_id].services[service_id] = new_service
            new_service.host = host_info['name']
            new_service.status = self.statemap.get(problem['severity'],
                                                   problem['severity'])
            new_service.duration = HumanReadableDurationFromTimestamp(
                problem['clock'])
            first_item = trigger['items'][0]
            new_service.name = first_item['key_']
            new_service.last_check = time.strftime(
                "%d/%m/%Y %H:%M:%S",
                time.localtime(int(first_item['lastclock'])))

            # we add opdata to status information just like in zabbix GUI
            if problem["opdata"] == "":
                new_service.status_information = problem['name']
            else:
                new_service.status_information = (
                    problem['name'] + " (" + problem["opdata"] + ")")

            # service is acknowledged
            if problem['acknowledged'] == "1":
                new_service.acknowledged = True

    except ZabbixAPIException:
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    return Result()
def _get_status(self):
    """
    Get status from Thruk Server

    Resets self.new_hosts, then fetches the hosts JSON (self.cgiurl_hosts)
    and the services JSON (self.cgiurl_services) and turns every entry into
    a GenericHost / GenericService. Hosts that only show up in the services
    list are created with status "UP".

    Returns:
        Result() when both lists were processed,
        whatever self.check_for_error() returned if that is not False,
        Result(result=None, error="Login failed") when a response starts
        with "<" (self.refresh_authentication is set to True in that case),
        or a Result built from self.Error() when an exception was raised.
    """
    # new_hosts dictionary
    self.new_hosts = dict()

    # hosts - mostly the down ones
    # unfortunately the hosts status page has a different structure so
    # hosts must be analyzed separately
    try:
        result = self.FetchURL(self.cgiurl_hosts, giveback='raw')
        jsonraw = copy.deepcopy(result.result)
        error = copy.deepcopy(result.error)
        status_code = result.status_code

        # check if any error occured
        errors_occured = self.check_for_error(jsonraw, error, status_code)
        # if there are errors return them
        if errors_occured != False:
            return errors_occured

        # in case basic auth did not work try form login cookie based login
        if jsonraw.startswith("<"):
            self.refresh_authentication = True
            return Result(result=None, error="Login failed")

        # in case JSON is not empty evaluate it
        if not jsonraw == "[]":
            for h in json.loads(jsonraw):
                host_name = h["name"]
                if host_name in self.new_hosts:
                    continue
                host = GenericHost()
                self.new_hosts[host_name] = host
                host.name = host_name
                host.server = self.name
                host.status = self.STATES_MAPPING["hosts"][h["state"]]
                host.last_check = datetime.datetime.fromtimestamp(
                    int(h["last_check"])).isoformat(" ")
                host.duration = HumanReadableDurationFromTimestamp(
                    h["last_state_change"])
                host.attempt = "%s/%s" % (h["current_attempt"],
                                          h["max_check_attempts"])
                host.status_information = h["plugin_output"].replace(
                    "\n", " ").strip()
                host.passiveonly = not bool(int(h["active_checks_enabled"]))
                host.notifications_disabled = not bool(
                    int(h["notifications_enabled"]))
                host.flapping = bool(int(h["is_flapping"]))
                host.acknowledged = bool(int(h["acknowledged"]))
                host.scheduled_downtime = bool(
                    int(h["scheduled_downtime_depth"]))
                host.status_type = {0: "soft", 1: "hard"}[h["state_type"]]
    except Exception:
        import traceback
        traceback.print_exc(file=sys.stdout)
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        return Result(result=result, error=error)

    # services
    try:
        result = self.FetchURL(self.cgiurl_services, giveback="raw")
        jsonraw = copy.deepcopy(result.result)
        error = copy.deepcopy(result.error)
        status_code = result.status_code

        # check if any error occured
        errors_occured = self.check_for_error(jsonraw, error, status_code)
        # if there are errors return them
        if errors_occured != False:
            return errors_occured

        # in case basic auth did not work try form login cookie based login
        if jsonraw.startswith("<"):
            self.refresh_authentication = True
            return Result(result=None, error="Login failed")

        # in case JSON is not empty evaluate it
        if not jsonraw == "[]":
            for s in json.loads(jsonraw):
                host_name = s["host_name"]
                description = s["description"]

                # host objects contain service objects
                if host_name not in self.new_hosts:
                    host = GenericHost()
                    self.new_hosts[host_name] = host
                    host.name = host_name
                    host.server = self.name
                    host.status = "UP"

                host_services = self.new_hosts[host_name].services
                # if a service does not exist create its object
                if description in host_services:
                    continue

                service = GenericService()
                host_services[description] = service
                service.host = host_name
                service.name = description
                service.server = self.name
                service.status = self.STATES_MAPPING["services"][s["state"]]
                service.last_check = datetime.datetime.fromtimestamp(
                    int(s["last_check"])).isoformat(" ")
                service.duration = HumanReadableDurationFromTimestamp(
                    s["last_state_change"])
                service.attempt = "%s/%s" % (s["current_attempt"],
                                             s["max_check_attempts"])
                service.status_information = s["plugin_output"].replace(
                    "\n", " ").strip()
                service.passiveonly = not bool(
                    int(s["active_checks_enabled"]))
                service.notifications_disabled = not bool(
                    int(s["notifications_enabled"]))
                # flapping comes from "is_flapping" like for hosts - it used
                # to mirror the notification flag by mistake
                service.flapping = bool(int(s["is_flapping"]))
                service.acknowledged = bool(int(s["acknowledged"]))
                service.scheduled_downtime = bool(
                    int(s["scheduled_downtime_depth"]))
                service.status_type = {0: "soft", 1: "hard"}[s["state_type"]]
    except Exception:
        import traceback
        traceback.print_exc(file=sys.stdout)
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        return Result(result=result, error=error)

    # dummy return in case all is OK
    return Result()
def _get_status(self):
    """
        Get status from Zabbix Server

        Asks the Zabbix API (self.zapi) for all hosts and for the triggers
        currently in problem state and stores them as GenericHost /
        GenericService objects in self.new_hosts.

        Returns a Result object: the default (or, after a non-terminating
        ZabbixError, the one carried by that exception) on success, or one
        filled by self.Error() when an API call failed.
    """
    ret = Result()

    # log in first if there is no API session yet
    if self.zapi is None:
        self._login()

    # hosts
    try:
        try:
            hosts = self.zapi.host.get({
                "output": [
                    "host", "ip", "status", "available", "error",
                    "errors_from"
                ],
                "filter": {}
            })
        except Exception:
            # any failure while fetching hosts is reported as an error
            # Result; unlike a bare except this lets KeyboardInterrupt and
            # SystemExit pass through
            # set checking flag back to False
            self.isChecking = False
            result, error = self.Error(sys.exc_info())
            return Result(result=result, error=error)

        for host in hosts:
            n = {
                'host': host['host'],
                'status': self.statemap.get(host['available'],
                                            host['available']),
                'last_check': 'n/a',
                'duration': HumanReadableDurationFromTimestamp(
                    host['errors_from']),
                'status_information': host['error'],
                'attempt': '1/1',
                'site': '',
                'address': host['host'],
            }

            # host objects contain service objects - only create a host
            # object if this host name has not been seen yet
            if n["host"] not in self.new_hosts:
                host_object = GenericHost()
                self.new_hosts[n["host"]] = host_object
                host_object.name = n["host"]
                host_object.status = n["status"]
                host_object.last_check = n["last_check"]
                host_object.duration = n["duration"]
                host_object.attempt = n["attempt"]
                host_object.status_information = n["status_information"]
                host_object.site = n["site"]
                host_object.address = n["address"]
    except ZabbixError:
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        return Result(result=result, error=error)

    # services
    services = []
    try:
        api_version = self.zapi.api_version()
    except ZabbixAPIException:
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    try:
        try:
            if self.monitor_cgi_url:
                # NOTE: the group names configured in monitor_cgi_url are
                # not used as a filter here - according to the original
                # author only without a name filter anything is shown at
                # all, so every non-internal host group with monitored
                # items is taken
                hostgroup_ids = [
                    x['groupid'] for x in self.zapi.hostgroup.get(
                        {
                            'output': 'extend',
                            'with_monitored_items': True
                        }) if int(x['internal']) == 0
                ]
                zabbix_triggers = self.zapi.trigger.get({
                    'sortfield': 'lastchange',
                    'withLastEventUnacknowledged': True,
                    'groupids': hostgroup_ids,
                    "monitored": True,
                    "filter": {
                        'value': 1
                    }
                })
            else:
                zabbix_triggers = self.zapi.trigger.get({
                    'sortfield': 'lastchange',
                    'withLastEventUnacknowledged': True,
                    "monitored": True,
                    "filter": {
                        'value': 1
                    }
                })

            # second request fetches the details of the triggers found
            triggers_list = [
                trigger.get('triggerid') for trigger in zabbix_triggers
            ]
            this_trigger = self.zapi.trigger.get({
                'triggerids': triggers_list,
                'expandDescription': True,
                'output': 'extend',
                'select_items': 'extend',
                'expandData': True
            })
            # the answer is handled both as dict keyed by trigger id and
            # as plain list of triggers
            if isinstance(this_trigger, dict):
                services.extend(this_trigger.values())
            elif isinstance(this_trigger, list):
                services.extend(this_trigger)

        except ZabbixAPIException:
            # set checking flag back to False
            self.isChecking = False
            result, error = self.Error(sys.exc_info())
            print(sys.exc_info())
            return Result(result=result, error=error)

        except ZabbixError as e:
            if e.terminate:
                return e.result
            # non-terminating error: go on with whatever has been
            # collected and hand the result of the exception back
            ret = e.result

        for service in services:
            # NOTE(review): plain string comparison of the version -
            # compares lexicographically, e.g. '1.10' is not > '1.8'
            if api_version > '1.8':
                state = '%s' % service['description']
            else:
                state = '%s=%s' % (service['items'][0]['key_'],
                                   service['items'][0]['lastvalue'])
            n = {
                'host': service['host'],
                'service': service['description'],
                'status': self.statemap.get(service['priority'],
                                            service['priority']),
                # 1/1 attempt looks at least like there has been any attempt
                'attempt': '1/1',
                'duration': HumanReadableDurationFromTimestamp(
                    service['lastchange']),
                'status_information': state,
                'passiveonly': 'no',
                'last_check': 'n/a',
                'notifications': 'yes',
                'flapping': 'no',
                'site': '',
                'command': 'zabbix',
                'triggerid': service['triggerid'],
            }

            # host objects contain service objects
            if n["host"] not in self.new_hosts:
                self.new_hosts[n["host"]] = GenericHost()
                self.new_hosts[n["host"]].name = n["host"]
                self.new_hosts[n["host"]].status = "UP"
                self.new_hosts[n["host"]].site = n["site"]
                self.new_hosts[n["host"]].address = n["host"]
            host_object = self.new_hosts[n["host"]]

            # if a service does not exist create its object
            if n["service"] not in host_object.services:
                # workaround for non-existing (or not found) host status flag
                if n["service"] == "Host is down %s" % (n["host"]):
                    host_object.status = "DOWN"
                    # also take duration from "service" aka trigger
                    host_object.duration = n["duration"]
                else:
                    # the full trigger description is the key of the service
                    new_service = n["service"]
                    service_object = GenericService()
                    host_object.services[new_service] = service_object
                    service_object.host = n["host"]

                    # next dirty workaround to get Zabbix events to look
                    # Nagios-esque: cut the name at " on " and " is ".
                    # The former check '(" on " or " is ") in ...' only
                    # ever tested " on ", so names containing just " is "
                    # were never shortened. split() leaves a string
                    # without the separator untouched, so no check is
                    # needed at all.
                    for separator in (" on ", " is "):
                        n["service"] = n["service"].split(separator)[0]

                    service_object.name = n["service"]
                    service_object.status = n["status"]
                    service_object.last_check = n["last_check"]
                    service_object.duration = n["duration"]
                    service_object.attempt = n["attempt"]
                    service_object.status_information = n[
                        "status_information"]
                    service_object.passiveonly = False
                    service_object.flapping = False
                    service_object.site = n["site"]
                    service_object.address = n["host"]
                    service_object.command = n["command"]
                    service_object.triggerid = n["triggerid"]
    except (ZabbixError, ZabbixAPIException):
        # set checking flag back to False
        self.isChecking = False
        result, error = self.Error(sys.exc_info())
        print(sys.exc_info())
        return Result(result=result, error=error)

    return ret
def _duration_since(self, timestamp): if (timestamp == 0) or (timestamp > time()): duration_text = 'n/a' else: duration_text = HumanReadableDurationFromTimestamp(timestamp) return duration_text