def execstr(self, host, check):
    """Run the string check named ``check`` against the host named ``host``.

    Both arguments arrive as names and are resolved to ``Hosts`` /
    ``HostChecks`` rows. When both are enabled the (currently ``None``)
    result is handed to ``computestr``.

    Any exception is recorded rather than propagated: the per host/check
    ``::lasterror`` and ``::nberror`` metadata entries are updated and, when
    both rows were resolved, an ``ErrorLog`` row is saved.

    :param self: unused here (presumably the bound task instance).
    :param host: name of the host to check.
    :param check: name of the check to run.
    :return: the check result; always ``None`` while the command execution
        below stays commented out.
    """
    res = None
    try:
        print('doing ' + check + ' on ' + host)
        host = Hosts.objects.get(name=host)
        check = HostChecks.objects.get(name=check)
        if host.enabled is True and check.enabled is True:
            # Command execution is disabled for now:
            # run = sh.Command("./bin/runthis.sh")
            # res = str(run(check.arg))
            computestr(check, host, res)
            # print(oid + ' on ' + address + ' equals ' + res)
    except Exception as e:
        # If one of the lookups above is what failed, ``host`` / ``check`` are
        # still the raw name strings, so fall back to them instead of
        # dereferencing ``.name`` (which would raise AttributeError here and
        # mask the original error).
        host_name = str(getattr(host, 'name', host))
        check_name = str(getattr(check, 'name', check))
        print('doing ' + check_name + ' on ' + host_name + ' failed')

        # update the error count
        key_prefix = host_name + ':' + check_name
        setMetadata(key_prefix + '::lasterror', str(timezone.now()))
        setMetadata(key_prefix + '::nberror',
                    int(getMetadata(key_prefix + '::nberror', 0)) + 1)

        # Log the error to the database. This needs real model instances, so
        # it is skipped when a lookup failed before they were resolved.
        if isinstance(host, Hosts) and isinstance(check, HostChecks):
            ErrorLog(hostcheck=check,
                     host=host,
                     event=e.__class__.__name__,
                     error=str(e),
                     value=res).save()
    return res
def snmpgetbool(self, host, check):
    """Run the boolean SNMP check named ``check`` against the host ``host``.

    The host is resolved by name; only when it is enabled is the check
    resolved and the (currently ``None``) result passed to ``computebool``.

    Any exception is recorded rather than propagated: the per host/check
    ``::lasterror`` and ``::nberror`` metadata entries are updated and, when
    both rows were resolved, an ``ErrorLog`` row is saved.

    :param self: unused here (presumably the bound task instance).
    :param host: name of the host to query.
    :param check: name of the check to run.
    :return: the check result; always ``None`` while the SNMP call below
        stays commented out.
    """
    res = None
    try:
        print('doing ' + check + ' on ' + host)
        host = Hosts.objects.get(name=host)
        if host.enabled is True:
            check = HostChecks.objects.get(name=check)
            # Inputs for the SNMP request that is currently disabled.
            address = host.address
            community = host.community
            oid = check.arg
            # res = str(snmp_get(oid, hostname=address, community=community, version=1).value)
            computebool(check, host, res)
            # print(oid + ' on ' + address + ' equals ' + res)
    except Exception as e:
        # If a lookup above is what failed, ``host`` / ``check`` are still the
        # raw name strings, so fall back to them instead of dereferencing
        # ``.name`` (which would raise AttributeError here and mask the
        # original error).
        host_name = str(getattr(host, 'name', host))
        check_name = str(getattr(check, 'name', check))
        print('doing ' + check_name + ' on ' + host_name + ' failed')

        # update the error count
        key_prefix = host_name + ':' + check_name
        setMetadata(key_prefix + '::lasterror', str(timezone.now()))
        setMetadata(key_prefix + '::nberror',
                    int(getMetadata(key_prefix + '::nberror', 0)) + 1)

        # Log the error to the database. This needs real model instances, so
        # it is skipped when a lookup failed before they were resolved.
        if isinstance(host, Hosts) and isinstance(check, HostChecks):
            ErrorLog(hostcheck=check,
                     host=host,
                     event=e.__class__.__name__,
                     error=str(e),
                     value=res).save()
    return res
def setNotifs(request, host, check, status):
    """Switch notifications on or off for one host/check combination.

    Driven by the switchery toggle in the UI. The caller must hold the
    ``webview.silence_check`` permission.

    :param request: incoming request; ``request.user`` is permission-checked.
    :param host: name of the host (404 if unknown).
    :param check: name of the check (404 if unknown).
    :param status: the literal string ``"false"`` disables notifications;
        any other value enables them.
    :raises PermissionDenied: when the user lacks the permission.
    :return: an ``HttpResponse`` with the body ``"done"``.
    """
    allowed = request.user.has_perm('webview.silence_check')
    if not allowed:
        raise PermissionDenied

    host_row = get_object_or_404(Hosts, name=host)
    check_row = get_object_or_404(HostChecks, name=check)

    # Everything except the exact string "false" counts as "on".
    notifs_on = status != "false"

    key_prefix = host_row.name + ":" + check_row.name
    setMetadata(key_prefix + "::notifs", notifs_on)
    return HttpResponse("done")
def getDelay(request, days):
    """Return the value shown in the "average task delay" info bubble.

    The real computation is disabled (kept below for reference); a fixed
    value of ``1`` is stored under ``taskdelay-<days>`` and returned.

    :param request: incoming request (unused).
    :param days: look-back window; only used to build the metadata key.
    :return: an ``HttpResponse`` whose body is the delay value.
    """
    # Disabled computation:
    # if days is None:
    #     days = 1
    # meandelay = getMetadata('taskdelay-' + str(days), None)
    # if meandelay is None:
    #     tasks = TaskState.objects.filter(
    #         tstamp__range=[timezone.now() - datetime.timedelta(days=int(days)), timezone.now()]).exclude(
    #         runtime=None).values_list('runtime', flat=True)
    #     meandelay = mean(tasks)
    delay = 1
    metadata_key = 'taskdelay-' + str(days)
    setMetadata(metadata_key, delay)
    return HttpResponse(delay)
def computestr(check, host, value):
    """Record one string-check reading and work out its error state.

    Stores the reading in the host/check metadata (``::lastvalue``,
    ``::lastcheck``, ``::value``), evaluates alerts through ``alertstr``,
    persists the resulting ``::error`` state and saves a ``Historical`` row
    carrying a JSON snapshot of the metadata.

    :param check: the check being processed (reads ``name``, ``colorizesla``
        and ``sla``).
    :param host: the host the reading belongs to (reads ``name``).
    :param value: the reading to record.
    :return: the metadata snapshot dict (contains the ``error`` key).
    """
    from .models import Historical

    prefix = host.name + ':' + check.name

    # create the db record
    record = Historical(host=host,
                        hostcheck=check,
                        value=value,
                        timestamp=timezone.now())

    # Move the previous reading to ::lastvalue before overwriting ::value.
    previous = getMetadata(prefix + '::value', 'No Data')
    setMetadata(prefix + '::lastvalue', previous)
    checked_at = int(timezone.now().timestamp())
    setMetadata(prefix + '::lastcheck', checked_at)
    setMetadata(prefix + '::value', value)

    snapshot = {'error': getMetadata(prefix + '::error', 'OK')}

    state = 'false'
    snapshot['error'] = state

    alerts = alertstr(check, host, value)
    if check.colorizesla and len(alerts) > 0:
        # With colorizesla the colour follows the SLA status, not the check:
        # 'crit' as soon as one enabled SLA is failing, 'ok' otherwise.
        enabled_slas = check.sla.filter(enabled=True)
        sla_failing = any(sla.status == 'failing' for sla in enabled_slas)
        state = 'crit' if sla_failing else 'ok'
        snapshot['error'] = state
    else:
        # Later alerts override earlier ones; within one alert crit beats warn.
        for alert in alerts:
            if alert.get('hasError', True):
                if alert.get('strwarn', False):
                    state = 'warn'
                if alert.get('strgood', False) or alert.get('strbad', False):
                    state = 'crit'
            elif alert.get('hasError', None) is False:
                state = 'ok'

    # The snapshot keeps whatever setMetadata returns (presumably the stored value).
    snapshot['error'] = setMetadata(prefix + '::error', state)

    # save a snapshot of the metadata when the check ran
    record.data = json.dumps(snapshot)
    record.save()
    return snapshot
def snmpgetint(self, host, check):
    """Fetch a numeric OID over SNMP v1 and feed the result to ``computeint``.

    The host is looked up by name; when it is enabled the check is looked up
    too and a single GET request for ``check.arg`` is sent to
    ``(host.address, 161)`` over UDP/IPv4 using the host's community string.
    The receive callback multiplies the returned value by the check's
    ``quotient``, stores it as a string in ``res`` and calls ``computeint``.

    On any exception the traceback is printed, the per host/check
    ``::lasterror`` / ``::nberror`` metadata entries are updated and an
    ``ErrorLog`` row is saved.

    NOTE(review): ``checkname``, ``hostname`` and ``res`` are module globals.
    If an exception is raised before this call assigns them, the handler and
    the final ``return`` will see whatever an earlier call left behind (or
    raise NameError if nothing ever set them) - confirm whether that is
    acceptable, especially with concurrent workers.

    :param self: unused here (presumably the bound task instance).
    :param host: name of the host to query.
    :param check: name of the check to run.
    :return: the global ``res`` (stringified float once a response was
        processed).
    """
    global checkname
    global hostname
    global res
    try:
        print('doing ' + check + ' on ' + host)
        hostname = Hosts.objects.get(name=host)
        if hostname.enabled is True:
            checkname = HostChecks.objects.get(name=check)
            address = hostname.address
            community = hostname.community
            oid = checkname.arg
            # res = str(float(snmp_get(oid, hostname=address, community=community, version=1).value) * float(check.quotient))
            # from pysnmp import debug
            # debug.setLogger(debug.Debug('msgproc'))
            # Protocol version to use
            pMod = api.protoModules[api.protoVersion1]
            # pMod = api.protoModules[api.protoVersion2c]
            # Build PDU
            reqPDU = pMod.GetRequestPDU()
            pMod.apiPDU.setDefaults(reqPDU)
            pMod.apiPDU.setVarBinds(reqPDU, (((oid), pMod.Null()), ))
            # Build message
            reqMsg = pMod.Message()
            pMod.apiMessage.setDefaults(reqMsg)
            pMod.apiMessage.setCommunity(reqMsg, community)
            pMod.apiMessage.setPDU(reqMsg, reqPDU)
            startedAt = time()

            # Timer callback: raises once more than 3 seconds have elapsed
            # since the request was built.
            # NOTE(review): its registration further down appears to be
            # commented out, so this timeout is presumably never enforced -
            # confirm.
            def cbTimerFun(timeNow):
                if timeNow - startedAt > 3:
                    raise Exception("Request timed out")

            # Receive callback: decodes every message in the datagram and
            # handles the one whose request ID matches ours.
            # noinspection PyUnusedLocal,PyUnusedLocal
            def cbRecvFun(transportDispatcher,
                          transportDomain,
                          transportAddress,
                          wholeMsg,
                          reqPDU=reqPDU):
                while wholeMsg:
                    rspMsg, wholeMsg = decoder.decode(wholeMsg,
                                                      asn1Spec=pMod.Message())
                    rspPDU = pMod.apiMessage.getPDU(rspMsg)
                    # Match response to request
                    if pMod.apiPDU.getRequestID(
                            reqPDU) == pMod.apiPDU.getRequestID(rspPDU):
                        # Check for SNMP errors reported
                        errorStatus = pMod.apiPDU.getErrorStatus(rspPDU)
                        if errorStatus:
                            print(errorStatus.prettyPrint())
                        else:
                            for oid, val in pMod.apiPDU.getVarBinds(rspPDU):
                                # print('%s = %s' % (oid.prettyPrint(), val.prettyPrint()))
                                # res = str(float(val.prettyPrint() * float(check.quotient)))
                                global checkname
                                global hostname
                                global res
                                # Scale the raw value by the check's quotient
                                # and hand it to the integer pipeline.
                                res = str(
                                    float(val * float(checkname.quotient)))
                                computeint(checkname, hostname, res)
                        # Mark job #1 done so runDispatcher() can return.
                        transportDispatcher.jobFinished(1)
                return wholeMsg

            transportDispatcher = AsyncoreDispatcher()
            transportDispatcher.registerRecvCbFun(cbRecvFun)
            # transportDispatcher.registerTimerCbFun(cbTimerFun)
            # UDP/IPv4
            transportDispatcher.registerTransport(
                udp.domainName, udp.UdpSocketTransport().openClientMode())
            # Pass message to dispatcher
            transportDispatcher.sendMessage(encoder.encode(reqMsg),
                                            udp.domainName, (address, 161))
            transportDispatcher.jobStarted(1)
            ## UDP/IPv6 (second copy of the same PDU will be sent)
            # transportDispatcher.registerTransport(
            #     udp6.domainName, udp6.Udp6SocketTransport().openClientMode()
            # )
            # Pass message to dispatcher
            # transportDispatcher.sendMessage(
            #     encoder.encode(reqMsg), udp6.domainName, ('::1', 161)
            # )
            # transportDispatcher.jobStarted(1)
            ## Local domain socket
            # transportDispatcher.registerTransport(
            #     unix.domainName, unix.UnixSocketTransport().openClientMode()
            # )
            #
            # Pass message to dispatcher
            # transportDispatcher.sendMessage(
            #     encoder.encode(reqMsg), unix.domainName, '/tmp/snmp-agent'
            # )
            # transportDispatcher.jobStarted(1)
            # Dispatcher will finish as job#1 counter reaches zero
            transportDispatcher.runDispatcher()
            transportDispatcher.closeDispatcher()
            # print(transportDispatcher)
            # Explicitly drop the per-request objects and callbacks.
            del transportDispatcher
            del pMod
            del reqPDU
            del reqMsg
            del startedAt
            del cbTimerFun
            del cbRecvFun
            # computeint(check, host, res)
            # print(oid + ' on ' + address + ' equals ' + res)
    except Exception as e:
        print('doing ' + checkname.name + ' on ' + hostname.name + ' failed')
        print(traceback.format_exc())
        # update the error count
        setMetadata(hostname.name + ':' + checkname.name + '::lasterror',
                    str(timezone.now()))
        setMetadata(
            hostname.name + ':' + checkname.name + '::nberror',
            int(
                getMetadata(hostname.name + ':' + checkname.name + '::nberror',
                            0)) + 1)
        # Log the error to the database.
        ErrorLog(hostcheck=checkname,
                 host=hostname,
                 event=e.__class__.__name__,
                 error=str(e),
                 value=res).save()
    return res
def computeint(check, host, value):
    """Record one numeric reading and work out its error state.

    Stores the reading in the host/check metadata (``::lastvalue``,
    ``::lastcheck``, ``::value``), widens the stored ``::min`` / ``::max``
    when the reading falls outside them, evaluates alerts through
    ``alertint``, persists the resulting ``::error`` state and saves a
    ``Historical`` row carrying a JSON snapshot of the metadata.

    :param check: the check being processed (reads ``name``, ``colorizesla``
        and ``sla``).
    :param host: the host the reading belongs to (reads ``name``).
    :param value: the reading; must be convertible with ``float()``.
    :return: the metadata snapshot dict (``min``, ``max``, ``avg``,
        ``error``).
    """
    from .models import Historical

    prefix = host.name + ':' + check.name

    # create the db record
    record = Historical(host=host,
                        hostcheck=check,
                        value=value,
                        timestamp=timezone.now())

    # Move the previous reading to ::lastvalue before overwriting ::value.
    previous = getMetadata(prefix + '::value', 'No Data')
    setMetadata(prefix + '::lastvalue', previous)
    checked_at = int(timezone.now().timestamp())
    setMetadata(prefix + '::lastcheck', checked_at)
    setMetadata(prefix + '::value', value)

    snapshot = {
        'min': getMetadata(prefix + '::min', 'No Data'),
        'max': getMetadata(prefix + '::max', 'No Data'),
        'avg': getMetadata(prefix + '::avg', 'No Data'),
        'error': getMetadata(prefix + '::error', 'OK'),
    }

    # disable avg calculation for now. will do it out of band to avoid running late of checks
    # if (now - metadata.get('laststats', 0)) > check.statsinterval:
    #     try:
    #         castedata = []
    #         # This is the query that is loading the database, need to think about this.
    #         data = Historical.objects.filter(host=host, hostcheck=check).values_list('value')
    #         for i in data:
    #             castedata.append(float(i[0]))
    #         metadata['avg'] = mean(castedata)
    #     except:
    #         dbg('Could not compute avg of int')
    #         metadata['avg'] = 0
    #     metadata['laststats'] = now
    #     setMetadata(host.name + ':' + check.name + ':avg', str(metadata['avg']))
    #     setMetadata(host.name + ':' + check.name + ':laststats', now)

    # Lower the stored minimum when there is none yet or we are under it.
    known_min = snapshot['min']
    if known_min == 'No Data' or float(value) < float(known_min):
        snapshot['min'] = setMetadata(prefix + '::min', str(value))

    # Raise the stored maximum when there is none yet or we are over it.
    known_max = snapshot['max']
    if known_max == 'No Data' or float(value) > float(known_max):
        snapshot['max'] = setMetadata(prefix + '::max', str(value))

    # process any alerts that could arise
    alerts = alertint(check, host, value)

    state = 'false'
    snapshot['error'] = state

    if check.colorizesla and len(alerts) > 0:
        # With colorizesla the colour follows the SLA status, not the check:
        # 'crit' as soon as one enabled SLA is failing, 'ok' otherwise.
        enabled_slas = check.sla.filter(enabled=True)
        sla_failing = any(sla.status == 'failing' for sla in enabled_slas)
        state = 'crit' if sla_failing else 'ok'
        snapshot['error'] = state
    else:
        # Later alerts override earlier ones; crit takes precedence over warn.
        for alert in alerts:
            if alert.get('hasError', False):
                is_crit = alert.get('lowcrit', False) or alert.get('highcrit', False)
                if is_crit:
                    state = 'crit'
                elif alert.get('lowwarn', False) or alert.get('highwarn', False):
                    state = 'warn'
            elif alert.get('hasError', None) is False:
                # Only an explicit False counts as "ok"; a missing key does not.
                state = 'ok'

    # The snapshot keeps whatever setMetadata returns (presumably the stored value).
    snapshot['error'] = setMetadata(prefix + '::error', str(state))

    # save a snapshot of the metadata when the check ran
    record.data = json.dumps(snapshot)
    record.save()
    return snapshot
def computebool(check, host, value):
    """Record one boolean reading and work out its error state.

    The raw value is normalised with ``booltoint(strtobool(value))``. The
    reading is stored in the host/check metadata (``::lastvalue``,
    ``::lastcheck``, ``::value``), the matching true/false counter and
    timestamp are updated, alerts are evaluated through ``alertbool``, the
    resulting ``::error`` state is persisted and a ``Historical`` row with a
    JSON snapshot of the metadata is saved.

    :param check: the check being processed (reads ``name``, ``colorizesla``
        and ``sla``).
    :param host: the host the reading belongs to (reads ``name``).
    :param value: raw reading accepted by ``strtobool``.
    :return: the metadata snapshot dict (``nbtrue``, ``nbfalse``,
        ``lasttrue``, ``lastfalse``, ``error``).
    """
    from .models import Historical

    value = booltoint(strtobool(value))
    prefix = host.name + ':' + check.name

    # create the db record
    record = Historical(host=host,
                        hostcheck=check,
                        value=value,
                        timestamp=timezone.now())

    # Move the previous reading to ::lastvalue before overwriting ::value.
    previous = getMetadata(prefix + '::value', 'No Data')
    setMetadata(prefix + '::lastvalue', previous)
    checked_at = int(timezone.now().timestamp())
    setMetadata(prefix + '::lastcheck', checked_at)
    setMetadata(prefix + '::value', value)

    snapshot = {
        'nbtrue': getMetadata(prefix + '::nbtrue', '0'),
        'nbfalse': getMetadata(prefix + '::nbfalse', '0'),
        'lasttrue': getMetadata(prefix + '::lasttrue', 'No Data'),
        'lastfalse': getMetadata(prefix + '::lastfalse', 'No Data'),
        'error': getMetadata(prefix + '::error', 'OK'),
    }

    # Pick the counter/timestamp pair matching the reading; any value other
    # than 0 or 1 leaves the counters untouched.
    if value == 0:
        touched = ('nbfalse', 'lastfalse')
    elif value == 1:
        touched = ('nbtrue', 'lasttrue')
    else:
        touched = None

    if touched is not None:
        counter_key, stamp_key = touched
        snapshot[counter_key] = setMetadata(prefix + '::' + counter_key,
                                            int(snapshot[counter_key]) + 1)
        snapshot[stamp_key] = setMetadata(
            prefix + '::' + stamp_key,
            timezone.now().strftime("%m/%d/%Y %H:%M:%S"))

    state = 'false'
    snapshot['error'] = state

    alerts = alertbool(check, host, value)
    if check.colorizesla and len(alerts) > 0:
        # With colorizesla the colour follows the SLA status, not the check:
        # 'crit' as soon as one enabled SLA is failing, 'ok' otherwise.
        enabled_slas = check.sla.filter(enabled=True)
        sla_failing = any(sla.status == 'failing' for sla in enabled_slas)
        state = 'crit' if sla_failing else 'ok'
        snapshot['error'] = state
    else:
        # Later alerts override earlier ones.
        for alert in alerts:
            if alert.get('hasError', True):
                if alert.get('boolgood', False) or alert.get('boolbad', False):
                    state = 'crit'
            elif alert.get('hasError', None) is False:
                state = 'ok'

    # The snapshot keeps whatever setMetadata returns (presumably the stored value).
    snapshot['error'] = setMetadata(prefix + '::error', state)

    # save a snapshot of the metadata when the check ran
    record.data = json.dumps(snapshot)
    record.save()
    return snapshot