def _send_requests(self):
    """Send one request packet to every host in ``self._hosts``.

    For each host the send time is stored on ``host.time``, a packet and
    its cookie are obtained from ``host.make_packet()``, and the host is
    registered in ``self._requests`` under that cookie before the packet
    is written to the IPv4 or IPv6 socket.  A failing send is reported
    through ``debug()`` and does not stop the loop.  ``self._delay``
    seconds are slept after each host that was handled.
    """
    # Ping each host
    for host in self._hosts.values():
        # NOTE(review): self._requests is keyed by cookie below, so this
        # membership test on the host object presumably never matches --
        # confirm what the duplicate check was meant to compare.
        if host in self._requests:
            debug("Duplicate host %s ignored" % host, 6)
            continue

        host.time = time.time()
        # Create and save a request identifier
        packet, cookie = host.make_packet(self._packetsize)
        self._requests[cookie] = host
        host.next_seq()

        try:
            if host.is_v6():
                self._sock6.sendto(packet, (host.ip, 0, 0, 0))
            else:
                self._sock4.sendto(packet, (host.ip, 0))
        except Exception as error:
            debug("Failed to ping %s [%s]" % (host.ip, error), 5)

        sleep(self._delay)
def __init__(self, service, port=0, status=event.Event.UP):
    """Set up a checker from a service description.

    service is a dict; the keys read here are 'id', 'ip', 'netboxid',
    'args', 'version', 'sysname' and 'deviceid'.  'args' holds the extra
    arguments to the handler.
    port is the fallback used when 'args' has no 'port' entry.
    status defaults to up, but can be overridden.

    The timeout is taken from the first of these that is present: the
    'timeout' handler argument, the config entry named
    "%s timeout" % self.getType(), the 'timeout' config entry, and
    finally the TIMEOUT constant.
    """
    self._conf = config.serviceconf()

    self.setServiceid(service['id'])
    self.setIp(service['ip'])
    self.setNetboxid(service['netboxid'])
    self.setArgs(service['args'])
    self.setVersion(service['version'])
    self.setSysname(service['sysname'])
    self.setDeviceid(service['deviceid'])

    # This is (and should be) used by all subclasses
    handler_args = service['args']
    self.setPort(int(handler_args.get('port', port)))
    self.setStatus(status)
    self.setTimestamp(0)

    # Resolve the timeout from the most general to the most specific source
    global_timeout = self._conf.get('timeout', TIMEOUT)
    type_timeout = self._conf.get("%s timeout" % self.getType(),
                                  global_timeout)
    chosen_timeout = self.getArgs().get('timeout', type_timeout)
    self.setTimeout(int(chosen_timeout))

    self.db = db.db()
    identity = (self.getSysname(), self.getType())
    debug("New checker instance for %s:%s " % identity, 6)
    self.runcount = 0
    self.rq = RunQueue.RunQueue()
def __init__(self, service, port=0, status=event.Event.UP):
    """Set up a checker from a service description.

    service is a dict; the keys read here are 'id', 'ip', 'netboxid',
    'args', 'version', 'sysname' and 'deviceid'.  'args' holds the extra
    arguments to the handler.
    port is the fallback used when 'args' has no 'port' entry.
    status defaults to up, but can be overridden.

    The timeout is taken from the first of these that is present: the
    'timeout' handler argument, the config entry named
    "%s timeout" % self.getType(), the 'timeout' config entry, and
    finally the TIMEOUT constant.
    """
    self._conf = config.serviceconf()

    self.setServiceid(service['id'])
    self.setIp(service['ip'])
    self.setNetboxid(service['netboxid'])
    self.setArgs(service['args'])
    self.setVersion(service['version'])
    self.setSysname(service['sysname'])
    self.setDeviceid(service['deviceid'])

    # This is (and should be) used by all subclasses
    handler_args = service['args']
    self.setPort(int(handler_args.get('port', port)))
    self.setStatus(status)
    self.setTimestamp(0)

    # Resolve the timeout from the most general to the most specific source
    global_timeout = self._conf.get('timeout', TIMEOUT)
    type_timeout = self._conf.get("%s timeout" % self.getType(),
                                  global_timeout)
    chosen_timeout = self.getArgs().get('timeout', type_timeout)
    self.setTimeout(int(chosen_timeout))

    self.db = db.db()
    identity = (self.getSysname(), self.getType())
    debug("New checker instance for %s:%s " % identity, 6)
    self.runcount = 0
    self.rq = RunQueue.RunQueue()
def getCheckers(self):
    """Fetch checkers from the database and refresh ``self._checkers``.

    ``self.dirty`` is passed to ``self.db.getCheckers()`` and then reset
    to 0.  When the database returns checkers, any that compare equal to
    a checker already held are replaced by the existing instance; the
    rest are taken as they are.  When nothing is returned, the list is
    only flushed if ``self.db.status`` is true, so a lost database
    connection does not wipe every checker.  Finally the list is put in
    random order.
    """
    newcheckers = self.db.getCheckers(self.dirty)
    self.dirty = 0

    if newcheckers:
        merged = []
        for candidate in newcheckers:
            try:
                # Prefer the instance we already hold over the fresh one
                position = self._checkers.index(candidate)
            except ValueError:
                merged.append(candidate)
            else:
                merged.append(self._checkers[position])
        self._checkers = merged
    elif self.db.status and self._checkers:
        debug.debug("No checkers left in database, flushing list.")
        self._checkers = []

    # Randomize the order of the checkers
    random.shuffle(self._checkers)
def updateRrd(self):
    """Hand the current status and response time to ``statistics.update()``.

    Any exception raised while doing so is reported through ``debug()``
    and suppressed.
    """
    try:
        statistics.update(
            self.getNetboxid(),
            self.getSysname(),
            'N',
            self.getStatus(),
            self.getResponsetime(),
            self.getServiceid(),
            self.getType()
        )
    except Exception as e:
        service = "%s:%s" % (self.getSysname(), self.getType())
        debug("rrd update failed for %s [%s]" % (service, e), 3)
def _process_response(self, raw_pong, sender, is_ipv6, arrival):
    """Decode a received packet.

    raw_pong is wrapped in PacketV6 when is_ipv6 is true, otherwise in
    PacketV4.  If that raises, the failure is reported through debug()
    together with sender, and the method returns.

    NOTE(review): only the decoding step is visible here; the decoded
    packet and the arrival argument are not used in this portion.
    """
    # Extract header info and payload
    packet_class = PacketV6 if is_ipv6 else PacketV4
    try:
        pong = packet_class(raw_pong)
    except Exception as error:
        debug("could not disassemble packet from %r: %s" % (sender, error), 2)
        return
def _process_response(self, raw_pong, sender, is_ipv6, arrival):
    """Decode a received packet.

    raw_pong is wrapped in PacketV6 when is_ipv6 is true, otherwise in
    PacketV4.  If that raises, the failure is reported through debug()
    together with sender, and the method returns.

    NOTE(review): only the decoding step is visible here; the decoded
    packet and the arrival argument are not used in this portion.
    """
    # Extract header info and payload
    packet_class = PacketV6 if is_ipv6 else PacketV4
    try:
        pong = packet_class(raw_pong)
    except Exception as error:
        debug("could not disassemble packet from %r: %s" % (sender, error), 2)
        return
def updateRrd(self):
    """Hand the current status and response time to ``statistics.update()``.

    Any exception raised while doing so is reported through ``debug()``
    and suppressed.
    """
    try:
        statistics.update(
            self.getNetboxid(),
            self.getSysname(),
            'N',
            self.getStatus(),
            self.getResponsetime(),
            self.getServiceid(),
            self.getType()
        )
    except Exception as e:
        service = "%s:%s" % (self.getSysname(), self.getType())
        debug("rrd update failed for %s [%s]" % (service, e), 3)
def run(self):
    """Run executeTest() once and act on the outcome.

    An UP result whose response time exceeds twice the timeout is turned
    into DOWN and the response time is capped at that value.

    While the resulting status differs from the stored one and
    self.runcount is below the 'retry' config value (default 3), the
    checker updates its rrd data, re-queues itself 'retry delay' config
    seconds (default 5) ahead and returns.  Otherwise a changed status is
    posted as a serviceState event and stored, a changed version on a
    service that is up is posted as a "version" event, and the rrd data,
    timestamp and runcount are refreshed.
    """
    old_version = self.getVersion()
    status, info = self.executeTest()
    service = "%s:%s" % (self.getSysname(), self.getType())
    debug("%-20s -> %s" % (service, info), 6)

    # Dirty hack to check if we timed out...
    # this is needed as ssl-socket calls may hang
    # in python < 2.3
    if (status == event.Event.UP
            and self.getResponsetime() > 2 * self.getTimeout()):
        debug("Adjusting status due to high responsetime (%s, %s)" %
              (service, self.getResponsetime()))
        status = event.Event.DOWN
        self.setResponsetime(2 * self.getTimeout())

    status_changed = status != self.getStatus()

    if status_changed and self.runcount < int(self._conf.get('retry', 3)):
        retry_delay = int(self._conf.get('retry delay', 5))
        self.runcount += 1
        debug("%-20s -> State changed. New check in %i sec. (%s, %s)" %
              (service, retry_delay, status, info))
        # Updates rrd every time to get proper 'uptime' for the service
        self.updateRrd()
        due = retry_delay + time.time()
        # Queue ourself
        self.rq.enq((due, self))
        return

    if status_changed:
        debug("%-20s -> %s, %s" % (service, status, info), 1)
        state_event = event.Event(
            self.getServiceid(), self.getNetboxid(), self.getDeviceid(),
            event.Event.serviceState, "serviceping", status, info)
        # Post to the NAV alertq
        self.db.newEvent(state_event)
        self.setStatus(status)

    version_changed = old_version != self.getVersion()
    if version_changed and self.getStatus() == event.Event.UP:
        version_event = event.Event(
            self.getServiceid(), self.getNetboxid(), self.getDeviceid(),
            "version", "serviceping", status, info,
            version=self.getVersion())
        self.db.newEvent(version_event)

    self.updateRrd()
    self.setTimestamp()
    self.runcount = 0
def __init__(self, **kwargs):
    """Set up signal handling, configuration and ping state.

    self.signalhandler is installed for SIGHUP and SIGTERM.  The ping
    config supplies 'debuglevel' (default 5), 'checkinterval'
    (default 60) and 'nrping' (default 3).  An optional 'socket' keyword
    argument is handed to megaping.MegaPing (None when absent).
    """
    for signum in (signal.SIGHUP, signal.SIGTERM):
        signal.signal(signum, self.signalhandler)

    self.config = config.pingconf()
    level = self.config.get("debuglevel", 5)
    debug.setDebugLevel(int(level))
    debug.debug("Setting debuglevel=%s " % level)

    self._isrunning = 1
    self._looptime = int(self.config.get("checkinterval", 60))
    debug.debug("Setting checkinterval=%i" % self._looptime)
    self._debuglevel = 0

    self.db = db.db()
    self.pinger = megaping.MegaPing(kwargs.get("socket"))
    self._nrping = int(self.config.get("nrping", 3))

    # To keep status...
    self.netboxmap = {}     # hash netboxid -> netbox
    self.down = []          # list of netboxids down
    self.replies = {}       # hash netboxid -> circbuf
    self.ipToNetboxid = {}
def _get_responses(self):
    """Collect replies until all requests are answered or time runs out.

    The loop keeps selecting on both sockets while the sender is still
    running or requests are outstanding.  Once self._sender_finished is
    set, the select timeout shrinks to what is left of self._timeout
    counted from that moment, and the loop ends when that time is used up
    or a select returns nothing readable.  Every received datagram is
    passed to self._process_response().  Hosts still in self._requests
    afterwards get None pushed onto their replies, and the total time
    spent is stored in self._elapsedtime.
    """
    started = time.time()
    wait_for = self._timeout

    while self._requests or not self._sender_finished:
        if self._sender_finished:
            since_sent = time.time() - self._sender_finished
            if since_sent > self._timeout:
                break
            wait_for = self._timeout - since_sent

        # Listen for incoming data on sockets
        readable = select.select([self._sock6, self._sock4],
                                 [], [], wait_for)[0]

        if not readable:
            if self._sender_finished:
                break
            continue

        # okay to use time here, because select has told us
        # there is data and we don't care to measure the time
        # it takes the system to give us the packet.
        arrival = time.time()

        # Find out which socket got data and read
        for sock in readable:
            try:
                raw_pong, sender = sock.recvfrom(4096)
            except socket.error as err:
                debug("RealityError -2: %s" % err, 1)
                continue
            self._process_response(raw_pong, sender,
                                   sock == self._sock6, arrival)

    # Everything else timed out
    for host in self._requests.values():
        host.replies.push(None)

    self._elapsedtime = time.time() - started
def make_sockets():
    """Makes and returns the raw IPv6 and IPv4 ICMP sockets.

    This needs to run as root before dropping privileges.

    Returns the list ``[socketv6, socketv4]``.  If either socket cannot
    be created, the failure is reported through ``debug()`` and the
    exception is re-raised.  When the IPv4 socket fails, the IPv6 socket
    that was already created is closed first so it is not leaked.
    """
    try:
        socketv6 = socket.socket(socket.AF_INET6, socket.SOCK_RAW,
                                 socket.getprotobyname('ipv6-icmp'))
    except Exception:
        debug("Could not create v6 socket")
        raise

    try:
        socketv4 = socket.socket(socket.AF_INET, socket.SOCK_RAW,
                                 socket.getprotobyname('icmp'))
    except Exception:
        debug("Could not create v4 socket")
        socketv6.close()
        raise

    return [socketv6, socketv4]
def signalhandler(self, signum, frame):
    """React to a signal delivered to the process.

    SIGTERM logs and exits with status 0.  SIGHUP opens the file named by
    the 'logfile' config entry (default "pping.log") in append mode and
    passes it to nav.daemon.redirect_std_fds() as both stdout and stderr.
    Any other signal is only logged.  frame is not used.
    """
    if signum == signal.SIGTERM:
        debug.debug("Caught SIGTERM. Exiting.", 1)
        sys.exit(0)
    elif signum == signal.SIGHUP:
        # reopen the logfile
        logfile_path = self.config.get("logfile", "pping.log")
        debug.debug("Caught SIGHUP. Reopening logfile...")
        # NOTE(review): the handle is not closed here; presumably
        # redirect_std_fds() takes it over -- confirm.
        logfile = open(logfile_path, 'a')
        nav.daemon.redirect_std_fds(stdout=logfile, stderr=logfile)
        debug.debug("Reopened logfile: %s" % logfile_path)
    else:
        debug.debug("Caught %s. Resuming operation." % (signum), 2)
def updateHostList(self):
    """Fetch all netboxes from the database and update internal state.

    self.netboxmap and self.ipToNetboxid are rebuilt from the rows
    returned by self.db.hostsToPing(), each unpacked as
    (netboxid, deviceid, sysname, ip, up).  A netbox that was not in the
    previous netboxmap and whose up flag is not 'y' is appended to
    self.down.  A netbox without a reply buffer gets a new CircBuf, which
    is filled with -1 when the up flag is not 'y'.  Finally the pinger is
    given the list of IP addresses.
    """
    debug.debug("Getting hosts from database...", 7)
    hosts = self.db.hostsToPing()
    netboxmap = {}
    self.ipToNetboxid = {}

    for host in hosts:
        netboxid, deviceid, sysname, ip, up = host
        netbox = Netbox(netboxid, deviceid, sysname, ip, up)

        if netbox.netboxid not in self.netboxmap:
            # New netbox. Be sure to get its state
            if netbox.up != 'y':
                debug.debug("Got new netbox, %s, currently "
                            "marked down in navDB" % netbox.ip, 7)
                self.down.append(netbox.netboxid)

        if netbox.netboxid not in self.replies:
            self.replies[netbox.netboxid] = circbuf.CircBuf()
            if netbox.up != 'y':
                buf = self.replies[netbox.netboxid]
                # Mark every slot of the buffer as "no reply".  An explicit
                # loop is used because map() is lazy on Python 3 and would
                # push nothing.
                for _ in range(len(buf)):
                    buf.push(-1)

        netboxmap[netbox.netboxid] = netbox
        self.ipToNetboxid[netbox.ip] = netbox.netboxid

    # Update netboxmap
    self.netboxmap = netboxmap
    debug.debug("We now got %i hosts in our list to ping" %
                len(self.netboxmap), 7)
    # then update our pinger object
    self.pinger.set_hosts(list(self.ipToNetboxid))
def main(self):
    """Run ping rounds for as long as self._isrunning is true.

    Each round refreshes the host list, pings, generates events and then
    sleeps.  When a round finishes within self._looptime the remainder of
    the interval is slept.  When it overruns, the sleep lasts until the
    next multiple of self._looptime counted from the round's start, so
    the delay never exceeds one interval however long the round took.
    """
    self.db.start()
    while self._isrunning:
        debug.debug("Starts pinging....", 7)
        self.updateHostList()
        elapsedtime = self.pinger.ping()
        self.generateEvents()
        debug.debug("%i hosts checked in %03.3f secs. %i hosts "
                    "currently marked as down." %
                    (len(self.netboxmap), elapsedtime, len(self.down)))

        wait = self._looptime - elapsedtime
        if wait > 0:
            debug.debug("Sleeping %03.3f secs" % wait, 6)
        else:
            overrun = -wait
            if self._looptime > 0:
                # Same result as before for overruns shorter than one
                # interval; longer overruns no longer grow the delay.
                wait = self._looptime - (overrun % self._looptime)
            else:
                wait = overrun
            debug.debug("Check lasted longer than looptime. "
                        "Delaying next check for %03.3f secs" % wait, 2)
        sleep(wait)
def __init__(self, **kwargs):
    """Set up signal handling, configuration, database and run queue.

    self.signalhandler is installed for SIGHUP and SIGTERM.  The service
    config supplies 'debuglevel' (default 4) and 'checkinterval'
    (default 60).  The optional 'fork' keyword argument (default 1) is
    stored in self._deamon.
    """
    for signum in (signal.SIGHUP, signal.SIGTERM):
        signal.signal(signum, self.signalhandler)

    self.conf = config.serviceconf()
    level = int(self.conf.get('debuglevel', 4))
    debug.setDebugLevel(level)

    self._deamon = kwargs.get("fork", 1)
    self._isrunning = 1
    self._checkers = []

    interval = self.conf.get("checkinterval", 60)
    self._looptime = int(interval)
    debug.debug("Setting checkinterval=%i" % self._looptime)

    self.db = db.db()
    debug.debug("Reading database config")
    debug.debug("Setting up runqueue")
    self._runqueue = RunQueue.RunQueue(controller=self)
    self.dirty = 1
def main(self):
    """Run check rounds for as long as self._isrunning is true.

    Each round fetches the checkers, enqueues them on the run queue with
    a pause between each so that enqueueing is spread over half of the
    time left in the interval, logs garbage collector and thread counts,
    and sleeps for the rest of self._looptime.
    """
    self.db.start()
    while self._isrunning:
        start = time.time()
        self.getCheckers()

        wait = self._looptime - (time.time() - start)
        if wait <= 0:
            debug.debug("System clock has drifted backwards, "
                        "resetting loop delay", 2)
            wait = self._looptime

        if self._checkers:
            # float() keeps the pause fractional on Python 2, where wait
            # is an int after the reset above and "/" would truncate.
            pause = float(wait) / (len(self._checkers) * 2)
        else:
            pause = 0
        for checker in self._checkers:
            self._runqueue.enq(checker)
            sleep(pause)

        # extensive debugging
        dbgthreads = [obj for obj in gc.get_objects()
                      if isinstance(obj, threading.Thread)]
        debug.debug("Garbage: %s Objects: %i Threads: %i" %
                    (gc.garbage, len(gc.get_objects()), len(dbgthreads)))

        wait = self._looptime - (time.time() - start)
        debug.debug("Waiting %i seconds." % wait)
        if wait <= 0:
            debug.debug("Only superman can do this. "
                        "Humans cannot wait for %i seconds." % wait, 2)
            # Wrap the negative remainder onto the next interval
            wait %= self._looptime
        sleep(wait)
def __init__(self, sockets, conf=None):
    """Set up configuration, timing state and sockets.

    sockets is a sequence whose first item is used as the IPv6 socket
    and second item as the IPv4 socket, or None to create them with
    make_sockets().
    conf is a mapping with the optional entries 'delay' (milliseconds
    between packets, default 2), 'timeout' (default 5) and 'packetsize'
    (default 64).  When it is None, config.pingconf() is used, falling
    back to an empty dict if that raises.

    Raises ValueError if the packet size is below 44.  If the sockets
    have to be created and that fails, the failure is logged and the
    original exception is re-raised.
    """
    # Load the ping configuration unless one was supplied
    if conf is None:
        try:
            self._conf = config.pingconf()
        except Exception:
            debug("Failed to open config file. Using default values.", 2)
            self._conf = {}
    else:
        self._conf = conf

    # Delay between each packet is transmitted
    self._delay = float(self._conf.get('delay', 2)) / 1000  # convert from ms
    # Timeout before considering hosts as down
    self._timeout = int(self._conf.get('timeout', 5))
    # Dictionary with all the hosts, populated by set_hosts()
    self._hosts = {}

    packetsize = int(self._conf.get('packetsize', 64))
    if packetsize < 44:
        raise ValueError(("Packetsize (%s) too small to create a proper "
                          "cookie; Must be at least 44.") % packetsize)
    self._packetsize = packetsize
    self._pid = os.getpid() % 65536

    # Global timing of the ppinger
    self._elapsedtime = 0

    # Initialize the sockets
    created_here = sockets is None
    if created_here:
        try:
            sockets = make_sockets()
        except Exception:
            debug("Tried to create sockets without beeing root!")
            # Re-raise: continuing would only fail below with an
            # unrelated TypeError on sockets[0].
            raise
    self._sock6 = sockets[0]
    self._sock4 = sockets[1]
    if created_here:
        debug("No sockets passed as argument, creating own")
def __init__(self, sockets, conf=None):
    """Set up configuration, timing state and sockets.

    sockets is a sequence whose first item is used as the IPv6 socket
    and second item as the IPv4 socket, or None to create them with
    make_sockets().
    conf is a mapping with the optional entries 'delay' (milliseconds
    between packets, default 2), 'timeout' (default 5) and 'packetsize'
    (default 64).  When it is None, config.pingconf() is used, falling
    back to an empty dict if that raises.

    Raises ValueError if the packet size is below 44.  If the sockets
    have to be created and that fails, the failure is logged and the
    original exception is re-raised.
    """
    # Load the ping configuration unless one was supplied
    if conf is None:
        try:
            self._conf = config.pingconf()
        except Exception:
            debug("Failed to open config file. Using default values.", 2)
            self._conf = {}
    else:
        self._conf = conf

    # Delay between each packet is transmitted
    self._delay = float(self._conf.get('delay', 2)) / 1000  # convert from ms
    # Timeout before considering hosts as down
    self._timeout = int(self._conf.get('timeout', 5))
    # Dictionary with all the hosts, populated by set_hosts()
    self._hosts = {}

    packetsize = int(self._conf.get('packetsize', 64))
    if packetsize < 44:
        raise ValueError(("Packetsize (%s) too small to create a proper "
                          "cookie; Must be at least 44.") % packetsize)
    self._packetsize = packetsize
    self._pid = os.getpid() % 65536

    # Global timing of the ppinger
    self._elapsedtime = 0

    # Initialize the sockets
    created_here = sockets is None
    if created_here:
        try:
            sockets = make_sockets()
        except Exception:
            debug("Tried to create sockets without beeing root!")
            # Re-raise: continuing would only fail below with an
            # unrelated TypeError on sockets[0].
            raise
    self._sock6 = sockets[0]
    self._sock4 = sockets[1]
    if created_here:
        debug("No sockets passed as argument, creating own")
def run(self):
    """Run executeTest() once and act on the outcome.

    An UP result whose response time exceeds twice the timeout is turned
    into DOWN and the response time is capped at that value.

    While the resulting status differs from the stored one and
    self.runcount is below the 'retry' config value (default 3), the
    checker updates its rrd data, re-queues itself 'retry delay' config
    seconds (default 5) ahead and returns.  Otherwise a changed status is
    posted as a serviceState event and stored, a changed version on a
    service that is up is posted as a "version" event, and the rrd data,
    timestamp and runcount are refreshed.
    """
    old_version = self.getVersion()
    status, info = self.executeTest()
    service = "%s:%s" % (self.getSysname(), self.getType())
    debug("%-20s -> %s" % (service, info), 6)

    # Dirty hack to check if we timed out...
    # this is needed as ssl-socket calls may hang
    # in python < 2.3
    if (status == event.Event.UP
            and self.getResponsetime() > 2 * self.getTimeout()):
        debug("Adjusting status due to high responsetime (%s, %s)" %
              (service, self.getResponsetime()))
        status = event.Event.DOWN
        self.setResponsetime(2 * self.getTimeout())

    status_changed = status != self.getStatus()

    if status_changed and self.runcount < int(self._conf.get('retry', 3)):
        retry_delay = int(self._conf.get('retry delay', 5))
        self.runcount += 1
        debug("%-20s -> State changed. New check in %i sec. (%s, %s)" %
              (service, retry_delay, status, info))
        # Updates rrd every time to get proper 'uptime' for the service
        self.updateRrd()
        due = retry_delay + time.time()
        # Queue ourself
        self.rq.enq((due, self))
        return

    if status_changed:
        debug("%-20s -> %s, %s" % (service, status, info), 1)
        state_event = event.Event(
            self.getServiceid(), self.getNetboxid(), self.getDeviceid(),
            event.Event.serviceState, "serviceping", status, info)
        # Post to the NAV alertq
        self.db.newEvent(state_event)
        self.setStatus(status)

    version_changed = old_version != self.getVersion()
    if version_changed and self.getStatus() == event.Event.UP:
        version_event = event.Event(
            self.getServiceid(), self.getNetboxid(), self.getDeviceid(),
            "version", "serviceping", status, info,
            version=self.getVersion())
        self.db.newEvent(version_event)

    self.updateRrd()
    self.setTimestamp()
    self.runcount = 0
self._elapsedtime = end - start def _process_response(self, raw_pong, sender, is_ipv6, arrival): # Extract header info and payload packet_class = PacketV6 if is_ipv6 else PacketV4 try: pong = packet_class(raw_pong) except Exception, error: debug("could not disassemble packet from %r: %s" % (sender, error), 2) return if pong.type != pong.ICMP_ECHO_REPLY: # we only care about echo replies debug( "Packet from %s was not an echo reply, but %s" % (sender, pong), 7) return if not pong.id == self._pid: debug( "packet from %r doesn't match our id " "(%s): %r (raw packet: %r)" % (sender, self._pid, pong, raw_pong), 7) return cookie = pong.data[:Host.COOKIE_LENGTH] # Find the host with this cookie try: host = self._requests[cookie]
end = time.time() self._elapsedtime = end - start def _process_response(self, raw_pong, sender, is_ipv6, arrival): # Extract header info and payload packet_class = PacketV6 if is_ipv6 else PacketV4 try: pong = packet_class(raw_pong) except Exception, error: debug("could not disassemble packet from %r: %s" % ( sender, error), 2) return if pong.type != pong.ICMP_ECHO_REPLY: # we only care about echo replies debug("Packet from %s was not an echo reply, but %s" % (sender, pong), 7) return if not pong.id == self._pid: debug("packet from %r doesn't match our id " "(%s): %r (raw packet: %r)" % (sender, self._pid, pong, raw_pong), 7) return cookie = pong.data[:Host.COOKIE_LENGTH] # Find the host with this cookie try: host = self._requests[cookie] except KeyError: debug("packet from %r does not match any outstanding request: "
# Read extra handler arguments, one 'key=val' per line, until a blank line
while readArgs:
    line = raw_input()
    if not line:
        readArgs = 0
        break
    try:
        # Split on the first '=' only, so the value may itself contain '='.
        # A line without '=' fails to unpack and is reported below.
        key, val = line.split('=', 1)
    except ValueError as e:
        print("%s %s" % (line, e))
        print("Must be on form 'key=val'")
    else:
        args[key] = val

print(args)
debug.debug("Ip: %s sysname: %s handler: %s args: %s" %
            (ip, sysname, handler, args))

checker = checkermap.get(handler)
if not checker:
    debug.debug("No such handler: %s" % handler)
    sys.exit(1)

# Service description for the checker
service = {
    'id': serviceid,
    'netboxid': netboxid,
    'deviceid': 0,
    'ip': ip,
    'sysname': sysname,
    'args': args,
    'version': version,
}
# Read extra handler arguments, one 'key=val' per line, until a blank line
while readArgs:
    line = raw_input()
    if not line:
        readArgs = 0
        break
    try:
        # Split on the first '=' only, so the value may itself contain '='.
        # A line without '=' fails to unpack and is reported below.
        key, val = line.split('=', 1)
    except ValueError as e:
        print("%s %s" % (line, e))
        print("Must be on form 'key=val'")
    else:
        args[key] = val

print(args)
debug.debug("Ip: %s sysname: %s handler: %s args: %s" %
            (ip, sysname, handler, args))

checker = checkermap.get(handler)
if not checker:
    debug.debug("No such handler: %s" % handler)
    sys.exit(1)

# Service description for the checker
service = {
    'id': serviceid,
    'netboxid': netboxid,
    'deviceid': 0,
    'ip': ip,
    'sysname': sysname,
    'args': args,
    'version': version,
}
def generateEvents(self):
    """Report state changes to event engine.

    Every (ip, rtt) pair from self.pinger.results() is pushed onto the
    reply buffer of the matching netbox and handed to statistics.update();
    an rtt of -1 means the host did not reply.  A netbox counts as down
    when the first self._nrping entries of its buffer are all -1.
    Netboxes that went down or came back up since the previous run get a
    boxState event posted through self.db.newEvent(), and self.down is
    replaced by the new list of down netboxes.
    """
    debug.debug("Checks which hosts didn't answer", 7)
    answers = self.pinger.results()
    for ip, rtt in answers:
        # rtt = round trip time (-1 => host didn't reply)
        netboxid = self.ipToNetboxid.get(ip)
        self.replies[netboxid].push(rtt)
        netbox = self.netboxmap[netboxid]
        if rtt != -1:
            statistics.update(netbox.netboxid, netbox.sysname, 'N', 'UP',
                              rtt)
        else:
            # ugly...
            statistics.update(netbox.netboxid, netbox.sysname, 'N', 'DOWN',
                              5)

    # Find out which netboxes to consider down
    all_missed = [-1] * self._nrping
    downNow = [netboxid for netboxid, replies in self.replies.items()
               if replies[:self._nrping] == all_missed]
    debug.debug("No answer from %i hosts" % len(downNow), 7)

    # Detect state changes since last run.  Lists are built eagerly so
    # len() works and the comparison uses the old self.down.
    was_down = set(self.down)
    is_down = set(downNow)
    reportDown = [netboxid for netboxid in downNow
                  if netboxid not in was_down]
    reportUp = [netboxid for netboxid in self.down
                if netboxid not in is_down]
    self.down = downNow

    # Reporting netboxes as down
    debug.debug("Starts reporting %i hosts as down" % len(reportDown), 7)
    for netboxid in reportDown:
        netbox = self.netboxmap[netboxid]
        newEvent = Event(None, netbox.netboxid, netbox.deviceid,
                         Event.boxState, "pping", Event.DOWN)
        self.db.newEvent(newEvent)
        debug.debug("%s marked as down." % netbox)

    # Reporting netboxes as up
    debug.debug("Starts reporting %i hosts as up" % len(reportUp), 7)
    for netboxid in reportUp:
        try:
            netbox = self.netboxmap[netboxid]
        except KeyError:
            debug.debug("Netbox %s is no longer with us..." % netboxid)
            continue
        newEvent = Event(None, netbox.netboxid, netbox.deviceid,
                         Event.boxState, "pping", Event.UP)
        self.db.newEvent(newEvent)
        debug.debug("%s marked as up." % netbox)