def doRequest(self, url, method, headers, params):
    # Split the URL into (scheme, netloc, path, params, query, fragment).
    parts = urlparse(url)
    domain_name = parts[1].split(':')[0]
    # Only send a Host header for real host names, not dotted-quad IPs.
    if not iplib.is_dot(domain_name):
        headers['Host'] = domain_name
    if parts[0] == 'https':
        h = httplib.HTTPSConnection(parts[1])
    else:
        h = httplib.HTTPConnection(parts[1])
    uri = parts[2]
    if parts[4]:
        uri += "?%s" % parts[4]
    # Non-POST requests carry their parameters in the query string.
    if params and method != 'POST':
        uri = "%s?%s" % (uri, urllib.urlencode(params))
    if method == 'POST':
        headers['Content-type'] = 'application/x-www-form-urlencoded'
    if params and method == 'POST':
        h.request(method, uri, urllib.urlencode(params), headers=headers)
    else:
        h.request(method, uri, headers=headers)
    return h.getresponse()
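# A minimal standalone sketch of the same scheme-based dispatch pattern that
# doRequest uses, assuming Python 2's httplib and urlparse as imported by this
# module; the helper name _example_get is illustrative, not part of the
# original code.
def _example_get(url):
    from urlparse import urlparse
    import httplib
    parts = urlparse(url)
    if parts[0] == 'https':
        conn = httplib.HTTPSConnection(parts[1])
    else:
        conn = httplib.HTTPConnection(parts[1])
    # Fall back to "/" when the URL has no path component.
    conn.request('GET', parts[2] or '/')
    return conn.getresponse()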
def check(self, test_type, schedule, metadata=None, **kwargs):
    # Parse input data (avoid a mutable default argument for metadata)
    if metadata is None:
        metadata = {}
    schedule = ScheduleProxy(**schedule)
    if not hasattr(schedule, "metadata"):
        schedule.metadata = cPickle.dumps(metadata)
    elif not metadata and schedule.metadata:
        metadata = cPickle.loads(schedule.metadata)

    if test_type == 'service':
        schedule.check_type = schedule.service_type
    elif test_type == 'resource':
        schedule.check_type = schedule.resource_check_method

    if test_type != 'traceroute':
        # Load checker
        try:
            checker = self.checker_plugins[schedule.check_type]
        except KeyError:
            return self._reportResult(test_type, None, datetime.now(), -1, 0,
                                      "Unable to find plugin for %s" % schedule.check_type)

    # Set schedule ip address
    if schedule.ip_address is None:
        if iplib.is_dot(schedule.fqdn):
            schedule.ip_address = schedule.fqdn
        else:
            try:
                schedule.ip_address = socket.gethostbyname(schedule.fqdn)
            except socket.error:
                if test_type != 'traceroute' and checker.textkey == 'resource.snmp':
                    return self._reportResourceResults(test_type, schedule, datetime.now(), None,
                                                       "Unable to resolve host name %s" % schedule.fqdn)
                else:
                    return self._reportResult(test_type, schedule, datetime.now(), 0, 0,
                                              "Unable to resolve host name %s" % schedule.fqdn)

    if test_type != 'traceroute':
        try:
            if hasattr(schedule, "fqdn") and not kwargs.get("nodns"):
                r = DNS.Request(schedule.fqdn, qtype='A').req()
                for a in r.answers:
                    if a['typename'] != 'A':
                        continue
                    schedule.ip_address = a['data']
                    # Cap the cached record lifetime at our configured maximum TTL.
                    schedule.dns_ttl_expiration = datetime.now() + timedelta(
                        seconds=min(self.max_allowed_ttl, int(a['ttl'])))
        except Exception:
            logging.getLogger("checker").error(
                "Could not find DNS info for %s: %s" % (schedule.fqdn, traceback.format_exc()))

        # Run checker
        if checker.textkey == 'resource.snmp':
            checker.check(schedule)
            return self._reportResourceResults(**checker.test_results)
        else:
            checker.port = None
            if hasattr(checker, "default_port"):
                checker.port = checker.default_port
            if schedule.port:
                checker.port = int(schedule.port)
            ret = checker.check(schedule, metadata)
            return self._reportResult(test_type, schedule, *ret)
    else:
        # Run traceroute
        start = time.time()
        metadata = TracerouteThread.doTraceroute(schedule)
        duration = time.time() - start
        return self._reportResult(test_type, schedule, datetime.now(), 1, duration, metadata)
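# Minimal sketch of the TTL-capping rule check() applies, assuming the PyDNS
# answer format used above ({'typename': 'A', 'data': ip, 'ttl': seconds});
# the helper name is illustrative.
def _ttl_expiration(answer, max_allowed_ttl):
    from datetime import datetime, timedelta
    # Never cache a record longer than the configured maximum TTL.
    return datetime.now() + timedelta(seconds=min(max_allowed_ttl, int(answer['ttl'])))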
def _to_python(self, value, state):
    import iplib
    # A bare dotted quad is accepted by validating it as an implicit /32;
    # constructing the CIDR performs the validation.
    if iplib.is_dot(value):
        iplib.CIDR(value + "/32")
        return value
    try:
        iplib.CIDR(value)
        return value
    except ValueError:
        raise Invalid(
            "Please enter an IP address, for example '192.168.0.0'",
            value, state)
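# Standalone sketch of the same validation rule, assuming iplib's CIDR and
# is_dot behave as used above: a bare dotted quad is validated as an implicit
# /32. The helper name is illustrative.
def _looks_like_ip_or_cidr(value):
    import iplib
    try:
        iplib.CIDR(value + "/32" if iplib.is_dot(value) else value)
        return True
    except ValueError:
        return False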
def run(self):
    # Sentinel expiration meaning "flush the resolver cache, then re-resolve".
    flush_dt = datetime(1900, 1, 1, 0, 0, 0)
    while 1:
        if self.shutdown_flag.isSet():
            self.logger.info("DNSThread shutting down")
            break

        # Get all the schedules whose TTL timeout has passed
        schedules = nodedb.LocalSchedule.select(
            OR(nodedb.LocalSchedule.q.dns_ttl_expiration < datetime.now(),
               nodedb.LocalSchedule.q.dns_ttl_expiration == None))
        self.logger.info("Found %s schedules to update DNS for" % schedules.count())
        for s in schedules:
            if not s.fqdn:
                continue
            # A dotted-quad "host name" is already an IP address.
            if iplib.is_dot(s.fqdn):
                s.ip_address = s.fqdn
                continue
            if s.dns_ttl_expiration == flush_dt:
                try:
                    call("unbound-control flush %s" % s.fqdn, shell=True)
                except Exception, e:
                    self.logger.error("Could not run unbound-control: %s" % str(e))
            try:
                r = DNS.Request(s.fqdn, qtype='A').req()
                found = False
                for a in r.answers:
                    if a['typename'] != 'A':
                        continue
                    s.ip_address = a['data']
                    s.dns_ttl_expiration = datetime.now() + timedelta(
                        seconds=min(self.max_allowed_ttl, int(a['ttl'])))
                    self.test_conduit['last_resolved_dns'] = (s.fqdn, s.ip_address)
                    found = True
                    break
                if not found:
                    s.ip_address = None
            except Exception:
                self.logger.error("Error looking up DNS for %s: %s"
                                  % (s.fqdn, traceback.format_exc()))

        # Sleep for a bit until we're needed again.
        try:
            interval = self.__config.get("Checker", "dns_sleep_interval")
        except Exception:
            interval = 15
        # Equivalent to sleeping, but waiting on the shutdown event
        # prevents hanging on shutdown.
        self.shutdown_flag.wait(float(interval))
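# Hedged sketch of how a cache flush is presumably requested elsewhere in the
# codebase: writing the 1900-01-01 sentinel into dns_ttl_expiration makes the
# loop above run "unbound-control flush" and immediately re-resolve. The
# function name and the schedule_row parameter are illustrative.
def _request_dns_flush(schedule_row):
    from datetime import datetime
    schedule_row.dns_ttl_expiration = datetime(1900, 1, 1, 0, 0, 0)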
def doSNMPDiscovery(self, schedule):
    "Perform a walk rooted at the OID specified in the schedule and record the child OIDs discovered"
    self.logger.info("Performing SNMP discovery for %s on %s"
                     % (schedule.base_oid, schedule.fqdn))
    if schedule.base_oid[0] != '.':
        schedule.base_oid = '.%s' % schedule.base_oid

    # Look up the IP address if needed
    if schedule.ip_address is None and iplib.is_dot(schedule.fqdn):
        schedule.ip_address = schedule.fqdn
    elif schedule.ip_address is None:
        self.logger.info("DNS lookup for %s" % schedule.fqdn)
        # Need to do a DNS lookup so we can query
        try:
            schedule.ip_address = socket.gethostbyname(schedule.fqdn)
        except socket.error:
            # Couldn't get an IP address - report as an error
            self.logger.critical("Couldn't resolve DNS for %s" % schedule.fqdn)
            return
        self.logger.info("DNS lookup finished for %s" % schedule.fqdn)
    ip_address = schedule.ip_address

    data = pickle.loads(schedule.metadata)
    port = int(data.get('snmp_port', 161))
    community = data.get('snmp_community', 'public')
    version = data.get('snmp_version', '1')
    user = data.get('snmp_user', 'USER')
    auth_algorithm = data.get('snmp_authentication_algorithm', 'off')
    auth_key = data.get('snmp_authentication_key', 'AUTH_KEY')
    enc_algorithm = data.get('snmp_encryption_algorithm', 'off')
    enc_key = data.get('snmp_encryption_key', 'ENC_KEY')

    oid_map = self.do_snmp_walk(version, ip_address, port, community, user,
                                auth_algorithm, auth_key, enc_algorithm, enc_key,
                                schedule.base_oid)
    # If a parallel "name" table is known for this base OID, walk it too.
    if schedule.base_oid in self.oid_name_map:
        name_map = self.do_snmp_walk(version, ip_address, port, community, user,
                                     auth_algorithm, auth_key, enc_algorithm, enc_key,
                                     self.oid_name_map[schedule.base_oid])
    else:
        name_map = {}

    # Diff what we found with what's currently stored in LocalSNMPDiscovery
    previous = {}
    for d in nodedb.LocalSNMPDiscovery.select(
            nodedb.LocalSNMPDiscovery.q.parent_resource_id == schedule.server_resource_id):
        previous[d.oid_numeric] = d

    # Create new records for any newly discovered OIDs
    for oid in oid_map.keys():
        oid = str(oid)
        prefix, index = oid.rsplit(".", 1)
        if prefix in self.oid_name_map:
            name_oid = "%s.%s" % (self.oid_name_map[prefix], index)
            name = name_map.get(name_oid, "").strip('"')
        else:
            name = ""
        if oid not in previous:
            # Newly discovered OID that wasn't there previously
            nodedb.LocalSNMPDiscovery(parent_resource_id=schedule.server_resource_id,
                                      oid_numeric=oid, harvested=None,
                                      deleted=None, name=name)
        elif previous[oid].deleted:
            # Was previously there but deleted - bring it back and remove it
            # from the previous dict, so we know it's been seen
            previous[oid].deleted = None
            previous[oid].harvested = None
            del previous[oid]
        elif previous[oid].name != name:
            # We've seen this one before, but the name has changed - mark it to be re-synced
            previous[oid].name = name
            previous[oid].harvested = None
            del previous[oid]
        else:
            # Seen previously, no changes
            del previous[oid]

    # Mark any remaining local entries as deleted so the RH picks up the status change
    for oid, entry in previous.items():
        entry.deleted = datetime.now()
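# Minimal sketch of the name-OID derivation used in the discovery diff above:
# the last OID component is the row index, which is re-applied to the base OID
# of the parallel "name" table. The helper name is illustrative; oid_name_map
# contents are deployment-specific.
def _name_oid_for(oid, oid_name_map):
    prefix, index = oid.rsplit(".", 1)
    if prefix in oid_name_map:
        return "%s.%s" % (oid_name_map[prefix], index)
    return None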
def run(self):
    try:
        batch_start_time = time.time()
        batch_busy_time = 0.0
        while not self.individual_shutdown_flag:
            # If we've received the shutdown signal, break out of the loop so
            # the system can gracefully shut down.
            if self.shutdown_flag.isSet():
                self.logger.info("%s shutting down" % self.getName())
                break

            # Get something to work on, and exit if it's the special
            # "end of work" trigger.
            schedule = self.getWork()
            if not schedule:
                self.logger.info("Work complete for %s - exiting" % self.getName())
                break

            # Mark when we started processing this schedule
            loop_start_time = time.time()

            # Special handling for an SNMP list of schedules - set up two
            # separate variables: "schedule" is a single schedule used to
            # determine the plugin type and details, and "schedules" is the
            # list of schedules to be checked.
            if isinstance(schedule, list):
                schedules = schedule
                schedule = schedule[0]
            else:
                schedules = [schedule]

            # Look up the IP address if needed
            if not schedule.ip_address and iplib.is_dot(schedule.fqdn):
                schedule.ip_address = schedule.fqdn
            elif not schedule.ip_address:
                self.logger.info("DNS lookup for %s" % schedule.fqdn)
                # Need to do a DNS lookup so we can query
                try:
                    schedule.ip_address = socket.gethostbyname(schedule.fqdn)
                except socket.error:
                    # Couldn't get an IP address - report as an error
                    check_type = schedule.check_type
                    checker = self.plugins[check_type]
                    if checker.textkey != "resource.snmp":
                        self.plugins[schedule.check_type].reportResults(
                            schedule, datetime.now(), 0, 0,
                            "Unable to resolve host name %s" % schedule.fqdn)
                    continue
                self.logger.info("DNS lookup finished for %s" % schedule.fqdn)

            self.logger.debug("%s [%s]" % (schedule.schedule_id, self.check_count))

            # Dispatch control to the proper check method
            try:
                self.check_count += 1
                check_type = schedule.check_type
                checker = self.plugins[check_type]
            except KeyError:
                self.logger.error("%s Unable to find plugin for %s"
                                  % (self.getName(), schedule.check_type))
                try:
                    def _cfg(option, default):
                        # Fall back to a default when the option is missing.
                        try:
                            return self.__config.get("ErrorCatcher", option)
                        except Exception:
                            return default

                    email_from = self.__config.get("ErrorCatcher", "email.from")
                    email_to = self.__config.get("ErrorCatcher", "email.to")
                    email_server = _cfg("email.server", "localhost")
                    email_port = _cfg("email.port", 25)
                    email_user = _cfg("email.user", None)
                    email_password = _cfg("email.password", None)
                    running_environment = _cfg("running_environment", "Development")

                    text = "%s Unable to find plugin for %s" % (self.getName(), schedule.check_type)
                    msg = MIMEText(text)
                    msg["From"] = email_from
                    msg["To"] = email_to
                    msg["Subject"] = "Checker plugin not found on %s" % running_environment
                    s = smtplib.SMTP(email_server, email_port)
                    s.ehlo()
                    if email_user:
                        s.starttls()
                        s.ehlo()
                        s.login(email_user, email_password)
                    s.sendmail(email_from, [email_to], msg.as_string())
                    s.quit()
                except Exception:
                    pass
                continue

            # Latency bookkeeping: calculate how long the check has been
            # waiting, from the time it was queued until now (with
            # microsecond resolution).
            when_queued = schedule.next_check_time - timedelta(seconds=schedule.frequency)
            latency = datetime.now() - when_queued
            latency = latency.seconds + (latency.microseconds / 1000000.0)
            idle = time.time() - self.last_engaged
            self.latency_queue.put(latency)
            self.last_engaged = time.time()

            # Because SNMP uses a list of schedules and does a reportResults
            # on each individual result, we have to call it differently from
            # everyone else.
            if checker.textkey == "resource.snmp":
                self.logger.debug("checking SNMP %r", schedule)
                checker.check(schedules)
                self.logger.debug("done checking SNMP %r", schedule)
            else:
                self.logger.debug("loading metadata for %r", schedule)
                data = checker.loadMetadata(schedule)
                self.logger.debug("done loading metadata for %r", schedule)
                # This logic is plugin-object specific, but applicable to
                # every plugin that uses a port.
                checker.port = None
                if hasattr(checker, "default_port"):
                    checker.port = checker.default_port
                if schedule.port:
                    checker.port = int(schedule.port)
                self.logger.debug("checking on %r", schedule)
                ret = checker.check(schedule, data)
                self.logger.debug("done checking on %r, received %r", schedule, ret)
                # Remove old failed content if the check succeeded
                if ret[1]:
                    try:
                        for fc in nodedb.LocalFailedCheckContent.selectBy(
                                monitor_point_id=schedule.monitor_point_id):
                            fc.destroySelf()
                    except Exception:
                        pass
                self.logger.debug("reporting results for %r, %r", schedule, ret)
                checker.reportResults(schedule, *ret)
                self.logger.debug("done reporting results for %r, %r", schedule, ret)

            # Execution time bookkeeping
            batch_busy_time += time.time() - loop_start_time
            if time.time() - batch_start_time >= USAGE_REPORT_FREQUENCY:
                usage = 100.0 * (batch_busy_time / (time.time() - batch_start_time))
                self.logger.info("Thread %s usage: %.2f%%" % (self.getName(), usage))
                batch_start_time = time.time()
                batch_busy_time = 0.0

            if self.check_count >= self.lifetime:
                self.logger.critical("Exceeded thread lifetime, shutting down")
                break
    except Exception:
        # Log any unexpected error with its traceback before the thread exits.
        self.logger.exception("exception in thread")
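# Standalone sketch of the latency bookkeeping above: a check was queued at
# next_check_time - frequency, so its queue latency is "now minus that
# instant", folded into float seconds with microsecond resolution. The helper
# name is illustrative.
def _queue_latency_seconds(next_check_time, frequency):
    from datetime import datetime, timedelta
    when_queued = next_check_time - timedelta(seconds=frequency)
    delta = datetime.now() - when_queued
    return delta.seconds + (delta.microseconds / 1000000.0)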