def test_world_readable(self):
    """Test world readable cache.

    Builds a SimpleNagios with ``_world_readable=True`` on a fresh temporary
    path, reports an OK state through it, reads the cache back with a
    NagiosReporter and finally asserts that the file at that path has the
    "readable by others" permission bit (``stat.S_IROTH``) set.
    """
    handle, filename = tempfile.mkstemp()
    # Only the unique name is needed: close the descriptor right away so it
    # cannot leak when a later step raises, then remove the empty file so the
    # path does not exist when SimpleNagios is handed it as cache location.
    os.close(handle)
    os.unlink(filename)

    try:
        n = SimpleNagios(_cache=filename, _cache_user=self.nagios_user, _world_readable=True)
        n.ok("test")

        try:
            reporter_test = NagiosReporter('test_cache', filename, -1, self.nagios_user)
            reporter_test.report_and_exit()
        except SystemExit:
            # report_and_exit() is expected to terminate via SystemExit;
            # swallow it so the permission check below still runs.
            pass

        statres = os.stat(filename)
        self.assertTrue(statres.st_mode & stat.S_IROTH)
    finally:
        # Best-effort cleanup so the test does not leave its cache file
        # behind; the file may legitimately be missing if an earlier step failed.
        try:
            os.unlink(filename)
        except OSError:
            pass
def main():
    """Main function.

    Parses the command-line options, and then either

    - reports an OK nagios state and stops when an HA master address is given
      and proceed_on_ha_service() says this host should not proceed, or
    - reads the per-host settings (master, showq_path, mjobctl_path) from the
      configuration file, calls process_hold() on them and evaluates the
      resulting statistics against RELEASEJOB_LIMITS for the nagios state.

    The nagios state is stored in the file given by the
    nagios_check_filename option (NAGIOS_CHECK_FILENAME by default).
    """
    options = {
        'nagios_check_filename': ('filename of where the nagios check data is stored',
                                  str, 'store', NAGIOS_CHECK_FILENAME),
        'nagios_check_interval_threshold': ('threshold of nagios checks timing out',
                                            None, 'store', NAGIOS_CHECK_INTERVAL_THRESHOLD),
        'hosts': ('the hosts/clusters that should be contacted for job information', None, 'extend', []),
        'location': ('the location for storing the pickle file: gengar, muk', str, 'store', 'gengar'),
        'ha': ('high-availability master IP address', None, 'store', None),
        'dry-run': ('do not make any updates whatsoever', None, 'store_true', False),
    }

    opts = simple_option(options)

    # Use the parsed option rather than the module constant, so that a
    # filename given on the command line is actually honoured. The option
    # defaults to NAGIOS_CHECK_FILENAME, so the default behaviour is unchanged.
    nag = SimpleNagios(_cache=opts.options.nagios_check_filename)

    if opts.options.ha and not proceed_on_ha_service(opts.options.ha):
        _log.info("Not running on the target host in the HA setup. Stopping.")
        nag.ok("Not running on the HA master.")
    else:
        # parse config file: one section per requested host
        clusters = {}
        for host in opts.options.hosts:
            master = opts.configfile_parser.get(host, "master")
            showq_path = opts.configfile_parser.get(host, "showq_path")
            mjobctl_path = opts.configfile_parser.get(host, "mjobctl_path")
            clusters[host] = {
                'master': master,
                'spath': showq_path,
                'mpath': mjobctl_path,
            }

        # process the new and previous data
        released_jobids, stats = process_hold(clusters, dry_run=opts.options.dry_run)

        # nagios state: add the limits and the message to the statistics and
        # let the SimpleNagios instance evaluate them
        stats.update(RELEASEJOB_LIMITS)
        stats['message'] = "released %s jobs in hold" % len(released_jobids)
        nag._eval_and_exit(**stats)

    _log.info("Cached nagios state: %s %s" % (nag._final_state[0][1], nag._final_state[1]))
class ExtendedSimpleOption(SimpleOption):
    """
    Extends the SimpleOption class to allow other checks to occur at script prologue and epilogue.

    - nagios reporting
    - checking if running on the designated HA host
    - locking on a file

    The prologue should be called at the start of the script; the epilogue at the end.
    """

    def __init__(self, options, run_prologue=True, excepthook=None, **kwargs):
        """Initialise.

        If run_prologue is True (default), we immediately execute the prologue.

        Note that if taking a lock is requested (default), and the lock cannot be
        acquired for some reason, the program will exit.

        Arguments:
        - options: script options; passed through _merge_options before being
          handed to the SimpleOption initialiser
        - run_prologue: when True, prologue() is called from here
        - excepthook: callable to install as sys.excepthook; when not given,
          critical_exception_handler is installed instead
        - kwargs: passed on unchanged to the SimpleOption initialiser
        """
        options_ = _merge_options(options)
        super(ExtendedSimpleOption, self).__init__(options_, **kwargs)

        self.nagios_reporter = None
        self.lockfile = None

        # NOTE(review): prologue() logs through self.log before it is
        # (re)assigned at the end of this method; presumably the parent
        # initialiser already provides it -- confirm.
        if run_prologue:
            self.prologue()

        # The hook is only installed once the prologue has run.
        if not excepthook:
            sys.excepthook = self.critical_exception_handler
        else:
            sys.excepthook = excepthook

        self.log = fancylogger.getLogger()

    def prologue(self):
        """Checks the options given for settings and takes appropriate action.

        See _merge_options for the format.

        - if nagios_report is set, creates a SimpleNagios instance and prints the report.
        - if ha is set, checks if running on the correct host, set the appropriate nagios message and bail if not.
        - if locking_filename is set, take a lock. If the lock fails, bork and set the nagios exit accordingly.
        """
        # bail if nagios report is requested
        self.nagios_reporter = SimpleNagios(
            _cache=self.options.nagios_check_filename,
            _report_and_exit=self.options.nagios_report,
            _threshold=self.options.nagios_check_interval_threshold,
            _cache_user=self.options.nagios_user,
        )

        # check for HA host
        if self.options.ha and not proceed_on_ha_service(self.options.ha):
            self.log.warning("Not running on the target host %s in the HA setup. Stopping." % (self.options.ha,))
            self.nagios_reporter.ok("Not running on the HA master.")
            sys.exit(NAGIOS_EXIT_OK)

        # no lock is taken when locking is disabled or during a dry run
        if not self.options.disable_locking and not self.options.dry_run:
            self.lockfile = TimestampedPidLockfile(
                self.options.locking_filename,
                threshold=self.options.nagios_check_interval_threshold * 2,
            )
            lock_or_bork(self.lockfile, self.nagios_reporter)

        self.log.info("%s has started" % (_script_name(sys.argv[0])))

    def _epilogue(self):
        """Release the lock, under the same conditions prologue() uses for taking it."""
        if not self.options.disable_locking and not self.options.dry_run:
            release_or_bork(self.lockfile, self.nagios_reporter)

    def epilogue(self, nagios_message, nagios_thresholds=None):
        """Run at the end of a script, quitting gracefully if possible.

        Arguments:
        - nagios_message: message handed to the nagios reporter
        - nagios_thresholds: optional dict of keyword arguments for the
          reporter's _eval_and_exit(); the caller's dict is not modified
        """
        # Work on a copy, so adding the 'message' key does not leak into the
        # dict owned by the caller.
        if nagios_thresholds is None:
            thresholds = {}
        else:
            thresholds = dict(nagios_thresholds)

        self._epilogue()

        thresholds['message'] = nagios_message
        self.nagios_reporter._eval_and_exit(**thresholds)
        self.log.info("%s has finished" % (_script_name(sys.argv[0])))  # may not be reached

    def ok(self, nagios_message):
        """Run at the end of a script and force an OK exit."""
        self._epilogue()
        self.nagios_reporter.ok(nagios_message)

    def warning(self, nagios_message):
        """Run at the end of a script and force a Warning exit."""
        self._epilogue()
        self.nagios_reporter.warning(nagios_message)

    def critical(self, nagios_message):
        """Run at the end of a script and force a Critical exit."""
        self._epilogue()
        self.nagios_reporter.critical(nagios_message)

    def critical_exception_handler(self, tp, value, traceback):
        """
        Run at the end of a script and force a Critical exit.

        This function is meant to be used as sys.excepthook

        Arguments (as supplied to sys.excepthook):
        - tp: exception class
        - value: exception instance
        - traceback: traceback object
        """
        self.log.exception("unhandled exception detected: %s - %s", tp, value)
        self.log.debug("traceback %s", traceback)

        message = "Script failure: %s - %s" % (tp, value)

        # sys.exc_clear() only exists on Python 2. Calling it unconditionally
        # raises AttributeError on Python 3, which would abort this handler
        # before the critical state is reported.
        exc_clear = getattr(sys, 'exc_clear', None)
        if exc_clear is not None:
            exc_clear()

        self.critical(message)
class ExtendedSimpleOption(SimpleOption):
    """
    Extends the SimpleOption class to allow other checks to occur at script prologue and epilogue.

    - nagios reporting
    - checking if running on the designated HA host
    - locking on a file

    The prologue should be called at the start of the script; the epilogue at the end.
    """

    def __init__(self, options, run_prologue=True, excepthook=None, **kwargs):
        """Initialise.

        If run_prologue is True (default), we immediately execute the prologue.

        Note that if taking a lock is requested (default), and the lock cannot be
        acquired for some reason, the program will exit.

        Arguments:
        - options: script options; passed through _merge_options before being
          handed to the SimpleOption initialiser
        - run_prologue: when True, prologue() is called from here
        - excepthook: callable to install as sys.excepthook; when not given,
          critical_exception_handler is installed instead
        - kwargs: passed on unchanged to the SimpleOption initialiser
        """
        options_ = _merge_options(options)
        super(ExtendedSimpleOption, self).__init__(options_, **kwargs)

        self.nagios_reporter = None
        self.lockfile = None

        # NOTE(review): prologue() logs through self.log before it is
        # (re)assigned at the end of this method; presumably the parent
        # initialiser already provides it -- confirm.
        if run_prologue:
            self.prologue()

        # The hook is only installed once the prologue has run.
        if not excepthook:
            sys.excepthook = self.critical_exception_handler
        else:
            sys.excepthook = excepthook

        self.log = fancylogger.getLogger()

    def prologue(self):
        """Checks the options given for settings and takes appropriate action.

        See _merge_options for the format.

        - if nagios_report is set, creates a SimpleNagios instance and prints the report.
        - if ha is set, checks if running on the correct host, set the appropriate nagios message and bail if not.
        - if locking_filename is set, take a lock. If the lock fails, bork and set the nagios exit accordingly.
        """
        # bail if nagios report is requested
        self.nagios_reporter = SimpleNagios(
            _cache=self.options.nagios_check_filename,
            _report_and_exit=self.options.nagios_report,
            _threshold=self.options.nagios_check_interval_threshold,
            _cache_user=self.options.nagios_user,
        )

        # check for HA host
        if self.options.ha and not proceed_on_ha_service(self.options.ha):
            self.log.warning(
                "Not running on the target host %s in the HA setup. Stopping."
                % (self.options.ha, ))
            self.nagios_reporter.ok("Not running on the HA master.")
            sys.exit(NAGIOS_EXIT_OK)

        # no lock is taken when locking is disabled or during a dry run
        if not self.options.disable_locking and not self.options.dry_run:
            self.lockfile = TimestampedPidLockfile(
                self.options.locking_filename,
                threshold=self.options.nagios_check_interval_threshold * 2)
            lock_or_bork(self.lockfile, self.nagios_reporter)

        self.log.info("%s has started" % (_script_name(sys.argv[0])))

    def _epilogue(self):
        """Release the lock, under the same conditions prologue() uses for taking it."""
        if not self.options.disable_locking and not self.options.dry_run:
            release_or_bork(self.lockfile, self.nagios_reporter)

    def epilogue(self, nagios_message, nagios_thresholds=None):
        """Run at the end of a script, quitting gracefully if possible.

        Arguments:
        - nagios_message: message handed to the nagios reporter
        - nagios_thresholds: optional dict of keyword arguments for the
          reporter's _eval_and_exit(); the caller's dict is not modified
        """
        # Work on a copy, so adding the 'message' key does not leak into the
        # dict owned by the caller.
        if nagios_thresholds is None:
            thresholds = {}
        else:
            thresholds = dict(nagios_thresholds)

        self._epilogue()

        thresholds['message'] = nagios_message
        self.nagios_reporter._eval_and_exit(**thresholds)
        self.log.info("%s has finished" % (_script_name(sys.argv[0])))  # may not be reached

    def ok(self, nagios_message):
        """Run at the end of a script and force an OK exit."""
        self._epilogue()
        self.nagios_reporter.ok(nagios_message)

    def warning(self, nagios_message):
        """Run at the end of a script and force a Warning exit."""
        self._epilogue()
        self.nagios_reporter.warning(nagios_message)

    def critical(self, nagios_message):
        """Run at the end of a script and force a Critical exit."""
        self._epilogue()
        self.nagios_reporter.critical(nagios_message)

    def critical_exception_handler(self, tp, value, traceback):
        """
        Run at the end of a script and force a Critical exit.

        This function is meant to be used as sys.excepthook

        Arguments (as supplied to sys.excepthook):
        - tp: exception class
        - value: exception instance
        - traceback: traceback object
        """
        self.log.exception("unhandled exception detected: %s - %s", tp, value)
        self.log.debug("traceback %s", traceback)

        message = "Script failure: %s - %s" % (tp, value)

        # sys.exc_clear() only exists on Python 2. Calling it unconditionally
        # raises AttributeError on Python 3, which would abort this handler
        # before the critical state is reported.
        exc_clear = getattr(sys, 'exc_clear', None)
        if exc_clear is not None:
            exc_clear()

        self.critical(message)