class Shinken(BaseSatellite):
    """Scheduler daemon.

    Waits for a configuration to arrive (see ``do_loop_turn``), loads it
    into its ``Scheduler`` instance (see ``setup_new_conf``) and then runs
    that scheduler.
    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default='7768'),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):
        """Create the daemon, its arbiter/stats interfaces and its scheduler.

        ``config_file``, ``is_daemon``, ``do_replace``, ``debug`` and
        ``debug_file`` are forwarded unchanged to ``BaseSatellite.__init__``.
        ``profile`` is accepted but not used by this constructor.
        """
        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon,
                               do_replace, debug, debug_file)

        self.interface = IForArbiter(self)
        self.istats = IStats(self)
        self.sched = Scheduler(self)

        # Created (and re-created) by setup_new_conf() for each configuration.
        self.ichecks = None
        self.ibroks = None
        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def do_stop(self):
        """Unregister the check/brok interfaces (if any), then stop the daemon."""
        if self.http_daemon:
            if self.ibroks:
                self.http_daemon.unregister(self.ibroks)
            if self.ichecks:
                self.http_daemon.unregister(self.ichecks)
        super(Shinken, self).do_stop()

    @staticmethod
    def _flag_no_valid_time(c):
        """Store on ``c`` the error result used when no valid time remains."""
        # NOTE(review): callers select items on `c.status` but this writes
        # `c.state`; kept unchanged -- confirm which attribute is intended.
        c.state = 'waitconsume'
        c.exit_status = 2
        c.output = '(Error: there is no available check time after time change!)'
        c.check_time = time.time()
        c.execution_time = 0

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of ``difference`` for all
        hosts/services/checks/notifs.

        Only items whose status is 'scheduled' and that have a ``t_to_go``
        are shifted; everything else is left untouched.
        """
        logger.warning("A system time change of %d has been detected. Compensating..."
                       % difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for h in self.sched.hosts:
            h.compensate_system_time_change(difference)
        for s in self.sched.services:
            s.compensate_system_time_change(difference)

        # Now all checks
        for c in self.sched.checks.values():
            # Already launched checks should not be touched
            if c.status == 'scheduled' and c.t_to_go is not None:
                ref = c.ref
                new_t = max(0, c.t_to_go + difference)
                if ref.check_period is not None:
                    # The shifted time must still match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                if new_t is None:
                    # No valid time left: report it as an error result
                    self._flag_no_valid_time(c)
                else:
                    c.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all actions
        for c in self.sched.actions.values():
            # Already launched actions should not be touched. The t_to_go
            # guard mirrors the checks loop: None + difference would raise.
            if c.status == 'scheduled' and c.t_to_go is not None:
                # Event handlers do not have a ref
                ref = getattr(c, 'ref', None)
                new_t = max(0, c.t_to_go + difference)

                # Notifications must be checked against notification_period
                if c.is_a == 'notification':
                    if ref.notification_period:
                        new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                    # And they carry a creation_time too
                    c.creation_time = c.creation_time + difference

                if new_t is None:
                    self._flag_no_valid_time(c)
                else:
                    c.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """React to a signal: USR1 and USR2 request dumps, anything else stops us."""
        logger.warning("Received a SIGNAL %s" % sig)
        if sig == signal.SIGUSR1:
            # USR1: just dump memory
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:
            # USR2: dump objects
            self.sched.need_objects_dump = True
        else:
            # Anything else: die
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Wait for a configuration; when one is present, load it and run the scheduler."""
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info("New configuration loaded")
        self.sched.run()

    def setup_new_conf(self):
        """Install the configuration package stored in ``self.new_conf``.

        Unpickles the configuration, tags it with the package values, records
        the poller links, applies ``override_conf``, sets the timezone, loads
        the modules, (re)registers the IChecks/IBroks interfaces and finally
        hands the configuration to the scheduler, the macro resolver and the
        external command manager.
        """
        pk = self.new_conf
        conf_raw = pk['conf']
        override_conf = pk['override_conf']
        modules = pk['modules']
        satellites = pk['satellites']
        instance_name = pk['instance_name']
        push_flavor = pk['push_flavor']
        skip_initial_broks = pk['skip_initial_broks']
        accept_passive_unknown_check_results = pk['accept_passive_unknown_check_results']

        # horay, we got a name, we can set it in our stats objects
        statsmgr.register(instance_name, 'scheduler')

        t0 = time.time()
        # SECURITY: unpickling can execute arbitrary code. This must only
        # ever be fed data coming from a trusted sender.
        conf = cPickle.loads(conf_raw)
        logger.debug("Conf received at %d. Unserialized in %d secs" % (t0, time.time() - t0))
        self.new_conf = None

        # Tag the conf with our data
        self.conf = conf
        self.conf.push_flavor = push_flavor
        self.conf.instance_name = instance_name
        self.conf.skip_initial_broks = skip_initial_broks
        self.conf.accept_passive_unknown_check_results = accept_passive_unknown_check_results

        self.cur_conf = conf
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites

        if self.conf.human_timestamp_log:
            logger.set_human_format()

        # Now we create our pollers
        for pol_id in satellites['pollers']:
            p = satellites['pollers'][pol_id]
            self.pollers[pol_id] = p

            # The satellitemap may override address/port/... for the uri only:
            # work on a copy so the stored poller entry keeps its own values.
            if p['name'] in override_conf['satellitemap']:
                p = dict(p)
                p.update(override_conf['satellitemap'][p['name']])

            proto = 'https' if p['use_ssl'] else 'http'
            uri = '%s://%s:%s/' % (proto, p['address'], p['port'])
            self.pollers[pol_id]['uri'] = uri
            self.pollers[pol_id]['last_connection'] = 0

        # First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)

        if self.conf.use_timezone != '':
            logger.debug("Setting our timezone to %s" % str(self.conf.use_timezone))
            os.environ['TZ'] = self.conf.use_timezone
            time.tzset()

        if self.modules:
            logger.debug("I've got %s modules" % str(self.modules))

        # TODO: if scheduler had previous modules instanciated it must clean them!
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()

        # Give it an interface, but first remove the previous one if it exists
        if self.ichecks is not None:
            logger.debug("Deconnecting previous Check Interface")
            self.http_daemon.unregister(self.ichecks)
        # Now create and connect it
        self.ichecks = IChecks(self.sched)
        self.http_daemon.register(self.ichecks)
        logger.debug("The Scheduler Interface uri is: %s" % self.uri)

        # Same for Broks
        if self.ibroks is not None:
            logger.debug("Deconnecting previous Broks Interface")
            self.http_daemon.unregister(self.ibroks)
        # Create and connect it
        self.ibroks = IBroks(self.sched)
        self.http_daemon.register(self.ibroks)

        logger.info("Loading configuration.")
        self.conf.explode_global_conf()

        # We give sched its conf
        self.sched.reset()
        self.sched.load_conf(self.conf)
        self.sched.load_satellites(self.pollers, self.reactionners)

        # We must update our Config dict macro with good value
        # from the config parameters
        self.sched.conf.fill_resource_macros_names_macros()

        # Creating the Macroresolver Class & unique instance
        m = MacroResolver()
        m.init(self.conf)

        # Now create the external commander.
        # It is an applyer: its role is not to dispatch commands,
        # but to apply them
        e = ExternalCommandManager(self.conf, 'applyer')

        # Scheduler needs to know about external command to
        # activate it if necessary
        self.sched.load_external_command(e)

        # External command needs the sched because it can raise checks
        e.load_scheduler(self.sched)

        # We clear our schedulers managed (it's us :) )
        # and set ourself in it
        self.schedulers = {self.conf.instance_id: self.sched}

    def what_i_managed(self):
        """Return ``{instance_id: push_flavor}`` for the loaded conf, or ``{}`` if none."""
        if hasattr(self, 'conf'):
            return {self.conf.instance_id: self.conf.push_flavor}
        else:
            return {}

    def main(self):
        """Daemon entry point: initialise, register the interfaces, run the main loop.

        Any exception is logged as critical (with its traceback) and re-raised.
        """
        try:
            self.load_config_file()
            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()
            self.http_daemon.register(self.interface)
            self.http_daemon.register(self.istats)

            # NOTE(review): the arbiter interface is registered above and
            # unregistered right here; kept as found -- confirm this is wanted.
            self.http_daemon.unregister(self.interface)
            self.uri = self.http_daemon.uri
            logger.info("[scheduler] General interface is at: %s" % self.uri)
            self.do_mainloop()
        except Exception:
            logger.critical("I got an unrecoverable error. I have to exit")
            logger.critical("You can log a bug ticket at https://github.com/naparuba/shinken/issues/new to get help")
            logger.critical("Back trace of it: %s" % (traceback.format_exc()))
            raise
class Shinken(BaseSatellite):
    """Scheduler daemon (Pyro-based).

    Waits for a configuration to arrive (see ``do_loop_turn``), loads it
    into its ``Scheduler`` instance (see ``setup_new_conf``) and then runs
    that scheduler.
    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='/usr/local/shinken/var/schedulerd.pid'),
        'port': IntegerProp(default='7768'),
        'local_log': PathProp(default='/usr/local/shinken/var/schedulerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
        """Create the daemon, its arbiter interface and its scheduler.

        All arguments are forwarded unchanged to ``BaseSatellite.__init__``.
        """
        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon,
                               do_replace, debug, debug_file)

        self.interface = IForArbiter(self)
        self.sched = Scheduler(self)

        # Created (and re-created) by setup_new_conf() for each configuration.
        self.ichecks = None
        self.ibroks = None
        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}

    def do_stop(self):
        """Unregister the check/brok interfaces (if any), then stop the daemon."""
        # The interfaces stay None until a configuration has been loaded, so
        # only unregister what was actually registered.
        if getattr(self, 'pyro_daemon', None) is not None:
            if self.ibroks is not None:
                self.pyro_daemon.unregister(self.ibroks)
            if self.ichecks is not None:
                self.pyro_daemon.unregister(self.ichecks)
        super(Shinken, self).do_stop()

    @staticmethod
    def _flag_no_valid_time(c):
        """Store on ``c`` the error result used when no valid time remains."""
        # NOTE(review): callers select items on `c.status` but this writes
        # `c.state`; kept unchanged -- confirm which attribute is intended.
        c.state = 'waitconsume'
        c.exit_status = 2
        c.output = '(Error: there is no available check time after time change!)'
        c.check_time = time.time()
        c.execution_time = 0

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of ``difference`` for all
        hosts/services/checks/notifs.

        Only items whose status is 'scheduled' and that have a ``t_to_go``
        are shifted; everything else is left untouched.
        """
        logger.log('Warning: A system time change of %d has been detected.  Compensating...' % difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        # Then we compensate all host/services
        for h in self.sched.hosts:
            h.compensate_system_time_change(difference)
        for s in self.sched.services:
            s.compensate_system_time_change(difference)

        # Now all checks
        for c in self.sched.checks.values():
            # Already launched checks should not be touched
            if c.status == 'scheduled' and c.t_to_go is not None:
                ref = c.ref
                new_t = max(0, c.t_to_go + difference)
                if ref.check_period is not None:
                    # The shifted time must still match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                if new_t is None:
                    # No valid time left: report it as an error result
                    self._flag_no_valid_time(c)
                else:
                    c.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all actions
        for c in self.sched.actions.values():
            # Already launched actions should not be touched
            if c.status == 'scheduled' and c.t_to_go is not None:
                # Event handlers do not have a ref
                ref = getattr(c, 'ref', None)
                new_t = max(0, c.t_to_go + difference)

                # Notifications must be checked against notification_period
                if c.is_a == 'notification':
                    if ref.notification_period:
                        new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                    # And they carry a creation_time too
                    c.creation_time = c.creation_time + difference

                if new_t is None:
                    self._flag_no_valid_time(c)
                else:
                    c.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """React to a signal: USR1 requests a memory dump, anything else stops us."""
        import signal

        # SIGUSR1's number is platform dependent (10 on Linux only), so use
        # the symbolic constant; it is absent on platforms without USR1.
        if sig == getattr(signal, 'SIGUSR1', None):
            self.sched.need_dump_memory = True
        else:
            # If not, die
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Wait for a configuration; when one is present, load it and run the scheduler."""
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        print("Ok we've got conf")
        self.setup_new_conf()
        print("Configuration Loaded")
        self.sched.run()

    def setup_new_conf(self):
        """Install the ``(conf, override_conf, modules, satellites)`` tuple
        stored in ``self.new_conf``.

        Records the poller links, applies ``override_conf``, sets the
        timezone, loads the modules, (re)registers the Checks/Broks
        interfaces and hands the configuration to the scheduler, the macro
        resolver and the external command manager.
        """
        (conf, override_conf, modules, satellites) = self.new_conf
        self.new_conf = None

        # Skipping a conf that has the same magic_hash as the current one was
        # tried and disabled: it made the scheduler die. It must be managed
        # better or not at all.

        self.conf = conf
        self.cur_conf = conf
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites

        # Now we create our pollers
        for pol_id in satellites['pollers']:
            p = satellites['pollers'][pol_id]
            self.pollers[pol_id] = p
            uri = pyro.create_uri(p['address'], p['port'], 'Schedulers', self.use_ssl)
            self.pollers[pol_id]['uri'] = uri
            self.pollers[pol_id]['last_connexion'] = 0
            print("Got a poller %s" % (p,))

        # First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            print("Overriding the property %s with value %s" % (prop, self.override_conf[prop]))
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)

        if self.conf.use_timezone != 'NOTSET':
            print("Setting our timezone to %s" % (self.conf.use_timezone,))
            os.environ['TZ'] = self.conf.use_timezone
            time.tzset()

        print("I've got modules %s" % (self.modules,))
        # TODO: if scheduler had previous modules instanciated it must clean them !
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()
        # And start external ones too
        self.modules_manager.start_external_instances()

        # Give it an interface, but first remove the previous one if it exists
        if self.ichecks is not None:
            print("Deconnecting previous Check Interface from pyro_daemon")
            self.pyro_daemon.unregister(self.ichecks)
        # Now create and connect it
        self.ichecks = IChecks(self.sched)
        self.uri = self.pyro_daemon.register(self.ichecks, "Checks")
        print("The Checks Interface uri is: %s" % (self.uri,))

        # Same for Broks
        if self.ibroks is not None:
            print("Deconnecting previous Broks Interface from pyro_daemon")
            self.pyro_daemon.unregister(self.ibroks)
        # Create and connect it
        self.ibroks = IBroks(self.sched)
        self.uri2 = self.pyro_daemon.register(self.ibroks, "Broks")
        print("The Broks Interface uri is: %s" % (self.uri2,))

        print("Loading configuration..")
        self.conf.explode_global_conf()

        # We give sched its conf
        self.sched.reset()
        self.sched.load_conf(self.conf)
        self.sched.load_satellites(self.pollers, self.reactionners)

        # We must update our Config dict macro with good value
        # from the config parameters
        self.sched.conf.fill_resource_macros_names_macros()

        # Creating the Macroresolver Class & unique instance
        m = MacroResolver()
        m.init(self.conf)

        # Now create the external commander.
        # It is an applyer: its role is not to dispatch commands,
        # but to apply them
        e = ExternalCommandManager(self.conf, 'applyer')

        # Scheduler needs to know about external command to
        # activate it if necessary
        self.sched.load_external_command(e)

        # External command needs the sched because it can raise checks
        e.load_scheduler(self.sched)

    def main(self):
        """Daemon entry point: initialise, register the arbiter interface, run the main loop."""
        self.load_config_file()
        self.do_daemon_init_and_start()
        self.uri2 = self.pyro_daemon.register(self.interface, "ForArbiter")
        print("The Arbiter Interface is at: %s" % (self.uri2,))
        self.do_mainloop()
class Shinken(BaseSatellite):
    """Scheduler daemon (Pyro-based).

    Waits for a configuration to arrive (see ``do_loop_turn``), loads it
    into its ``Scheduler`` instance (see ``setup_new_conf``) and then runs
    that scheduler.
    """

    properties = BaseSatellite.properties.copy()
    properties.update(
        {
            "pidfile": PathProp(default="schedulerd.pid"),
            "port": IntegerProp(default="7768"),
            "local_log": PathProp(default="schedulerd.log"),
        }
    )

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
        """Create the daemon, its arbiter interface and its scheduler.

        All arguments are forwarded unchanged to ``BaseSatellite.__init__``.
        """
        BaseSatellite.__init__(self, "scheduler", config_file, is_daemon, do_replace, debug, debug_file)

        self.interface = IForArbiter(self)
        self.sched = Scheduler(self)

        # Created (and re-created) by setup_new_conf() for each configuration.
        self.ichecks = None
        self.ibroks = None
        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}

    def do_stop(self):
        """Unregister the check/brok interfaces (if any), then stop the daemon."""
        if self.pyro_daemon:
            if self.ibroks:
                self.pyro_daemon.unregister(self.ibroks)
            if self.ichecks:
                self.pyro_daemon.unregister(self.ichecks)
        super(Shinken, self).do_stop()

    @staticmethod
    def _flag_no_valid_time(c):
        """Store on ``c`` the error result used when no valid time remains."""
        # NOTE(review): callers select items on `c.status` but this writes
        # `c.state`; kept unchanged -- confirm which attribute is intended.
        c.state = "waitconsume"
        c.exit_status = 2
        c.output = "(Error: there is no available check time after time change!)"
        c.check_time = time.time()
        c.execution_time = 0

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of ``difference`` for all
        hosts/services/checks/notifs.

        Only items whose status is "scheduled" and that have a ``t_to_go``
        are shifted; everything else is left untouched.
        """
        logger.log("Warning: A system time change of %d has been detected.  Compensating..." % difference)
        # We only need to change some value
        self.program_start = max(0, self.program_start + difference)

        # Then we compensate all host/services
        for h in self.sched.hosts:
            h.compensate_system_time_change(difference)
        for s in self.sched.services:
            s.compensate_system_time_change(difference)

        # Now all checks
        for c in self.sched.checks.values():
            # Already launched checks should not be touched
            if c.status == "scheduled" and c.t_to_go is not None:
                ref = c.ref
                new_t = max(0, c.t_to_go + difference)
                if ref.check_period is not None:
                    # The shifted time must still match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                if new_t is None:
                    # No valid time left: report it as an error result
                    self._flag_no_valid_time(c)
                else:
                    c.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all actions
        for c in self.sched.actions.values():
            # Already launched actions should not be touched
            if c.status == "scheduled" and c.t_to_go is not None:
                # Event handlers do not have a ref
                ref = getattr(c, "ref", None)
                new_t = max(0, c.t_to_go + difference)

                # Notifications must be checked against notification_period
                if c.is_a == "notification":
                    if ref.notification_period:
                        new_t = ref.notification_period.get_next_valid_time_from_t(new_t)
                    # And they carry a creation_time too
                    c.creation_time = c.creation_time + difference

                if new_t is None:
                    self._flag_no_valid_time(c)
                else:
                    c.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """React to a signal: USR1 requests a memory dump, anything else stops us."""
        import signal

        # SIGUSR1's number is platform dependent (10 on Linux only), so use
        # the symbolic constant; it is absent on platforms without USR1.
        if sig == getattr(signal, "SIGUSR1", None):
            self.sched.need_dump_memory = True
        else:
            # If not, die
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Wait for a configuration; when one is present, load it and run the scheduler."""
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        print("Ok we've got conf")
        self.setup_new_conf()
        print("Configuration Loaded")
        self.sched.run()

    def setup_new_conf(self):
        """Install the ``(conf, override_conf, modules, satellites)`` tuple
        stored in ``self.new_conf``.

        Records the poller links, applies ``override_conf``, sets the
        timezone, loads the modules, (re)registers the Checks/Broks
        interfaces and hands the configuration to the scheduler, the macro
        resolver and the external command manager.
        """
        (conf, override_conf, modules, satellites) = self.new_conf
        self.new_conf = None

        # Skipping a conf that has the same magic_hash as the current one was
        # tried and disabled: it made the scheduler die. It must be managed
        # better or not at all.

        self.conf = conf
        self.cur_conf = conf
        self.override_conf = override_conf
        self.modules = modules
        self.satellites = satellites

        if self.conf.human_timestamp_log:
            logger.set_human_format()

        # Now we create our pollers
        for pol_id in satellites["pollers"]:
            p = satellites["pollers"][pol_id]
            self.pollers[pol_id] = p
            uri = pyro.create_uri(p["address"], p["port"], "Schedulers", self.use_ssl)
            self.pollers[pol_id]["uri"] = uri
            self.pollers[pol_id]["last_connection"] = 0

        # First mix conf and override_conf to have our definitive conf
        for prop in self.override_conf:
            val = self.override_conf[prop]
            setattr(self.conf, prop, val)

        if self.conf.use_timezone != "":
            print("Setting our timezone to %s" % (self.conf.use_timezone,))
            os.environ["TZ"] = self.conf.use_timezone
            time.tzset()

        if self.modules:
            print("I've got modules %s" % (self.modules,))

        # TODO: if scheduler had previous modules instanciated it must clean them !
        self.modules_manager.set_modules(self.modules)
        self.do_load_modules()
        # And start external ones too
        self.modules_manager.start_external_instances()

        # Give it an interface, but first remove the previous one if it exists
        if self.ichecks is not None:
            print("Deconnecting previous Check Interface from pyro_daemon")
            self.pyro_daemon.unregister(self.ichecks)
        # Now create and connect it
        self.ichecks = IChecks(self.sched)
        self.uri = self.pyro_daemon.register(self.ichecks, "Checks")
        print("The Checks Interface uri is: %s" % (self.uri,))

        # Same for Broks
        if self.ibroks is not None:
            print("Deconnecting previous Broks Interface from pyro_daemon")
            self.pyro_daemon.unregister(self.ibroks)
        # Create and connect it
        self.ibroks = IBroks(self.sched)
        self.uri2 = self.pyro_daemon.register(self.ibroks, "Broks")
        print("The Broks Interface uri is: %s" % (self.uri2,))

        print("Loading configuration..")
        self.conf.explode_global_conf()

        # We give sched its conf
        self.sched.reset()
        self.sched.load_conf(self.conf)
        self.sched.load_satellites(self.pollers, self.reactionners)

        # We must update our Config dict macro with good value
        # from the config parameters
        self.sched.conf.fill_resource_macros_names_macros()

        # Creating the Macroresolver Class & unique instance
        m = MacroResolver()
        m.init(self.conf)

        # Now create the external commander.
        # It is an applyer: its role is not to dispatch commands,
        # but to apply them
        e = ExternalCommandManager(self.conf, "applyer")

        # Scheduler needs to know about external command to
        # activate it if necessary
        self.sched.load_external_command(e)

        # External command needs the sched because it can raise checks
        e.load_scheduler(self.sched)

        # We clear our schedulers managed (it's us :) )
        # and set ourself in it
        self.schedulers = {self.conf.instance_id: self.sched}

    def what_i_managed(self):
        """Return ``{instance_id: push_flavor}`` for the loaded conf, or ``{}`` if none."""
        if hasattr(self, "conf"):
            return {self.conf.instance_id: self.conf.push_flavor}
        else:
            return {}

    def main(self):
        """Daemon entry point: initialise, register the arbiter interface, run the main loop.

        Any exception is logged (with its traceback) and re-raised.
        """
        try:
            self.load_config_file()
            self.do_daemon_init_and_start()
            self.uri2 = self.pyro_daemon.register(self.interface, "ForArbiter")
            logger.log("[scheduler] General interface is at: %s" % self.uri2)
            self.do_mainloop()
        except Exception:
            logger.log("CRITICAL ERROR: I got an unrecoverable error. I have to exit")
            logger.log("You can log a bug ticket at https://github.com/naparuba/shinken/issues/new to get help")
            logger.log("Back trace of it: %s" % (traceback.format_exc()))
            raise