Example #1
class Reactionner(Satellite):
    """
    This class is an application that launches actions for the schedulers
    Actions can be:
       Notifications
       Event handlers

    When running, the Reactionner will:
      Respond to pings from Arbiter
      Listen for new configurations from Arbiter

    The configuration consists of a list of Schedulers for which
    the Reactionner will launch actions.
    """
    do_checks = False  # I do not do checks
    do_actions = True
    my_type = 'reactionner'

    properties = Satellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='reactionnerd.pid'),
        'port': IntegerProp(default=7769),
        'local_log': PathProp(default='reactionnerd.log'),
    })

    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 profile=''):
        super(Reactionner,
              self).__init__('reactionner', config_file, is_daemon, do_replace,
                             debug, debug_file)
Example #2
class Poller(Satellite):
    """Poller class. Referenced as "app" in most Interface

    """
    do_checks = True  # I do checks
    do_actions = False  # but no actions
    my_type = 'poller'

    properties = Satellite.properties.copy()
    properties.update({
        'daemon_type': StringProp(default='poller'),
        'pidfile': PathProp(default='pollerd.pid'),
        'port': IntegerProp(default=7771),
        'local_log': PathProp(default='pollerd.log'),
    })

    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 port=None,
                 local_log=None,
                 daemon_name=None):
        self.daemon_name = 'poller'
        if daemon_name:
            self.daemon_name = daemon_name

        super(Poller,
              self).__init__(self.daemon_name, config_file, is_daemon,
                             do_replace, debug, debug_file, port, local_log)
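The daemon_name fallback above lets one Poller class run under several instance names. The idiom, as a standalone sketch (resolve_daemon_name is a hypothetical helper, not an Alignak function):

def resolve_daemon_name(default, daemon_name=None):
    """Keep the type default unless an explicit instance name is given."""
    return daemon_name if daemon_name else default


assert resolve_daemon_name('poller') == 'poller'
assert resolve_daemon_name('poller', 'poller-north') == 'poller-north'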
Example #3
class Receiver(Satellite):
    """Receiver class. Referenced as "app" in most Interface

    """
    my_type = 'receiver'

    properties = Satellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='receiverd.pid'),
        'port': IntegerProp(default=7773),
        'local_log': PathProp(default='receiverd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):

        super(Receiver, self).__init__('receiver', config_file, is_daemon,
                                       do_replace, debug, debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers and reactionners
        self.pollers = {}
        self.reactionners = {}

        # Modules are loaded only once
        self.have_modules = False

        # We can have a queue of external_commands given by modules;
        # it will be taken by the arbiter for processing
        self.external_commands = []
        # and the unprocessed ones, a buffer
        self.unprocessed_external_commands = []

        self.host_assoc = {}
        self.direct_routing = False
        self.accept_passive_unknown_check_results = False

        self.http_interface = ReceiverInterface(self)

        # Now create the external commander. It's just here to dispatch
        # the commands to schedulers
        ecm = ExternalCommandManager(None, 'receiver')
        ecm.load_receiver(self)
        self.external_command = ecm

    def add(self, elt):
        """Add an object to the receiver one
        Handles brok and externalcommand

        :param elt: object to add
        :type elt: object
        :return: None
        """
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For a brok, we tag it with our instance_id
            elt.instance_id = 0
            self.broks[elt._id] = elt
            return
        elif cls_type == 'externalcommand':
            logger.debug("Enqueuing an external command: %s",
                         str(ExternalCommand.__dict__))
            self.unprocessed_external_commands.append(elt)

    def push_host_names(self, sched_id, hnames):
        """Link hostnames to scheduler id.
        Called by alignak.satellite.IForArbiter.push_host_names

        :param sched_id: scheduler id to link to
        :type sched_id: int
        :param hnames: host names list
        :type hnames: list
        :return: None
        """
        for h_name in hnames:
            self.host_assoc[h_name] = sched_id

    def get_sched_from_hname(self, hname):
        """Get scheduler linked to the given host_name

        :param hname: host_name we want the scheduler from
        :type hname: str
        :return: scheduler with id corresponding to the mapping table
        :rtype: dict
        """
        item = self.host_assoc.get(hname, None)
        sched = self.schedulers.get(item, None)
        return sched

    def manage_brok(self, brok):
        """Send brok to modules. Modules have to implement their own manage_brok function.
        They usually do if they inherit from basemodule
        REF: doc/receiver-modules.png (4-5)

        :param brok: brok to manage
        :type brok: alignak.brok.Brok
        :return: None
        """
        to_del = []
        # Call all modules if they catch the call
        for mod in self.modules_manager.get_internal_instances():
            try:
                mod.manage_brok(brok)
            except Exception as exp:
                logger.warning("The mod %s raise an exception: %s, I kill it",
                               mod.get_name(), str(exp))
                logger.warning("Exception type: %s", type(exp))
                logger.warning("Back trace of this kill: %s",
                               traceback.format_exc())
                to_del.append(mod)
        # Now remove modules that raised an exception
        self.modules_manager.clear_instances(to_del)
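The receiver routes external commands to schedulers by host name through host_assoc. A standalone sketch of the push_host_names/get_sched_from_hname pair above (the scheduler entries here are placeholder dicts):

host_assoc = {}
schedulers = {1: {'name': 'scheduler-master'}}


def push_host_names(sched_id, hnames):
    # link every host name to the scheduler that manages it
    for h_name in hnames:
        host_assoc[h_name] = sched_id


def get_sched_from_hname(hname):
    # unknown hosts resolve to None instead of raising
    return schedulers.get(host_assoc.get(hname))


push_host_names(1, ['srv-web-01', 'srv-db-01'])
assert get_sched_from_hname('srv-web-01')['name'] == 'scheduler-master'
assert get_sched_from_hname('no-such-host') is None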
Example #4
class Broker(BaseSatellite):
    """
    Class to manage a Broker daemon
    A Broker is used to get data from the Scheduler and send it to modules. These modules in most
    cases export to other software, databases...
    """
    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile':   PathProp(default='brokerd.pid'),
        'port':      IntegerProp(default=7772),
        'local_log': PathProp(default='brokerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):

        super(Broker, self).__init__('broker', config_file, is_daemon, do_replace, debug,
                                     debug_file)

        # Our arbiters
        self.arbiters = {}

        # Our pollers, reactionners and receivers
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # Modules are loaded only once
        self.have_modules = False

        # We can have a queue of external_commands given by modules;
        # it will be processed by the arbiter
        self.external_commands = []

        # All broks to manage
        self.broks = []  # broks to manage
        # broks raised this turn and that needs to be put in self.broks
        self.broks_internal_raised = []
        # broks raised by the arbiters, we need a lock so the push can be in parallel
        # to our current activities and won't lock the arbiter
        self.arbiter_broks = []
        self.arbiter_broks_lock = threading.RLock()

        self.timeout = 1.0

        self.http_interface = BrokerInterface(self)

    def add(self, elt):
        """Add elt to this broker

        Original comment: Schedulers have some queues. We can simplify the call by adding
          elements into the proper queue just by looking at their type:
          Brok -> self.broks (TODO: better tag ID?)
          External commands -> self.external_commands

        :param elt: object to add
        :type elt: object
        :return: None
        """
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # For a brok, we tag it with our instance_id
            elt.instance_id = 0
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            logger.debug("Enqueuing an external command '%s'", str(ExternalCommand.__dict__))
            self.external_commands.append(elt)
        # Maybe we got a Message from the modules; it's a way to ask for something,
        # like the full data from a scheduler, for example.
        elif cls_type == 'message':
            # We got a message, great!
            logger.debug(str(elt.__dict__))
            if elt.get_type() == 'NeedData':
                data = elt.get_data()
                # A full instance id means: I got no data for this scheduler,
                # so give me everything!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    source = elt.source
                    logger.info('The module %s is asking me to get all initial data '
                                'from the scheduler %d',
                                source, c_id)
                    # so we just reset the connection and the running_id,
                    # it will just get all new things
                    try:
                        self.schedulers[c_id]['con'] = None
                        self.schedulers[c_id]['running_id'] = 0
                    except KeyError:  # maybe this instance was not known, forget it
                        logger.warning("the module %s ask me a full_instance_id "
                                       "for an unknown ID (%d)!", source, c_id)
            # Maybe a module tells me that it's dead; I must log its last words...
            if elt.get_type() == 'ICrash':
                data = elt.get_data()
                logger.error('the module %s just crash! Please look at the traceback:',
                             data['name'])
                logger.error(data['trace'])

                # The module's death will be detected elsewhere and the module restarted.

    def get_links_from_type(self, d_type):
        """If d_type parameter is in list, return this object linked, else None

        :param d_type: name of object
        :type d_type: str
        :return: return the object linked
        :rtype: object
        """
        s_type = {'scheduler': self.schedulers,
                  'arbiter': self.arbiters,
                  'poller': self.pollers,
                  'reactionner': self.reactionners,
                  'receiver': self.receivers
                  }
        if d_type in s_type:
            return s_type[d_type]
        return None

    def is_connection_try_too_close(self, elt):
        """Check if last_connection has been made very recently

        :param elt: dict with a last_connection key
        :type elt: dict
        :return: True if the last connection was made less than 5 seconds ago
        :rtype: bool
        """
        now = time.time()
        last_connection = elt['last_connection']
        if now - last_connection < 5:
            return True
        return False

    def pynag_con_init(self, _id, i_type='scheduler'):
        """Wrapper function for the real function do_
        just for timing the connection

        :param _id: id
        :type _id: int
        :param i_type: type of item
        :type i_type: str
        :return: the value returned by do_pynag_con_init
        :rtype: bool
        """
        _t0 = time.time()
        res = self.do_pynag_con_init(_id, i_type)
        statsmgr.incr('con-init.%s' % i_type, time.time() - _t0)
        return res

    def do_pynag_con_init(self, s_id, i_type='scheduler'):
        """Initialize or re-initialize connection with scheduler or arbiter if type == arbiter

        :param s_id: s_id
        :type s_id: int
        :param i_type: type of item
        :type i_type: str
        :return: None
        """
        # Get the right links dict to loop over
        links = self.get_links_from_type(i_type)
        if links is None:
            logger.debug('Type unknown for connection! %s', i_type)
            return

        # default timeout for daemons like pollers/reactionners/...
        timeout = 3
        data_timeout = 120

        if i_type == 'scheduler':
            # If the scheduler is not active, I do not try to init it;
            # it is just useless
            is_active = links[s_id]['active']
            if not is_active:
                return
            # schedulers also have real timeouts to respect
            timeout = links[s_id]['timeout']
            data_timeout = links[s_id]['data_timeout']

        # If we try to connect too often, we slow down our attempts
        if self.is_connection_try_too_close(links[s_id]):
            return

        # Ok, we can now update it
        links[s_id]['last_connection'] = time.time()

        # DBG: print "Init connection with", links[s_id]['uri']
        running_id = links[s_id]['running_id']
        # DBG: print "Running id before connection", running_id
        uri = links[s_id]['uri']
        try:
            con = links[s_id]['con'] = HTTPClient(uri=uri,
                                                  strong_ssl=links[s_id]['hard_ssl_name_check'],
                                                  timeout=timeout, data_timeout=data_timeout)
        except HTTPEXCEPTIONS as exp:
            # But the multiprocessing module is not compatible with it!
            # so we must disable it immediately after
            logger.info("Connection problem to the %s %s: %s",
                        i_type, links[s_id]['name'], str(exp))
            links[s_id]['con'] = None
            return

        try:
            # initial ping must be quick
            con.get('ping')
            new_run_id = con.get('get_running_id')
            new_run_id = float(new_run_id)
            # data transfer can be longer

            # The scheduler has been restarted: it has a new running_id.
            # So we clear all saved broks; they are obsolete now.
            if new_run_id != running_id:
                logger.debug("[%s] New running s_id for the %s %s: %s (was %s)",
                             self.name, i_type, links[s_id]['name'], new_run_id, running_id)
                links[s_id]['broks'].clear()
                # we must ask for a new full brok generation if
                # it's a scheduler
                if i_type == 'scheduler':
                    logger.debug("[%s] I ask for a broks generation to the scheduler %s",
                                 self.name, links[s_id]['name'])
                    con.get('fill_initial_broks', {'bname': self.name}, wait='long')
            # Ok all is done, we can save this new running_id
            links[s_id]['running_id'] = new_run_id
        except HTTPEXCEPTIONS as exp:
            logger.info("Connection problem to the %s %s: %s",
                        i_type, links[s_id]['name'], str(exp))
            links[s_id]['con'] = None
            return
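do_pynag_con_init above refuses to hammer an unreachable daemon: is_connection_try_too_close throttles retries to one attempt every 5 seconds. The same check, reduced to a runnable standalone sketch:

import time


def is_connection_try_too_close(link, min_interval=5):
    # True while the previous attempt is less than min_interval seconds old
    return time.time() - link['last_connection'] < min_interval


link = {'last_connection': time.time()}
assert is_connection_try_too_close(link)          # just tried: skip this turn
link['last_connection'] = time.time() - 10
assert not is_connection_try_too_close(link)      # old enough: retry allowed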
Example #5
class Alignak(BaseSatellite):
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'daemon_type': StringProp(default='scheduler'),
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 port=None,
                 local_log=None,
                 daemon_name=None):
        self.daemon_name = 'scheduler'
        if daemon_name:
            self.daemon_name = daemon_name

        BaseSatellite.__init__(self, self.daemon_name, config_file, is_daemon,
                               do_replace, debug, debug_file, port, local_log)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # stats part
        # --- copied from scheduler.py
        self.nb_pulled_checks = 0
        self.nb_pulled_actions = 0
        # self.nb_checks_send = 0

        self.nb_pushed_checks = 0
        self.nb_pushed_actions = 0

        self.nb_broks_send = 0
        self.nb_pulled_broks = 0
        # ---

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference,
                                      timeperiods):  # pragma: no cover, not with unit tests
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :param timeperiods: the timeperiods of the configuration, indexed by id
        :return: None
        """
        logger.warning(
            "A system time change of %d has been detected. Compensating...",
            difference)
        # We only need to change some values
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where the time changed before we got the conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already launched checks should not be touched
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = self.sched.find_item_by_id(chk.ref)
                new_t = max(0, t_to_go + difference)
                timeperiod = timeperiods[ref.check_period]
                if timeperiod is not None:
                    # But it's not so simple: we must match the timeperiod
                    new_t = timeperiod.get_next_valid_time_from_t(new_t)
                # But maybe there is no valid new value! Not good :(
                # Flag it as an error, with error output
                if new_t is None:
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all actions
        for act in self.sched.actions.values():
            # Already launched actions should not be touched
            if act.status == 'scheduled':
                t_to_go = act.t_to_go

                # Event handlers do not have a ref
                ref_id = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notifications should be checked against the notification_period
                if act.is_a == 'notification':
                    ref = self.sched.find_item_by_id(ref_id)
                    if ref.notification_period:
                        # But it's not so simple: we must match the timeperiod
                        notification_period = self.sched.timeperiods[
                            ref.notification_period]
                        new_t = notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And shift the creation_time too
                    act.creation_time += difference

                # But maybe there is no valid new value! Not good :(
                # Flag it as an error, with error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : dump_memory
        signal.SIGUSR2 : dump_object (nothing)
        signal.SIGTERM, signal.SIGINT : terminate process

        :param sig: signal caught by daemon
        :type sig: int
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.info("scheduler process %d received a signal: %s", os.getpid(),
                    str(sig))
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            logger.info("scheduler process %d is dying...", os.getpid())
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait for the initial conf and run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info(
            "[%s] New configuration loaded, scheduling for Alignak: %s",
            self.name, self.sched.alignak_name)
        self.sched.run()

    def setup_new_conf(self):  # pylint: disable=too-many-statements
        """Setup new conf received for scheduler

        :return: None
        """
        with self.conf_lock:
            self.clean_previous_run()
            new_conf = self.new_conf
            logger.info("[%s] Sending us a configuration", self.name)
            conf_raw = new_conf['conf']
            override_conf = new_conf['override_conf']
            modules = new_conf['modules']
            satellites = new_conf['satellites']
            instance_name = new_conf['instance_name']

            # Ok now we can save the retention data
            if hasattr(self.sched, 'conf'):
                self.sched.update_retention_file(forced=True)

            # hooray, we got a name; we can set it in our stats objects
            statsmgr.register(instance_name,
                              'scheduler',
                              statsd_host=new_conf['statsd_host'],
                              statsd_port=new_conf['statsd_port'],
                              statsd_prefix=new_conf['statsd_prefix'],
                              statsd_enabled=new_conf['statsd_enabled'])

            t00 = time.time()
            try:
                conf = unserialize(conf_raw)
            except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                logger.error(
                    'Cannot un-serialize configuration received from arbiter: %s',
                    exp)
                return  # without a usable conf we cannot go any further
            logger.debug("Conf received at %d. Un-serialized in %d secs", t00,
                         time.time() - t00)
            self.new_conf = None

            if 'scheduler_name' in new_conf:
                name = new_conf['scheduler_name']
            else:
                name = instance_name
            self.name = name

            # Set my own process title
            self.set_proctitle(self.name)

            logger.info("[%s] Received a new configuration, containing: ",
                        self.name)
            for key in new_conf:
                logger.info("[%s] - %s", self.name, key)
            logger.info("[%s] configuration identifiers: %s (%s)", self.name,
                        new_conf['conf_uuid'], new_conf['push_flavor'])

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = new_conf['push_flavor']
            self.conf.alignak_name = new_conf['alignak_name']
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = new_conf['skip_initial_broks']
            self.conf.accept_passive_unknown_check_results = \
                new_conf['accept_passive_unknown_check_results']

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = unserialize(modules, True)
            self.satellites = satellites

            # Now we create our pollers, reactionners and brokers
            for sat_type in ['pollers', 'reactionners', 'brokers']:
                if sat_type not in satellites:
                    continue
                for sat_id in satellites[sat_type]:
                    # Must look if we already have it
                    sats = getattr(self, sat_type)
                    sat = satellites[sat_type][sat_id]

                    sats[sat_id] = sat

                    if sat['name'] in override_conf['satellitemap']:
                        sat = dict(sat)  # make a copy
                        sat.update(override_conf['satellitemap'][sat['name']])

                    proto = 'http'
                    if sat['use_ssl']:
                        proto = 'https'
                    uri = '%s://%s:%s/' % (proto, sat['address'], sat['port'])

                    sats[sat_id]['uri'] = uri
                    sats[sat_id]['con'] = None
                    sats[sat_id]['running_id'] = 0
                    sats[sat_id]['last_connection'] = 0
                    sats[sat_id]['connection_attempt'] = 0
                    sats[sat_id]['max_failed_connections'] = 3
                    setattr(self, sat_type, sats)
                logger.debug("We have our %s: %s ", sat_type,
                             satellites[sat_type])
                logger.info("We have our %s:", sat_type)
                for daemon in satellites[sat_type].values():
                    logger.info(" - %s ", daemon['name'])

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.info("Setting our timezone to %s",
                            str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            self.do_load_modules(self.modules)

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()  # pylint: disable=E1101

            # we give the scheduler its conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners,
                                       self.brokers)

            # We must update our Config dict macros with the right values
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # self.conf.dump()
            # self.conf.quick_debug()

            # Now create the external commands manager
            # We are an applyer: our role is not to dispatch commands, but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer', self.sched)

            # Scheduler needs to know about this external command manager to use it if necessary
            self.sched.set_external_commands_manager(ecm)
            # Update External Commands Manager
            self.sched.external_commands_manager.accept_passive_unknown_check_results = \
                self.sched.conf.accept_passive_unknown_check_results

            # We clear our managed schedulers (it's just us :))
            # and set ourselves in it
            self.schedulers = {self.conf.uuid: self.sched}  # pylint: disable=E1101

            # Ok now we can load the retention data
            self.sched.retention_load()

            # Create the 'new conf' brok
            brok = Brok({'type': 'new_conf', 'data': {}})
            self.sched.add_brok(brok)

    def what_i_managed(self):
        # pylint: disable=no-member
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.uuid: self.conf.push_flavor}  # pylint: disable=E1101

        return {}

    def clean_previous_run(self):
        """Clean variables from previous configuration

        :return: None
        """
        # Clean all lists
        self.pollers.clear()
        self.reactionners.clear()
        self.brokers.clear()

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            self.setup_alignak_logger()

            # Look if we are enabled or not. If ok, start the daemon mode
            self.look_for_early_exit()

            # todo:
            # This function returns False if some problem is detected during initialization
            # (e.g. communication port not free)
            # Perhaps we should stop the initialization process and exit?
            if not self.do_daemon_init_and_start():
                return

            self.load_modules_manager(self.name)

            self.uri = self.http_daemon.uri
            logger.info("[Scheduler] General interface is at: %s", self.uri)

            self.do_mainloop()
        except Exception:
            self.print_unrecoverable(traceback.format_exc())
            raise
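The heart of compensate_system_time_change above is a three-step shift: add the clock difference, clamp at zero, then re-validate against the timeperiod. A bare sketch of that arithmetic (next_valid is a stub standing in for get_next_valid_time_from_t; it may return None when no valid slot remains):

def compensate(t_to_go, difference, next_valid=lambda t: t):
    # shift by the detected clock difference, never below the epoch
    new_t = max(0, t_to_go + difference)
    # a real timeperiod may return None: no valid check time remains
    return next_valid(new_t)


assert compensate(100.0, -30) == 70.0
assert compensate(10.0, -60) == 0                    # clamped, never negative
assert compensate(50.0, 10, lambda t: None) is None  # the error case handled above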
Example #6
class Broker(BaseSatellite):
    """
    Class to manage a Broker daemon
    A Broker is used to get data from Scheduler and send them to modules. These modules in most
    cases export to other software, databases...
    """
    properties = BaseSatellite.properties.copy()
    properties.update({
        'daemon_type':
            StringProp(default='broker'),
        'pidfile':
            PathProp(default='brokerd.pid'),
        'port':
            IntegerProp(default=7772),
        'local_log':
            PathProp(default='brokerd.log'),
    })

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file,
                 port=None, local_log=None, daemon_name=None):
        self.daemon_name = 'broker'
        if daemon_name:
            self.daemon_name = daemon_name

        super(Broker, self).__init__(self.daemon_name, config_file, is_daemon, do_replace, debug,
                                     debug_file, port, local_log)

        # Our arbiters
        self.arbiters = {}

        # Our pollers, reactionners and receivers
        self.pollers = {}
        self.reactionners = {}
        self.receivers = {}

        # Modules are loaded only once
        self.have_modules = False

        # We can have a queue of external_commands given by modules;
        # it will be processed by the arbiter
        self.external_commands = []

        # All broks to manage
        self.broks = []  # broks to manage
        # broks raised this turn and that needs to be put in self.broks
        self.broks_internal_raised = []
        # broks raised by the arbiters, we need a lock so the push can be in parallel
        # to our current activities and won't lock the arbiter
        self.arbiter_broks = []
        self.arbiter_broks_lock = threading.RLock()

        self.timeout = 1.0

        self.http_interface = BrokerInterface(self)

    def add(self, elt):  # pragma: no cover, seems not to be used
        """Add elt to this broker

        Original comment: Schedulers have some queues. We can simplify the call by adding
          elements into the proper queue just by looking at their type:
          Brok -> self.broks (TODO: better tag ID?)
          External commands -> self.external_commands

        TODO: is it useful?

        :param elt: object to add
        :type elt: object
        :return: None
        """
        cls_type = elt.__class__.my_type
        if cls_type == 'brok':
            # We tag the broks with our instance_id
            elt.instance_id = self.instance_id
            self.broks_internal_raised.append(elt)
            return
        elif cls_type == 'externalcommand':
            self.external_commands.append(elt)
        # Maybe we got a Message from the modules; it's a way to ask for something,
        # like the full data from a scheduler, for example.
        elif cls_type == 'message':
            # We got a message, great!
            logger.debug(str(elt.__dict__))
            if elt.get_type() == 'NeedData':
                data = elt.get_data()
                # A full instance id means: I got no data for this scheduler,
                # so give me everything!
                if 'full_instance_id' in data:
                    c_id = data['full_instance_id']
                    source = elt.source
                    logger.info('The module %s is asking me to get all initial data '
                                'from the scheduler %d',
                                source, c_id)
                    # so we just reset the connection and the running_id,
                    # it will just get all new things
                    try:
                        self.schedulers[c_id]['con'] = None
                        self.schedulers[c_id]['running_id'] = 0
                    except KeyError:  # maybe this instance was not known, forget it
                        logger.warning("the module %s ask me a full_instance_id "
                                       "for an unknown ID (%d)!", source, c_id)
            # Maybe a module tells me that it's dead; I must log its last words...
            if elt.get_type() == 'ICrash':
                data = elt.get_data()
                logger.error('the module %s just crash! Please look at the traceback:',
                             data['name'])
                logger.error(data['trace'])

                # The module's death will be detected elsewhere and the module restarted.

    def manage_brok(self, brok):
        """Get a brok.
        We put brok data to the modules

        :param brok: object with data
        :type brok: object
        :return: None
        """
        # Call all modules if they catch the call
        for mod in self.modules_manager.get_internal_instances():
            try:
                _t0 = time.time()
                mod.manage_brok(brok)
                statsmgr.timer('core.manage-broks.%s' % mod.get_name(), time.time() - _t0)
            except Exception as exp:  # pylint: disable=broad-except
                logger.warning("The mod %s raise an exception: %s, I'm tagging it to restart later",
                               mod.get_name(), str(exp))
                logger.exception(exp)
                self.modules_manager.set_to_restart(mod)

    def add_broks_to_queue(self, broks):
        """ Add broks to global queue

        :param broks: broks to add
        :type broks: list
        :return: None
        """
        # Ok now put in queue broks to be managed by
        # internal modules
        self.broks.extend(broks)

    def interger_internal_broks(self):
        """Get all broks from self.broks_internal_raised and we put them in self.broks

        :return: None
        """
        self.add_broks_to_queue(self.broks_internal_raised)
        self.broks_internal_raised = []

    def interger_arbiter_broks(self):
        """We will get in the broks list the broks from the arbiters,
        but as the arbiter_broks list can be push by arbiter without Global lock,
        we must protect this with he list lock

        :return: None
        """
        with self.arbiter_broks_lock:
            self.add_broks_to_queue(self.arbiter_broks)
            self.arbiter_broks = []

    def get_new_broks(self, s_type='scheduler'):
        """Get new broks from daemon defined in type parameter

        :param s_type: type of object
        :type s_type: str
        :return: None
        """
        # Get the right links dict to loop over
        links = self.get_links_from_type(s_type)
        if links is None:
            logger.debug('Type unknown for connection! %s', s_type)
            return

        # We check each daemon for new broks and put
        # them in our queue
        for s_id in links:
            logger.debug("Getting broks from %s", links[s_id]['name'])
            link = links[s_id]
            logger.debug("Link: %s", link)
            if not link['active']:
                logger.debug("The %s '%s' is not active, "
                             "do not get broks from its connection!", s_type, link['name'])
                continue

            if link['con'] is None:
                if not self.daemon_connection_init(s_id, s_type=s_type):
                    if link['connection_attempt'] <= link['max_failed_connections']:
                        logger.warning("The connection for the %s '%s' cannot be established, "
                                       "it is not possible to get broks from this daemon.",
                                       s_type, link['name'])
                    else:
                        logger.error("The connection for the %s '%s' cannot be established, "
                                     "it is not possible to get broks from this daemon.",
                                     s_type, link['name'])
                    continue

            try:
                _t0 = time.time()
                tmp_broks = link['con'].get('get_broks', {'bname': self.name}, wait='long')
                try:
                    tmp_broks = unserialize(tmp_broks, True)
                except AlignakClassLookupException as exp:  # pragma: no cover, simple protection
                    logger.error('Cannot un-serialize data received from "get_broks" call: %s',
                                 exp)
                    continue
                if tmp_broks:
                    logger.debug("Got %d Broks from %s in %s",
                                 len(tmp_broks), link['name'], time.time() - _t0)
                statsmgr.timer('con-broks-get.%s' % (link['name']), time.time() - _t0)
                statsmgr.gauge('con-broks-count.%s' % (link['name']), len(tmp_broks.values()))
                for brok in tmp_broks.values():
                    brok.instance_id = link['instance_id']
                # Ok, we can add these broks to our queues
                _t0 = time.time()
                self.add_broks_to_queue(tmp_broks.values())
                statsmgr.timer('con-broks-add.%s' % s_type, time.time() - _t0)
            except HTTPClientConnectionException as exp:  # pragma: no cover, simple protection
                logger.warning("[%s] %s", link['name'], str(exp))
                link['con'] = None
                return
            except HTTPClientTimeoutException as exp:  # pragma: no cover, simple protection
                logger.warning("Connection timeout with the %s '%s' when getting broks: %s",
                               s_type, link['name'], str(exp))
                link['con'] = None
                return
            except HTTPClientException as exp:  # pragma: no cover, simple protection
                logger.error("Error with the %s '%s' when getting broks: %s",
                             s_type, link['name'], str(exp))
                link['con'] = None
                return
            # We do not know what happened, so... bye bye :)
            except Exception as exp:  # pylint: disable=broad-except
                logger.exception(exp)
                sys.exit(1)

    def get_retention_data(self):  # pragma: no cover, useful?
        """Get all broks

        TODO: using retention in the broker is dangerous and
        does not seem to be of any utility with Alignak

        :return: broks container
        :rtype: object
        """
        return self.broks

    def restore_retention_data(self, data):  # pragma: no cover, useful?
        """Add data to broks container

        TODO: using retention in the broker is dangerous and
        does not seem to be of any utility with Alignak

        :param data: broks to add
        :type data: list
        :return: None
        """
        self.broks.extend(data)

    def do_stop(self):
        """Stop all children of this process

        :return: None
        """
        act = active_children()
        for child in act:
            child.terminate()
            child.join(1)
        super(Broker, self).do_stop()

    def setup_new_conf(self):  # pylint: disable=R0915,R0912
        """Parse new configuration and initialize all required

        :return: None
        """

        with self.conf_lock:
            self.clean_previous_run()
            conf = unserialize(self.new_conf, True)
            self.new_conf = None
            self.cur_conf = conf
            # Got our name from the globals
            g_conf = conf['global']
            if 'broker_name' in g_conf:
                name = g_conf['broker_name']
            else:
                name = 'Unnamed broker'
            self.name = name
            # Set my own process title
            self.set_proctitle(self.name)

            logger.info("[%s] Received a new configuration, containing:", self.name)
            for key in conf:
                logger.info("[%s] - %s", self.name, key)
            logger.debug("[%s] global configuration part: %s", self.name, conf['global'])

            # local statsd
            self.statsd_host = g_conf['statsd_host']
            self.statsd_port = g_conf['statsd_port']
            self.statsd_prefix = g_conf['statsd_prefix']
            self.statsd_enabled = g_conf['statsd_enabled']

            # We got a name so we can update the logger and the stats global objects
            statsmgr.register(name, 'broker',
                              statsd_host=self.statsd_host, statsd_port=self.statsd_port,
                              statsd_prefix=self.statsd_prefix, statsd_enabled=self.statsd_enabled)

            # Get our Schedulers
            for sched_id in conf['schedulers']:
                # Must check if we already have it, so as not to override our broks

                old_sched_id = self.get_previous_sched_id(conf['schedulers'][sched_id], sched_id)

                if old_sched_id:
                    logger.info("[%s] We already got the conf %s (%s)",
                                self.name, old_sched_id, name)
                    broks = self.schedulers[old_sched_id]['broks']
                    running_id = self.schedulers[old_sched_id]['running_id']
                    del self.schedulers[old_sched_id]
                else:
                    broks = {}
                    running_id = 0
                sched = conf['schedulers'][sched_id]
                self.schedulers[sched_id] = sched

                # replacing scheduler address and port by those defined in satellitemap
                if sched['name'] in g_conf['satellitemap']:
                    sched = dict(sched)  # make a copy
                    sched.update(g_conf['satellitemap'][sched['name']])

                # todo: why not use a SatelliteLink object?
                proto = 'http'
                if sched['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, sched['address'], sched['port'])
                self.schedulers[sched_id]['uri'] = uri

                self.schedulers[sched_id]['broks'] = broks
                self.schedulers[sched_id]['instance_id'] = sched['instance_id']
                self.schedulers[sched_id]['running_id'] = running_id
                self.schedulers[sched_id]['active'] = sched['active']
                self.schedulers[sched_id]['last_connection'] = 0
                self.schedulers[sched_id]['timeout'] = sched['timeout']
                self.schedulers[sched_id]['data_timeout'] = sched['data_timeout']
                self.schedulers[sched_id]['con'] = None
                self.schedulers[sched_id]['connection_attempt'] = 0
                self.schedulers[sched_id]['max_failed_connections'] = 3

            logger.debug("We have our schedulers: %s", self.schedulers)
            logger.info("We have our schedulers:")
            for daemon in self.schedulers.values():
                logger.info(" - %s ", daemon['name'])

            # Now get arbiters
            for arb_id in conf['arbiters']:
                # Must look if we already have it
                already_got = arb_id in self.arbiters
                if already_got:
                    broks = self.arbiters[arb_id]['broks']
                else:
                    broks = {}
                arb = conf['arbiters'][arb_id]
                self.arbiters[arb_id] = arb

                # replacing arbiter address and port by those defined in satellitemap
                if arb['name'] in g_conf['satellitemap']:
                    arb = dict(arb)  # make a copy
                    arb.update(g_conf['satellitemap'][arb['name']])

                # todo: why not use a SatelliteLink object?
                proto = 'http'
                if arb['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, arb['address'], arb['port'])
                self.arbiters[arb_id]['uri'] = uri

                self.arbiters[arb_id]['broks'] = broks
                self.arbiters[arb_id]['instance_id'] = 0  # No use so all to 0
                self.arbiters[arb_id]['running_id'] = 0
                self.arbiters[arb_id]['con'] = None
                self.arbiters[arb_id]['last_connection'] = 0
                self.arbiters[arb_id]['connection_attempt'] = 0
                self.arbiters[arb_id]['max_failed_connections'] = 3

                # We do not connect to the arbiter. Connection hangs

            logger.debug("We have our arbiters: %s ", self.arbiters)
            logger.info("We have our arbiters:")
            for daemon in self.arbiters.values():
                logger.info(" - %s ", daemon['name'])

            # Now for pollers
            # 658: temporary fix
            if 'pollers' in conf:
                for pol_id in conf['pollers']:
                    # Must look if we already have it
                    already_got = pol_id in self.pollers
                    if already_got:
                        broks = self.pollers[pol_id]['broks']
                        running_id = self.pollers[pol_id]['running_id']
                    else:
                        broks = {}
                        running_id = 0
                    poll = conf['pollers'][pol_id]
                    self.pollers[pol_id] = poll

                    # replacing poller address and port by those defined in satellitemap
                    if poll['name'] in g_conf['satellitemap']:
                        poll = dict(poll)  # make a copy
                        poll.update(g_conf['satellitemap'][poll['name']])

                    # todo: why not use a SatelliteLink object?
                    proto = 'http'
                    if poll['use_ssl']:
                        proto = 'https'

                    uri = '%s://%s:%s/' % (proto, poll['address'], poll['port'])
                    self.pollers[pol_id]['uri'] = uri

                    self.pollers[pol_id]['broks'] = broks
                    self.pollers[pol_id]['instance_id'] = 0  # No use so all to 0
                    self.pollers[pol_id]['running_id'] = running_id
                    self.pollers[pol_id]['con'] = None
                    self.pollers[pol_id]['last_connection'] = 0
                    self.pollers[pol_id]['connection_attempt'] = 0
                    self.pollers[pol_id]['max_failed_connections'] = 3
            else:
                logger.warning("[%s] no pollers in the received configuration", self.name)

            logger.debug("We have our pollers: %s", self.pollers)
            logger.info("We have our pollers:")
            for daemon in self.pollers.values():
                logger.info(" - %s ", daemon['name'])

            # Now reactionners
            # 658: temporary fix
            if 'reactionners' in conf:
                for rea_id in conf['reactionners']:
                    # Must look if we already have it
                    already_got = rea_id in self.reactionners
                    if already_got:
                        broks = self.reactionners[rea_id]['broks']
                        running_id = self.reactionners[rea_id]['running_id']
                    else:
                        broks = {}
                        running_id = 0

                    reac = conf['reactionners'][rea_id]
                    self.reactionners[rea_id] = reac

                    # replacing reactionner address and port by those defined in satellitemap
                    if reac['name'] in g_conf['satellitemap']:
                        reac = dict(reac)  # make a copy
                        reac.update(g_conf['satellitemap'][reac['name']])

                    # todo: why not use a SatelliteLink object?
                    proto = 'http'
                    if reac['use_ssl']:
                        proto = 'https'
                    uri = '%s://%s:%s/' % (proto, reac['address'], reac['port'])
                    self.reactionners[rea_id]['uri'] = uri

                    self.reactionners[rea_id]['broks'] = broks
                    self.reactionners[rea_id]['instance_id'] = 0  # No use so all to 0
                    self.reactionners[rea_id]['running_id'] = running_id
                    self.reactionners[rea_id]['con'] = None
                    self.reactionners[rea_id]['last_connection'] = 0
                    self.reactionners[rea_id]['connection_attempt'] = 0
                    self.reactionners[rea_id]['max_failed_connections'] = 3
            else:
                logger.warning("[%s] no reactionners in the received configuration", self.name)

            logger.debug("We have our reactionners: %s", self.reactionners)
            logger.info("We have our reactionners:")
            for daemon in self.reactionners.values():
                logger.info(" - %s ", daemon['name'])

            # Now receivers
            # 658: temporary fix
            if 'receivers' in conf:
                for rec_id in conf['receivers']:
                    # Must look if we already have it
                    already_got = rec_id in self.receivers
                    if already_got:
                        broks = self.receivers[rec_id]['broks']
                        running_id = self.receivers[rec_id]['running_id']
                    else:
                        broks = {}
                        running_id = 0

                    rec = conf['receivers'][rec_id]
                    self.receivers[rec_id] = rec

                    # replacing receiver address and port by those defined in satellitemap
                    if rec['name'] in g_conf['satellitemap']:
                        rec = dict(rec)  # make a copy
                        rec.update(g_conf['satellitemap'][rec['name']])

                    # todo: why not use a SatelliteLink object?
                    proto = 'http'
                    if rec['use_ssl']:
                        proto = 'https'
                    uri = '%s://%s:%s/' % (proto, rec['address'], rec['port'])
                    self.receivers[rec_id]['uri'] = uri

                    self.receivers[rec_id]['broks'] = broks
                    self.receivers[rec_id]['instance_id'] = rec['instance_id']
                    self.receivers[rec_id]['running_id'] = running_id
                    self.receivers[rec_id]['con'] = None
                    self.receivers[rec_id]['last_connection'] = 0
                    self.receivers[rec_id]['connection_attempt'] = 0
                    self.receivers[rec_id]['max_failed_connections'] = 3
            else:
                logger.warning("[%s] no receivers in the received configuration", self.name)

            logger.debug("We have our receivers: %s", self.receivers)
            logger.info("We have our receivers:")
            for daemon in self.receivers.values():
                logger.info(" - %s ", daemon['name'])

            if not self.have_modules:
                self.modules = conf['global']['modules']
                self.have_modules = True

                # Ok now start, or restart them!
                # Set modules, init them and start external ones
                self.do_load_modules(self.modules)
                self.modules_manager.start_external_instances()

            # Set the timezone given by the arbiter
            use_timezone = conf['global']['use_timezone']
            if use_timezone != 'NOTSET':
                logger.info("Setting our timezone to %s", use_timezone)
                os.environ['TZ'] = use_timezone
                time.tzset()

            # Initialize connection with Schedulers, Pollers and Reactionners
            for sched_id in self.schedulers:
                self.daemon_connection_init(sched_id, s_type='scheduler')

            for pol_id in self.pollers:
                self.daemon_connection_init(pol_id, s_type='poller')

            for rea_id in self.reactionners:
                self.daemon_connection_init(rea_id, s_type='reactionner')

    def clean_previous_run(self):
        """Clean all (when we received new conf)

        :return: None
        """
        # Clean all lists
        self.schedulers.clear()
        self.pollers.clear()
        self.reactionners.clear()
        self.receivers.clear()
        # note: the slice copies below keep pending items; they do not empty the lists
        self.broks = self.broks[:]
        self.arbiters.clear()
        self.broks_internal_raised = self.broks_internal_raised[:]
        with self.arbiter_broks_lock:
            self.arbiter_broks = self.arbiter_broks[:]
        self.external_commands = self.external_commands[:]

        # And now modules
        self.have_modules = False
        self.modules_manager.clear_instances()

    def get_stats_struct(self):
        """Get information of modules (internal and external) and add metrics of them

        :return: dictionary with the state of all modules (internal and external)
        :rtype: dict
        """
        now = int(time.time())
        # call the daemon one
        res = super(Broker, self).get_stats_struct()
        res.update({'name': self.name, 'type': 'broker'})
        metrics = res['metrics']
        # metrics specific
        metrics.append('broker.%s.external-commands.queue %d %d' % (
            self.name, len(self.external_commands), now))
        metrics.append('broker.%s.broks.queue %d %d' % (self.name, len(self.broks), now))
        return res

    def do_loop_turn(self):
        """Loop use to:
         * check if modules are alive, if not restart them
         * add broks to queue of each modules

         :return: None
        """
        logger.debug("Begin Loop: managing old broks (%d)", len(self.broks))

        # Dump modules Queues size
        insts = [inst for inst in self.modules_manager.instances if inst.is_external]
        for inst in insts:
            try:
                logger.debug("External Queue len (%s): %s", inst.get_name(), inst.to_q.qsize())
            except Exception as exp:  # pylint: disable=W0703
                logger.debug("External Queue len (%s): Exception! %s", inst.get_name(), exp)

        # Begin to clean modules
        self.check_and_del_zombie_modules()

        # Now we check if we received a new configuration - no sleep time, we will sleep later...
        self.watch_for_new_conf()
        if self.new_conf:
            self.setup_new_conf()

        # Maybe the last loop we did raised some broks internally
        _t0 = time.time()
        # we should integrate them in broks
        self.interger_internal_broks()
        statsmgr.timer('get-new-broks.broker', time.time() - _t0)

        _t0 = time.time()
        # Also reap broks sent from the arbiters
        self.interger_arbiter_broks()
        statsmgr.timer('get-new-broks.arbiter', time.time() - _t0)

        # Main job, go get broks in our distant daemons
        types = ['scheduler', 'poller', 'reactionner', 'receiver']
        for _type in types:
            _t0 = time.time()
            # Get the new broks from every daemon of this type
            self.get_new_broks(s_type=_type)
            statsmgr.timer('get-new-broks.%s' % _type, time.time() - _t0)

        # Sort the brok list by id
        self.broks.sort(sort_by_ids)

        # and for external queues
        # REF: doc/broker-modules.png (3)
        # We put into the external queues the broks that were not already sent
        t00 = time.time()
        # We are sending broks as a big list, more efficient than one by one
        ext_modules = self.modules_manager.get_external_instances()
        to_send = [brok for brok in self.broks if getattr(brok, 'need_send_to_ext', True)]

        # Send our pack to every external module's to_q queue so each gets the whole batch
        # Beware: the sub-process/queue can die or be closed, so on failure we
        # tag the whole module for restart instead of killing ourselves :)
        for mod in ext_modules:
            try:
                t000 = time.time()
                mod.to_q.put(to_send)
                statsmgr.timer('core.put-to-external-queue.%s' % mod.get_name(), time.time() - t000)
            except Exception as exp:  # pylint: disable=broad-except
                logger.warning("The mod %s queue raised an exception: %s, "
                               "I'm tagging it to restart later",
                               mod.get_name(), str(exp))
                logger.exception(exp)
                self.modules_manager.set_to_restart(mod)

        # No more need to send them
        for brok in to_send:
            brok.need_send_to_ext = False
        statsmgr.timer('core.put-to-external-queue', time.time() - t00)
        logger.debug("Time to send %s broks (%d secs)", len(to_send), time.time() - t00)

        # New broks get appended at the end and pop() takes from the tail,
        # so we reverse the list to manage the oldest broks first
        self.broks.reverse()

        start = time.time()
        while self.broks:
            now = time.time()
            # Do not 'manage' more than 1s, we must get new broks
            # every 1s
            if now - start > 1:
                break

            brok = self.broks.pop()
            # Ok, we got a brok, now do something with it
            # REF: doc/broker-modules.png (4-5)
            # We unserialize the brok before consuming it
            brok.prepare()
            _t0 = time.time()
            self.manage_brok(brok)
            statsmgr.timer('core.manage-broks', time.time() - _t0)

            nb_broks = len(self.broks)

            # We manage broks, but we still want to listen to the arbiter, even for a very short time
            self.make_a_pause(0.01, check_time_change=False)

            # if we got new broks here from arbiter, we should break the loop
            # because such broks will not be managed by the
            # external modules before this loop (we pop them!)
            if len(self.broks) != nb_broks:
                break

        # Maybe external modules raised 'objects';
        # we should get them
        self.get_objects_from_from_queues()

        # Maybe we have nothing to do, so we wait a little
        # TODO: redo the diff management....
        if not self.broks:
            while self.timeout > 0:
                begin = time.time()
                self.watch_for_new_conf(1.0)
                end = time.time()
                self.timeout = self.timeout - (end - begin)
            self.timeout = 1.0

        # Say to modules it's a new tick :)
        self.hook_point('tick')
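
The do_loop_turn above combines two patterns worth isolating: broks are pushed
once to every external module queue (tracked with the need_send_to_ext flag),
then consumed locally under a one-second time budget so the daemon keeps
listening for new configurations and broks. A minimal, self-contained sketch of
both patterns; apart from the flag name and the 1-second budget, every name
here is hypothetical:

import time
import Queue  # Python 2 standard library


class Brok(object):
    """A tiny stand-in for the real Brok objects."""
    def __init__(self, ident):
        self.id = ident
        self.need_send_to_ext = True  # not yet pushed to the external queues


def loop_turn(broks, external_queues, budget=1.0):
    # 1) Push every not-yet-sent brok to all external queues as one big list
    to_send = [brok for brok in broks if getattr(brok, 'need_send_to_ext', True)]
    for ext_q in external_queues:
        ext_q.put(to_send)
    for brok in to_send:
        brok.need_send_to_ext = False
    # 2) Manage broks locally, but never for more than `budget` seconds,
    #    so the loop can go back to polling for a new conf and new broks
    start = time.time()
    while broks and (time.time() - start) < budget:
        broks.pop()  # manage_brok(brok) would go here


loop_turn([Brok(i) for i in range(3)], [Queue.Queue()])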
Example #7
class Alignak(BaseSatellite):
    """Scheduler class. Referenced as "app" in most Interface

    """

    properties = BaseSatellite.properties.copy()
    properties.update({
        'pidfile': PathProp(default='schedulerd.pid'),
        'port': IntegerProp(default=7768),
        'local_log': PathProp(default='schedulerd.log'),
    })

    def __init__(self,
                 config_file,
                 is_daemon,
                 do_replace,
                 debug,
                 debug_file,
                 profile=''):

        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon,
                               do_replace, debug, debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Now the interface
        self.uri = None
        self.uri2 = None

        # And possible links for satellites
        # from now only pollers
        self.pollers = {}
        self.reactionners = {}
        self.brokers = {}

    def compensate_system_time_change(self, difference):
        """Compensate a system time change of difference for all hosts/services/checks/notifs

        :param difference: difference in seconds
        :type difference: int
        :return: None
        """
        logger.warning(
            "A system time change of %d has been detected. Compensating...",
            difference)
        # We only need to shift some values
        self.program_start = max(0, self.program_start + difference)

        if not hasattr(self.sched, "conf"):
            # Race condition where time change before getting conf
            return

        # Then we compensate all host/services
        for host in self.sched.hosts:
            host.compensate_system_time_change(difference)
        for serv in self.sched.services:
            serv.compensate_system_time_change(difference)

        # Now all checks and actions
        for chk in self.sched.checks.values():
            # Already-launched checks should not be touched
            if chk.status == 'scheduled' and chk.t_to_go is not None:
                t_to_go = chk.t_to_go
                ref = chk.ref
                new_t = max(0, t_to_go + difference)
                if ref.check_period is not None:
                    # But it's not so simple: we must match the timeperiod
                    new_t = ref.check_period.get_next_valid_time_from_t(new_t)
                # But maybe there is no valid new value! Not good :(
                # Flag it as an error, with an error output
                if new_t is None:
                    chk.state = 'waitconsume'
                    chk.exit_status = 2
                    chk.output = '(Error: there is no available check time after time change!)'
                    chk.check_time = time.time()
                    chk.execution_time = 0
                else:
                    chk.t_to_go = new_t
                    ref.next_chk = new_t

        # Now all actions (notifications, event handlers)
        for act in self.sched.actions.values():
            # Already-launched actions should not be touched
            if act.status == 'scheduled':
                t_to_go = act.t_to_go

                #  Event handler do not have ref
                ref = getattr(act, 'ref', None)
                new_t = max(0, t_to_go + difference)

                # Notifications must be re-checked against their notification_period
                if act.is_a == 'notification':
                    if ref.notification_period:
                        # But it's not so simple: we must match the timeperiod
                        new_t = ref.notification_period.get_next_valid_time_from_t(
                            new_t)
                    # And shift the creation_time too
                    act.creation_time += difference

                # But maybe there is no valid new value! Not good :(
                # Flag it as an error, with an error output
                if new_t is None:
                    act.state = 'waitconsume'
                    act.exit_status = 2
                    act.output = '(Error: there is no available check time after time change!)'
                    act.check_time = time.time()
                    act.execution_time = 0
                else:
                    act.t_to_go = new_t
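
    # A minimal sketch of the compensation math above, assuming a check that
    # was due at t_to_go and a clock jump of `difference` seconds (the numbers
    # are hypothetical; get_next_valid_time_from_t belongs to the timeperiod):
    #
    #   t_to_go = 1500000000
    #   difference = -3600                    # the clock jumped back one hour
    #   new_t = max(0, t_to_go + difference)  # shift the schedule with it
    #   new_t = check_period.get_next_valid_time_from_t(new_t)
    #   # new_t is None when the timeperiod has no valid slot left -> error path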

    def manage_signal(self, sig, frame):
        """Manage signals caught by the daemon
        signal.SIGUSR1 : dump_memory
        signal.SIGUSR2 : dump_object (nothing)
        signal.SIGTERM, signal.SIGINT : terminate process

        :param sig: signal caught by daemon
        :type sig: int
        :param frame: current stack frame
        :type frame:
        :return: None
        TODO: Refactor with Daemon one
        """
        logger.warning("%s > Received a SIGNAL %s", process.current_process(),
                       sig)
        # If we got USR1, just dump memory
        if sig == signal.SIGUSR1:
            self.sched.need_dump_memory = True
        elif sig == signal.SIGUSR2:  # usr2, dump objects
            self.sched.need_objects_dump = True
        else:  # if not, die :)
            self.sched.die()
            self.must_run = False
            Daemon.manage_signal(self, sig, frame)
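
    # Handlers like manage_signal are registered with the standard signal
    # module, typically during daemon init. A minimal sketch (the `daemon`
    # name is hypothetical):
    #
    #   import signal
    #   signal.signal(signal.SIGUSR1, daemon.manage_signal)
    #   signal.signal(signal.SIGUSR2, daemon.manage_signal)
    #   signal.signal(signal.SIGTERM, daemon.manage_signal)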

    def do_loop_turn(self):
        """Scheduler loop turn
        Basically wait for the initial conf, then run

        :return: None
        """
        # Ok, now the conf
        self.wait_for_initial_conf()
        if not self.new_conf:
            return
        logger.info("New configuration received")
        self.setup_new_conf()
        logger.info("New configuration loaded")
        self.sched.run()

    def setup_new_conf(self):
        """Setup new conf received for scheduler

        :return: None
        """
        with self.conf_lock:
            new_c = self.new_conf
            conf_raw = new_c['conf']
            override_conf = new_c['override_conf']
            modules = new_c['modules']
            satellites = new_c['satellites']
            instance_name = new_c['instance_name']
            push_flavor = new_c['push_flavor']
            skip_initial_broks = new_c['skip_initial_broks']
            accept_passive_unknown_chk_res = new_c[
                'accept_passive_unknown_check_results']
            api_key = new_c['api_key']
            secret = new_c['secret']
            http_proxy = new_c['http_proxy']
            statsd_host = new_c['statsd_host']
            statsd_port = new_c['statsd_port']
            statsd_prefix = new_c['statsd_prefix']
            statsd_enabled = new_c['statsd_enabled']

            # hooray, we got a name, we can set it in our stats objects
            statsmgr.register(self.sched,
                              instance_name,
                              'scheduler',
                              api_key=api_key,
                              secret=secret,
                              http_proxy=http_proxy,
                              statsd_host=statsd_host,
                              statsd_port=statsd_port,
                              statsd_prefix=statsd_prefix,
                              statsd_enabled=statsd_enabled)

            t00 = time.time()
            conf = cPickle.loads(conf_raw)
            logger.debug("Conf received at %d. Unserialized in %d secs", t00,
                         time.time() - t00)
            self.new_conf = None

            # Tag the conf with our data
            self.conf = conf
            self.conf.push_flavor = push_flavor
            self.conf.instance_name = instance_name
            self.conf.skip_initial_broks = skip_initial_broks
            self.conf.accept_passive_unknown_check_results = accept_passive_unknown_chk_res

            self.cur_conf = conf
            self.override_conf = override_conf
            self.modules = modules
            self.satellites = satellites
            # self.pollers = self.app.pollers

            if self.conf.human_timestamp_log:
                # pylint: disable=E1101
                logger.set_human_format()

            # Now we create our pollers
            for pol_id in satellites['pollers']:
                # Must look if we already have it
                already_got = pol_id in self.pollers
                poll = satellites['pollers'][pol_id]
                self.pollers[pol_id] = poll

                if poll['name'] in override_conf['satellitemap']:
                    poll = dict(poll)  # make a copy
                    poll.update(override_conf['satellitemap'][poll['name']])

                proto = 'http'
                if poll['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, poll['address'], poll['port'])
                self.pollers[pol_id]['uri'] = uri
                self.pollers[pol_id]['last_connection'] = 0

            # Now we create our reactionners
            for reac_id in satellites['reactionners']:
                # Must look if we already have it
                already_got = reac_id in self.reactionners
                reac = satellites['reactionners'][reac_id]
                self.reactionners[reac_id] = reac

                if reac['name'] in override_conf['satellitemap']:
                    reac = dict(reac)  # make a copy
                    reac.update(override_conf['satellitemap'][reac['name']])

                proto = 'http'
                if reac['use_ssl']:
                    proto = 'https'
                uri = '%s://%s:%s/' % (proto, reac['address'], reac['port'])
                self.reactionners[reac_id]['uri'] = uri
                self.reactionners[reac_id]['last_connection'] = 0

            # First mix conf and override_conf to have our definitive conf
            for prop in self.override_conf:
                val = self.override_conf[prop]
                setattr(self.conf, prop, val)

            if self.conf.use_timezone != '':
                logger.debug("Setting our timezone to %s",
                             str(self.conf.use_timezone))
                os.environ['TZ'] = self.conf.use_timezone
                time.tzset()

            if len(self.modules) != 0:
                logger.debug("I've got %s modules", str(self.modules))

            # TODO: if the scheduler had previous modules instantiated, it must clean them!
            self.do_load_modules(self.modules)

            logger.info("Loading configuration.")
            self.conf.explode_global_conf()

            # we give sched its conf
            self.sched.reset()
            self.sched.load_conf(self.conf)
            self.sched.load_satellites(self.pollers, self.reactionners)

            # We must update our Config dict macro with good value
            # from the config parameters
            self.sched.conf.fill_resource_macros_names_macros()
            # print "DBG: got macros", self.sched.conf.macros

            # Creating the Macroresolver Class & unique instance
            m_solver = MacroResolver()
            m_solver.init(self.conf)

            # self.conf.dump()
            # self.conf.quick_debug()

            # Now create the external commander:
            # it's an applyer: its role is not to dispatch commands,
            # but to apply them
            ecm = ExternalCommandManager(self.conf, 'applyer')

            # The scheduler needs to know about external commands to
            # activate them if necessary
            self.sched.load_external_command(ecm)

            # The external command manager needs the sched because it can raise checks
            ecm.load_scheduler(self.sched)

            # We reset the schedulers we manage (it's just us :))
            # and register ourselves in it
            self.schedulers = {self.conf.instance_id: self.sched}

    def what_i_managed(self):
        """Get my managed dict (instance id and push_flavor)

        :return: dict containing instance_id key and push flavor value
        :rtype: dict
        """
        if hasattr(self, 'conf'):
            return {self.conf.instance_id: self.conf.push_flavor}
        else:
            return {}

    def main(self):
        """Main function for Scheduler, launch after the init::

        * Init daemon
        * Load module manager
        * Launch main loop
        * Catch any Exception that occurs

        :return: None
        """
        try:
            self.load_config_file()
            # Setting log level
            logger.setLevel(self.log_level)
            # Force the debug level if the daemon was started with it
            if self.debug:
                logger.setLevel('DEBUG')

            self.look_for_early_exit()
            self.do_daemon_init_and_start()
            self.load_modules_manager()

            self.uri = self.http_daemon.uri
            logger.info("[scheduler] General interface is at: %s", self.uri)
            self.do_mainloop()
        except Exception:
            self.print_unrecoverable(traceback.format_exc())
            raise
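
A minimal sketch of how a daemon class like this one is typically launched from
a startup script; the config path and flag values here are hypothetical, the
real launchers parse them from the command line:

if __name__ == '__main__':
    scheduler_daemon = Alignak(config_file='/etc/alignak/daemons/schedulerd.ini',
                               is_daemon=False,   # stay in the foreground
                               do_replace=False,  # do not replace a running daemon
                               debug=False,
                               debug_file=None)
    scheduler_daemon.main()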