Exemplo n.º 1
0
    def get_new_broks(self):
        """Get new broks from our satellites

        :return: None
        """
        for satellites in [
                self.schedulers, self.pollers, self.reactionners,
                self.receivers
        ]:
            for satellite_link in list(satellites.values()):
                logger.debug("Getting broks from %s", satellite_link)

                _t0 = time.time()
                try:
                    tmp_broks = satellite_link.get_broks(self.name)
                except LinkError:
                    logger.warning(
                        "Daemon %s connection failed, I could not get the broks!",
                        satellite_link)
                else:
                    if tmp_broks:
                        logger.debug("Got %d Broks from %s in %s",
                                     len(tmp_broks), satellite_link.name,
                                     time.time() - _t0)
                        statsmgr.gauge(
                            'get-new-broks-count.%s' % (satellite_link.name),
                            len(tmp_broks))
                        statsmgr.timer(
                            'get-new-broks-time.%s' % (satellite_link.name),
                            time.time() - _t0)
                        for brok in tmp_broks:
                            brok.instance_id = satellite_link.instance_id

                        # Add the broks to our global list
                        self.external_broks.extend(tmp_broks)
Exemplo n.º 2
0
    def get_new_actions(self):
        """ Wrapper function for do_get_new_actions
        For stats purpose

        :return: None
        TODO: Use a decorator for timing this function
        """
        try:
            _t0 = time.time()
            self.do_get_new_actions()
            statsmgr.timer('actions.got.time', time.time() - _t0)
        except RuntimeError:
            logger.error("Exception like issue #1007")
    def do_loop_turn(self):
        """Scheduler loop turn

        Simply run the FusionSupervision Engine scheduler loop

        This is called when a configuration got received by the scheduler daemon. As of it,
        check if the first scheduling has been done... and manage this.

        :return: None
        """
        if not self.first_scheduling:
            # Ok, now all is initialized, we can make the initial broks
            logger.info("First scheduling launched")
            _t0 = time.time()
            # Program start brok
            self.sched.initial_program_status()
            # First scheduling
            self.sched.schedule()
            statsmgr.timer('first_scheduling', time.time() - _t0)
            logger.info("First scheduling done")

            # Connect to our passive satellites if needed
            for satellite in [s for s in list(self.pollers.values()) if s.passive]:
                if not self.daemon_connection_init(satellite):
                    logger.error("Passive satellite connection failed: %s", satellite)

            for satellite in [s for s in list(self.reactionners.values()) if s.passive]:
                if not self.daemon_connection_init(satellite):
                    logger.error("Passive satellite connection failed: %s", satellite)

            # Ticks are for recurrent function call like consume, del zombies etc
            self.sched.ticks = 0
            self.first_scheduling = True

        # Each loop turn, execute the daemon specific treatment...
        # only if the daemon has a configuration to manage
        if self.sched.pushed_conf:
            # If scheduling is not yet enabled, enable scheduling
            if not self.sched.must_schedule:
                self.sched.start_scheduling()
                self.sched.before_run()
            self.sched.run()
        else:
            logger.warning("#%d - No monitoring configuration to scheduler...",
                           self.loop_count)
Exemplo n.º 4
0
    def manage_brok(self, brok):
        """Get a brok.
        We put brok data to the modules

        :param brok: object with data
        :type brok: object
        :return: None
        """
        # Unserialize the brok before consuming it
        brok.prepare()

        for module in self.modules_manager.get_internal_instances():
            try:
                _t0 = time.time()
                module.manage_brok(brok)
                statsmgr.timer('manage-broks.internal.%s' % module.get_name(),
                               time.time() - _t0)
            except Exception as exp:  # pylint: disable=broad-except
                logger.warning(
                    "The module %s raised an exception: %s, "
                    "I'm tagging it to restart later", module.get_name(),
                    str(exp))
                logger.exception(exp)
                self.modules_manager.set_to_restart(module)
Exemplo n.º 5
0
    def do_loop_turn(self):
        """Receiver daemon main loop

        :return: None
        """

        # Begin to clean modules
        self.check_and_del_zombie_modules()

        # Maybe the arbiter pushed a new configuration...
        if self.watch_for_new_conf(timeout=0.05):
            logger.info("I got a new configuration...")
            # Manage the new configuration
            self.setup_new_conf()

        # Maybe external modules raised 'objects'
        # we should get them
        _t0 = time.time()
        self.get_objects_from_from_queues()
        statsmgr.timer('core.get-objects-from-queues', time.time() - _t0)

        # Get external commands from the arbiters...
        _t0 = time.time()
        self.get_external_commands_from_arbiters()
        statsmgr.timer('external-commands.got.time', time.time() - _t0)
        statsmgr.gauge('external-commands.got.count',
                       len(self.unprocessed_external_commands))

        _t0 = time.time()
        self.push_external_commands_to_schedulers()
        statsmgr.timer('external-commands.pushed.time', time.time() - _t0)

        # Say to modules it's a new tick :)
        _t0 = time.time()
        self.hook_point('tick')
        statsmgr.timer('hook.tick', time.time() - _t0)
Exemplo n.º 6
0
    def do_loop_turn(self):
        # pylint: disable=too-many-branches
        """Loop used to:
         * get initial status broks
         * check if modules are alive, if not restart them
         * get broks from ourself, the arbiters and our satellites
         * add broks to the queue of each external module
         * manage broks with each internal module

         If the internal broks management is longer than 0.8 seconds, postpone to hte next
         loop turn to avoid overloading the broker daemon.

         :return: None
        """
        if not self.got_initial_broks:
            # Asking initial broks from my schedulers
            my_satellites = self.get_links_of_type(s_type='scheduler')
            for satellite in list(my_satellites.values()):
                logger.info("Asking my initial broks from '%s'",
                            satellite.name)
                _t0 = time.time()
                try:
                    my_initial_broks = satellite.get_initial_broks(self.name)
                    statsmgr.timer('broks.initial.%s.time' % satellite.name,
                                   time.time() - _t0)
                    if not my_initial_broks:
                        logger.info("No initial broks were raised, "
                                    "my scheduler is not yet ready...")
                        return

                    self.got_initial_broks = True
                    logger.debug("Got %d initial broks from '%s'",
                                 my_initial_broks, satellite.name)
                    statsmgr.gauge('broks.initial.%s.count' % satellite.name,
                                   my_initial_broks)
                except LinkError as exp:
                    logger.warning(
                        "Scheduler connection failed, I could not get initial broks!"
                    )

        logger.debug("Begin Loop: still some old broks to manage (%d)",
                     len(self.external_broks))
        if self.external_broks:
            statsmgr.gauge('unmanaged.broks', len(self.external_broks))

        # Try to see if one of my module is dead, and restart previously dead modules
        self.check_and_del_zombie_modules()

        # Call modules that manage a starting tick pass
        _t0 = time.time()
        self.hook_point('tick')
        statsmgr.timer('hook.tick', time.time() - _t0)

        # Maybe the last loop we did raised some broks internally
        self.get_internal_broks()

        # Also reap broks sent from the arbiters
        self.get_arbiter_broks()

        # Now get broks from our distant daemons
        self.get_new_broks()

        # Get the list of broks not yet sent to our external modules
        _t0 = time.time()
        broks_to_send = [
            brok for brok in self.external_broks
            if getattr(brok, 'to_be_sent', True)
        ]
        statsmgr.gauge('get-new-broks-count.to_send', len(broks_to_send))

        # Send the broks to all external modules to_q queue so they can get the whole packet
        # beware, the sub-process/queue can be die/close, so we put to restart the whole module
        # instead of killing ourselves :)
        for module in self.modules_manager.get_external_instances():
            try:
                _t00 = time.time()
                queue_size = module.to_q.qsize()
                statsmgr.gauge(
                    'queues.external.%s.to.size' % module.get_name(),
                    queue_size)
                module.to_q.put(broks_to_send)
                statsmgr.timer('queues.external.%s.to.put' % module.get_name(),
                               time.time() - _t00)
            except Exception as exp:  # pylint: disable=broad-except
                # first we must find the modules
                logger.warning(
                    "Module %s queue exception: %s, I'm tagging it to restart later",
                    module.get_name(), str(exp))
                logger.exception(exp)
                self.modules_manager.set_to_restart(module)

        # No more need to send them
        for brok in broks_to_send:
            brok.to_be_sent = False
        logger.debug("Time to send %s broks (%d secs)", len(broks_to_send),
                     time.time() - _t0)

        # Make the internal modules manage the broks
        start = time.time()
        while self.external_broks:
            now = time.time()
            # Do not 'manage' more than 0.8s, we must get new broks almost every second
            if now - start > 0.8:
                logger.info(
                    "I did not yet managed all my broks, still %d broks",
                    len(self.external_broks))
                break

            # Get the first brok in the list
            brok = self.external_broks.pop(0)
            if self.modules_manager.get_internal_instances():
                self.manage_brok(brok)
                # Make a very short pause to avoid overloading
                self.make_a_pause(0.01, check_time_change=False)
            else:
                if getattr(brok, 'to_be_sent', False):
                    self.external_broks.append(brok)

        # Maybe our external modules raised 'objects', so get them
        if self.get_objects_from_from_queues():
            statsmgr.gauge('external-commands.got.count',
                           len(self.external_commands))
            statsmgr.gauge('broks.got.count', len(self.external_broks))