class Scheduler(SynergyProcess):
    """ Scheduler encapsulate logic for starting the aggregators/alerts/other readers """

    def __init__(self, process_name):
        super(Scheduler, self).__init__(process_name)
        self.logger.info('Starting %s' % self.process_name)
        self.publishers = PublishersPool(self.logger)
        self.thread_handlers = dict()
        self.lock = Lock()
        self.timetable = TimeTable(self.logger)
        self.regular_pipeline = RegularPipeline(self, self.timetable)
        self.hadoop_pipeline = HadoopPipeline(self, self.timetable)
        self.logger.info('Started %s' % self.process_name)


    def __del__(self):
        for handler in self.thread_handlers.values():
            handler.cancel()
        self.thread_handlers.clear()
        super(Scheduler, self).__del__()


    def _log_message(self, level, process_name, time_record, msg):
        """ method performs logging into log file and TimeTable node"""
        self.timetable.add_log_entry(process_name, time_record, datetime.utcnow(), msg)
        self.logger.log(level, msg)


    # **************** Scheduler Methods ************************
    @with_reconnect
    def start(self):
        """ reading scheduler configurations and starting timers to trigger events """
        collection = CollectionContext.get_collection(self.logger, COLLECTION_SCHEDULER_CONFIGURATION)
        cursor = collection.find({})
        if cursor.count() == 0:
            raise LookupError('MongoDB has no scheduler configuration entries')

        for entry in cursor:
            document = SchedulerConfigurationEntry(entry)
            interval = document.get_interval()
            is_active = document.get_process_state() == SchedulerConfigurationEntry.STATE_ON
            process_type = ProcessContext.get_type(document.get_process_name())
            parameters = [document.get_process_name(), document]

            if process_type == TYPE_ALERT:
                function = self.fire_alert
            elif process_type in (TYPE_HORIZONTAL_AGGREGATOR, TYPE_VERTICAL_AGGREGATOR):
                function = self.fire_worker
            elif process_type == TYPE_GARBAGE_COLLECTOR:
                function = self.fire_garbage_collector
            else:
                self.logger.error('Cannot start scheduler for %s since it has no processing function' % process_type)
                continue

            handler = RepeatTimer(interval, function, args=parameters)
            self.thread_handlers[document.get_process_name()] = handler

            if is_active:
                handler.start()
                self.logger.info('Started scheduler for %s:%s, triggering every %d seconds'
                                 % (process_type, document.get_process_name(), interval))
            else:
                self.logger.info('Handler for %s:%s registered in Scheduler. Idle until activated.'
                                 % (process_type, document.get_process_name()))

        # as Scheduler is now initialized and running - we can safely start its MX
        self.start_mx()


    def start_mx(self):
        """ method's only purpose: import MX module (which has back-reference import to scheduler) and start it """
        from mx.mx import MX
        self.mx = MX(self)
        self.mx.start_mx_thread()


    def fire_worker(self, *args):
        """requests vertical aggregator (hourly site, daily variant, etc) to start up"""
        try:
            process_name = args[0]
            self.lock.acquire()
            self.logger.info('%s {' % process_name)
            time_record = self.timetable.get_next_timetable_record(process_name)
            time_qualifier = ProcessContext.get_time_qualifier(process_name)

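            # hourly processes run through the regular pipeline; all other time
            # qualifiers are routed to the Hadoop pipeline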
            if time_qualifier == ProcessContext.QUALIFIER_HOURLY:
                self.regular_pipeline.manage_pipeline_for_process(process_name, time_record)
            else:
                self.hadoop_pipeline.manage_pipeline_for_process(process_name, time_record)

        except (AMQPException, IOError) as e:
            self.logger.error('AMQPException or IOError: %s' % str(e), exc_info=True)
            self.publishers.reset_all_publishers(suppress_logging=True)
        except Exception as e:
            self.logger.error('Exception: %s' % str(e), exc_info=True)
        finally:
            self.logger.info('}')
            self.lock.release()


    def fire_alert(self, *args):
        """ Triggers AlertWorker. Makes sure its <dependent on> trees have
            finalized corresponding timeperiods prior to that"""
        try:
            process_name = args[0]
            self.lock.acquire()
            self.logger.info('%s {' % process_name)

            time_record = self.timetable.get_next_timetable_record(process_name)
            self.hadoop_pipeline.manage_pipeline_with_blocking_dependencies(process_name, time_record)
        except (AMQPException, IOError) as e:
            self.logger.error('AMQPException or IOError: %s' % str(e), exc_info=True)
            self.publishers.reset_all_publishers(suppress_logging=True)
        except Exception as e:
            self.logger.error('Exception: %s' % str(e), exc_info=True)
        finally:
            self.logger.info('}')
            self.lock.release()


    def fire_garbage_collector(self, *args):
        """fires garbage collector to re-run all invalid records"""
        try:
            process_name = args[0]
            self.lock.acquire()
            self.logger.info('%s {' % process_name)

            self.publishers.get_publisher(process_name).publish({})
            self.logger.info('Published trigger for garbage_collector')
            self.timetable.build_tree()
            self.timetable.validate()
            self.logger.info('Validated Timetable for all trees')
        except (AMQPException, IOError) as e:
            self.logger.error('AMQPException or IOError: %s' % str(e), exc_info=True)
            self.publishers.reset_all_publishers(suppress_logging=True)
        except Exception as e:
            self.logger.error('Exception: %s' % str(e), exc_info=True)
        finally:
            self.logger.info('}')
            self.lock.release()
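

# ---------------------------------------------------------------------------
# RepeatTimer is referenced in Scheduler.start() but defined elsewhere in the
# project. Below is a minimal sketch, assuming only the semantics the Scheduler
# relies on: invoke <function> with <args> every <interval> seconds, and
# support start()/cancel(). The real implementation may differ.
# ---------------------------------------------------------------------------
from threading import Timer


class RepeatTimer(object):
    """ re-arms a one-shot threading.Timer after every tick, until cancelled """

    def __init__(self, interval, function, args=None, kwargs=None):
        self.interval = interval
        self.function = function
        self.args = args if args is not None else []
        self.kwargs = kwargs if kwargs is not None else {}
        self.timer = None
        self.is_cancelled = False

    def _tick(self):
        self.function(*self.args, **self.kwargs)
        if not self.is_cancelled:
            self.start()    # re-arm for the next interval

    def start(self):
        self.timer = Timer(self.interval, self._tick)
        self.timer.daemon = True
        self.timer.start()

    def cancel(self):
        self.is_cancelled = True
        if self.timer is not None:
            self.timer.cancel()

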
class GarbageCollectorWorker(AbstractWorker):
    """
    this class reads stream of messages from RabbitMQ and dump them to the MongoDB
    """

    def __init__(self, process_name):
        super(GarbageCollectorWorker, self).__init__(process_name)
        self.publishers = PublishersPool(self.logger)
        self.collection = CollectionContext.get_collection(self.logger, COLLECTION_UNITS_OF_WORK)
        self.lock = Lock()

    def __del__(self):
        super(GarbageCollectorWorker, self).__del__()

    @thread_safe
    def _mq_callback(self, message):
        """ method looks for units of work in STATE_INVALID and re-runs them"""
        try:
            query = {
                UnitOfWorkEntry.STATE: {
                    "$in": [
                        UnitOfWorkEntry.STATE_IN_PROGRESS,
                        UnitOfWorkEntry.STATE_INVALID,
                        UnitOfWorkEntry.STATE_REQUESTED,
                    ]
                }
            }
            cursor = self.collection.find(query).sort("_id", ASCENDING)

            if cursor.count() != 0:
                for document in cursor:
                    self._process_single_document(document)
        except Exception as e:
            self.logger.error("_mq_callback: %s" % str(e), exc_info=True)
        finally:
            self.consumer.acknowledge(message.delivery_tag)

    def _process_single_document(self, document):
        """ actually inspects UOW retrieved from the database"""
        repost = False
        unit_of_work = UnitOfWorkEntry(document)
        process_name = unit_of_work.get_process_name()

        if unit_of_work.get_state() == UnitOfWorkEntry.STATE_INVALID:
            repost = True
        elif unit_of_work.get_state() in (UnitOfWorkEntry.STATE_IN_PROGRESS,
                                          UnitOfWorkEntry.STATE_REQUESTED):
            last_activity = unit_of_work.get_started_at()
            if last_activity is None:
                last_activity = unit_of_work.get_created_at()

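            # a UOW that has shown no activity for REPOST_AFTER_HOURS is considered stalled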
            if datetime.utcnow() - last_activity > timedelta(hours=REPOST_AFTER_HOURS):
                repost = True

        if repost:
            creation_time = unit_of_work.get_created_at()
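            # re-post only while the UOW is younger than LIFE_SUPPORT_HOURS; older ones are cancelled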
            if datetime.utcnow() - creation_time < timedelta(hours=LIFE_SUPPORT_HOURS):
                unit_of_work.set_state(UnitOfWorkEntry.STATE_REQUESTED)
                unit_of_work.set_number_of_retries(unit_of_work.get_number_of_retries() + 1)
                unit_of_work_helper.update(self.logger, unit_of_work)
                self.publishers.get_publisher(process_name).publish(str(document["_id"]))

                self.logger.info(
                    "UOW marked for re-processing: process %s; id %s; attempt %d"
                    % (process_name, str(document["_id"]), unit_of_work.get_number_of_retries())
                )
                self.performance_ticker.increment()
            else:
                unit_of_work.set_state(UnitOfWorkEntry.STATE_CANCELED)
                unit_of_work_helper.update(self.logger, unit_of_work)
                self.logger.info(
                    "UOW transfered to STATE_CANCELED: process %s; id %s; attempt %d"
                    % (process_name, str(document["_id"]), unit_of_work.get_number_of_retries())
                )
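

# ---------------------------------------------------------------------------
# @thread_safe is used on _mq_callback but defined elsewhere in the project.
# A minimal sketch, assuming its contract is simply to serialize calls by
# holding self.lock for the duration of the wrapped method:
# ---------------------------------------------------------------------------
from functools import wraps


def thread_safe(method):
    """ decorator: acquires self.lock before the call and releases it afterwards """
    @wraps(method)
    def _locked(self, *args, **kwargs):
        self.lock.acquire()
        try:
            return method(self, *args, **kwargs)
        finally:
            self.lock.release()
    return _locked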