def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None):
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])
        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs
            ]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [
                TriggerDefinition(conf, None) for conf in defs
            ]
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)

        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
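
A hedged construction sketch for the __init__ above. Only the key names come from the code and its config_description(); the import path and every value are illustrative assumptions.

# Hedged usage sketch -- assumed import path and config values.
from winchester.pipeline_manager import PipelineManager  # assumed module path

config = dict(
    config_path=['.'],                      # where config.load_file() searches
    database=dict(url='sqlite://'),         # assumed DBInterface settings
    pipeline_handlers={                     # simport "module:Class" strings
        'logger': 'winchester.pipeline_handler:LoggingHandler'},  # assumed
    pipeline_config='pipelines.yaml',       # assumed file names
    trigger_definitions='triggers.yaml',
)
manager = PipelineManager(config)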
Example #2
    def __init__(self,
                 config,
                 db=None,
                 stackdistiller=None,
                 trigger_defs=None,
                 time_sync=None):
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])
        if stackdistiller is not None:
            self.distiller = stackdistiller
        else:
            # distiller_config is optional
            if config.contains('distiller_config'):
                dist_config = config.load_file(config['distiller_config'])
                plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                             distiller.DEFAULT_PLUGINMAP)
                self.distiller = distiller.Distiller(
                    dist_config,
                    trait_plugin_map=plugmap,
                    catchall=config['catch_all_notifications'])
        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                self.trigger_definitions = [
                    TriggerDefinition(conf, self.debug_manager)
                    for conf in defs
                ]
        # trigger_map is used to quickly access existing trigger_defs
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()
    @classmethod
    def config_description(cls):
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True, default='.'),
            distiller_config=ConfigItem(
                required=False,
                help="Name of distiller config file "
                     "describing what to extract from the "
                     "notifications"),
            distiller_trait_plugins=ConfigItem(
                help="dictionary of trait plugins to load "
                     "for stackdistiller. Classes specified with "
                     "simport syntax. See stackdistiller and "
                     "simport docs for more info", default=dict()),
            time_sync_endpoint=ConfigItem(
                help="URL of time sync service for use with"
                     " replaying old events.",
                default=None),
            catch_all_notifications=ConfigItem(
                help="Store basic info for all notifications,"
                     " even if not listed in distiller config",
                default=False),
            statistics_period=ConfigItem(
                help="Emit stats on event counts, etc. every "
                     "this many seconds", default=10),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
            trigger_definitions=ConfigItem(
                required=False,
                help="Name of trigger definitions file "
                     "defining trigger conditions and what events to "
                     "process for each stream"),
        )

    def __init__(self, config, db=None, stackdistiller=None, trigger_defs=None):
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        config.check_config()
        config.add_config_path(*config['config_path'])

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])
        if stackdistiller is not None:
            self.distiller = stackdistiller
        else:
            dist_config = config.load_file(config['distiller_config'])
            plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                         distiller.DEFAULT_PLUGINMAP)
            self.distiller = distiller.Distiller(dist_config,
                                                 trait_plugin_map=plugmap,
                                                 catchall=config['catch_all_notifications'])
        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            defs = config.load_file(config['trigger_definitions'])
            self.trigger_definitions = [
                TriggerDefinition(conf, self.debug_manager)
                for conf in defs]
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()
    @classmethod
    def config_description(cls):
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True,
                default='.'),
            distiller_config=ConfigItem(
                required=True,
                help="Name of distiller config file "
                     "describing what to extract from the "
                     "notifications"),
            distiller_trait_plugins=ConfigItem(
                help="dictionary of trait plugins to load "
                     "for stackdistiller. Classes specified with "
                     "simport syntax. See stackdistiller and "
                     "simport docs for more info",
                default=dict()),
            catch_all_notifications=ConfigItem(
                help="Store basic info for all notifications,"
                     " even if not listed in distiller config",
                default=False),
            statistics_period=ConfigItem(
                help="Emit stats on event counts, etc. every "
                     "this many seconds",
                default=10),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
            trigger_definitions=ConfigItem(
                required=True,
                help="Name of trigger definitions file "
                     "defining trigger conditions and what events to "
                     "process for each stream"),
        )
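
Given the description above, where distiller_config and trigger_definitions are required, a plausible TriggerManager config might look like this sketch; every value is an illustrative assumption.

# Illustrative config; key names follow config_description(), values assumed.
trigger_config = dict(
    config_path=['.', '/etc/winchester'],   # assumed search paths
    database=dict(url='mysql://user:pw@localhost/winchester'),  # assumed
    distiller_config='event_definitions.yaml',  # required in this version
    trigger_definitions='triggers.yaml',        # required in this version
    statistics_period=10,
)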
Example #6
    @classmethod
    def config_description(cls):
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True, default='.'),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
        )

    def __init__(self,
                 config,
                 db=None,
                 stackdistiller=None,
                 trigger_defs=None):
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        config.check_config()
        config.add_config_path(*config['config_path'])

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])
        if stackdistiller is not None:
            self.distiller = stackdistiller
        else:
            dist_config = config.load_file(config['distiller_config'])
            plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                         distiller.DEFAULT_PLUGINMAP)
            self.distiller = distiller.Distiller(
                dist_config,
                trait_plugin_map=plugmap,
                catchall=config['catch_all_notifications'])
        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            defs = config.load_file(config['trigger_definitions'])
            self.trigger_definitions = [
                TriggerDefinition(conf, self.debug_manager) for conf in defs
            ]
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()
class PipelineManager(object):
    @classmethod
    def config_description(cls):
        configs = TriggerManager.config_description()
        configs.update(
            dict(
                pipeline_handlers=ConfigItem(
                    required=True,
                    help="dictionary of pipeline handlers to load "
                    "Classes specified with simport syntax. "
                    "simport docs for more info"),
                pipeline_worker_batch_size=ConfigItem(
                    help="Number of streams for pipeline "
                    "worker(s) to load at a time",
                    default=1000),
                pipeline_worker_delay=ConfigItem(
                    help="Number of seconds for pipeline worker "
                    "to sleep when it finds no streams to "
                    "process",
                    default=10),
                pipeline_config=ConfigItem(required=True,
                                           help="Name of pipeline config file "
                                           "defining the handlers for each "
                                           "pipeline."),
                purge_completed_streams=ConfigItem(
                    help="Delete successfully proccessed "
                    "streams when finished?",
                    default=True),
            ))
        return configs

    def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None):
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])
        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs
            ]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [
                TriggerDefinition(conf, None) for conf in defs
            ]
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)

        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
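        # plug_map maps plugin names to simport "module:Class" strings;
        # plugins that fail to load are logged and skipped, not raised.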
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                log.error("Could not load plugin %s: Import failed. %s" %
                          (name, e))
            except (simport.MissingMethodOrFunction, simport.MissingModule,
                    simport.BadDirectory) as e:
                log.error("Could not load plugin %s: Not found. %s" %
                          (name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _log_statistics(self):
        logger.info(
            "Loaded %s streams. Fired %s, Expired %s." %
            (self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
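        # Run the named pipeline's handler chain over the stream's events.
        # Handlers may emit new events, which are re-injected through
        # add_new_events() so they can match other triggers.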
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" %
                         (pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" %
                             (stream.name, stream.id, pipeline))
                self._error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" %
                    (stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        streams = self.db.get_ready_streams(batch_size,
                                            self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def run(self):
        while True:
            fire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size, expire=True)

            if (self.current_time() -
                    self.last_status).seconds > self.statistics_period:
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
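
A minimal sketch of driving the worker loop above; run() never returns, so the shutdown handling here is an assumption.

# Hedged sketch: run() alternates fire and expire passes over ready streams
# and sleeps pipeline_worker_delay seconds when neither pass finds work.
manager = PipelineManager(config)   # config as sketched earlier
try:
    manager.run()                   # blocks forever
except KeyboardInterrupt:
    pass                            # assumed shutdown path; the loop has no exit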
class PipelineManager(object):
    @classmethod
    def config_description(cls):
        configs = TriggerManager.config_description()
        configs.update(dict(
            pipeline_handlers=ConfigItem(
                required=True,
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info"),
            pipeline_worker_batch_size=ConfigItem(
                help="Number of streams for pipeline "
                     "worker(s) to load at a time",
                default=1000),
            pipeline_worker_delay=ConfigItem(
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process", default=10),
            pipeline_config=ConfigItem(required=True,
                                       help="Name of pipeline config file "
                                            "defining the handlers for each "
                                            "pipeline."),
            purge_completed_streams=ConfigItem(
                help="Delete successfully proccessed "
                     "streams when finished?",
                default=True),
            trim_events=ConfigItem(
                help="Delete events older than a configurable time.",
                default=False),
            trim_events_age=ConfigItem(
                help="Delete events older than this (timex expr).",
                default="$timestamp - 14d"),
            trim_events_batch_size=ConfigItem(
                help="Maximum number of events for pipeline "
                     "worker(s) to trim at a time",
                default=100),
        ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None, time_sync=None,
                 proc_name='pipeline_worker'):
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s"
                     % (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf,
                                              self.pipeline_handlers)
                for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                logger.debug("Loaded trigger definitions %s" % str(defs))
                self.trigger_definitions = [
                    TriggerDefinition(conf, None) for conf in defs]

        self.trigger_manager = TriggerManager(
            self.config, db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" % (
                             name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" % (
                    name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _log_statistics(self):
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
            self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" % (
                        stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def process_trim_events(self):
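        # trim_events_age holds a parsed timex expression; calling it
        # resolves the deletion cutoff relative to the current time.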
        trim_date = self.trim_events_age().timestamp
        event_ids = self.db.find_older_events(trim_date,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events" % len(event_ids))
        self.db.purge_events(event_ids)
        return len(event_ids)

    def run(self):
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size,
                    expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                if ((self.current_time() - self.last_status).seconds
                        > self.statistics_period):
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                logger.warn("Database Connection went away. Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
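
The newer worker adds event trimming, a time-sync hook, and a process name for log lines. A hedged startup sketch; the trim keys come from config_description() above, while the values and the reuse of the earlier config dict are assumptions.

# Hedged sketch: trim_events_age is a timex expression; the default
# "$timestamp - 14d" keeps roughly two weeks of events.
config.update(
    trim_events=True,
    trim_events_age='$timestamp - 14d',
    trim_events_batch_size=100,
)
worker = PipelineManager(config, proc_name='pipeline_worker_0')  # assumed name
worker.run()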
class TriggerManager(object):

    @classmethod
    def config_description(cls):
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True, default='.'),
            distiller_config=ConfigItem(
                required=False,
                help="Name of distiller config file "
                     "describing what to extract from the "
                     "notifications"),
            distiller_trait_plugins=ConfigItem(
                help="dictionary of trait plugins to load "
                     "for stackdistiller. Classes specified with "
                     "simport syntax. See stackdistiller and "
                     "simport docs for more info", default=dict()),
            time_sync_endpoint=ConfigItem(
                help="URL of time sync service for use with"
                     " replying old events.",
                default=None),
            catch_all_notifications=ConfigItem(
                help="Store basic info for all notifications,"
                     " even if not listed in distiller config",
                default=False),
            statistics_period=ConfigItem(
                help="Emit stats on event counts, etc every "
                     "this many seconds", default=10),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
            trigger_definitions=ConfigItem(
                required=False,
                help="Name of trigger definitions file "
                     "defining trigger conditions and what events to "
                     "process for each stream"),
        )

    def __init__(self, config, db=None, stackdistiller=None, trigger_defs=None,
                 time_sync=None):
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])
        if stackdistiller is not None:
            self.distiller = stackdistiller
        else:
            # distiller_config is optional
            if config.contains('distiller_config'):
                dist_config = config.load_file(config['distiller_config'])
                plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                             distiller.DEFAULT_PLUGINMAP)
                self.distiller = distiller.Distiller(
                    dist_config,
                    trait_plugin_map=plugmap,
                    catchall=config['catch_all_notifications'])
        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                self.trigger_definitions = [
                    TriggerDefinition(conf, self.debug_manager)
                    for conf in defs]
        # trigger_map is used to quickly access existing trigger_defs
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" % (
                    name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" % (
                    name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def save_event(self, event):
        traits = {}
        try:
            message_id = event['message_id']
            timestamp = event['timestamp']
            event_type = event['event_type']
        except KeyError as e:
            logger.warning("Received invalid event: %s" % e)
            return False
        for key, val in event.items():
            if key not in ('message_id', 'timestamp', 'event_type'):
                if val is not None:
                    traits[key] = val
        try:
            self.db.create_event(message_id, event_type,
                                 timestamp, traits)
            self.saved_events += 1
            return True
        except DuplicateError:
            logger.info("Received duplicate event %s, Ignoring." % message_id)
        return False

    def convert_notification(self, notification_body):
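        # Condense the raw notification into an event via the distiller;
        # returns None when nothing matches or the result fails validation.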
        cond = EventCondenser(self.db)
        cond.clear()
        self.received += 1
        if self.distiller.to_event(notification_body, cond):
            if cond.validate():
                return cond.get_event()
            else:
                logger.warning("Received invalid event")
        else:
            event_type = notification_body.get('event_type',
                                               '**no event_type**')
            message_id = notification_body.get('message_id', '**no id**')
            logger.info("Dropping unconverted %s notification %s"
                        % (event_type, message_id))
        return None

    def _log_statistics(self):
        logger.info("Received %s notifications. Saved %s events." % (
                    self.received, self.saved_events))
        self.received = 0
        self.saved_events = 0
        self.last_status = self.current_time()

        self.debug_manager.dump_debuggers()

    def _add_or_create_stream(self, trigger_def, event, dist_traits):
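        # Streams are keyed by trigger name plus distinguishing traits:
        # join an active matching stream, or create a new one.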
        stream = self.db.get_active_stream(trigger_def.name, dist_traits,
                                           self.current_time())
        if stream is None:
            trigger_def.debugger.bump_counter("New stream")
            stream = self.db.create_stream(trigger_def.name, event,
                                           dist_traits,
                                           trigger_def.expiration)
            logger.debug("Created New stream %s for %s: distinguished by %s"
                         % (stream.id, trigger_def.name, str(dist_traits)))
        else:
            self.db.add_event_stream(stream, event, trigger_def.expiration)
        return stream

    def _ready_to_fire(self, stream, trigger_def):
        timestamp = trigger_def.get_fire_timestamp(self.current_time())
        self.db.stream_ready_to_fire(stream, timestamp)
        trigger_def.debugger.bump_counter("Ready to fire")
        logger.debug("Stream %s ready to fire at %s" % (stream.id, timestamp))

    def add_trigger_definition(self, list_of_triggerdefs, debugger=None):
        if debugger is None:
            debugger = self.debug_manager
        for td in list_of_triggerdefs:
            if td['name'] not in self.trigger_map:
                # Only add if name is unique
                tdef = TriggerDefinition(td, debugger)
                self.trigger_definitions.append(tdef)
                self.trigger_map[td['name']] = tdef

    def delete_trigger_definition(self, trigger_def_name):
        if trigger_def_name in self.trigger_map:
            self.trigger_definitions.remove(
                self.trigger_map.get(trigger_def_name))
            del self.trigger_map[trigger_def_name]

    def add_event(self, event):
        if self.save_event(event):
            for trigger_def in self.trigger_definitions:
                matched_criteria = trigger_def.match(event)
                if matched_criteria:
                    dist_traits = trigger_def.get_distinguishing_traits(
                        event, matched_criteria)
                    stream = self._add_or_create_stream(trigger_def, event,
                                                        dist_traits)
                    trigger_def.debugger.bump_counter("Added events")
                    if stream.fire_timestamp is None:
                        if trigger_def.should_fire(self.db.get_stream_events(
                                stream)):
                            self._ready_to_fire(stream, trigger_def)

    def add_notification(self, notification_body):
        event = self.convert_notification(notification_body)
        if event:
            self.add_event(event)
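
The methods above take a plain notification mapping; whether it converts to an event is decided entirely by the distiller config. A minimal sketch of driving them, assuming tm is a constructed TriggerManager (see Example #15 below); the field values are purely illustrative:

notification = {
    'event_type': 'compute.instance.create.end',
    'message_id': 'aa114a2e-9c1c-4d1f-8e45-0d1f1a6d9a4e',
    'timestamp': '2014-08-01 02:07:21.556753',
    'payload': {'instance_id': 'inst-0001'},
}
tm.add_notification(notification)  # distill, validate, save, route to streams
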
Example #15
class TriggerManager(object):
    @classmethod
    def config_description(cls):
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True,
                default='.'),
            distiller_config=ConfigItem(required=False,
                                        help="Name of distiller config file "
                                        "describing what to extract from the "
                                        "notifications"),
            distiller_trait_plugins=ConfigItem(
                help="dictionary of trait plugins to load "
                "for stackdistiller. Classes specified with "
                "simport syntax. See stackdistiller and "
                "simport docs for more info",
                default=dict()),
            time_sync_endpoint=ConfigItem(
                help="URL of time sync service for use with"
                " replying old events.",
                default=None),
            catch_all_notifications=ConfigItem(
                help="Store basic info for all notifications,"
                " even if not listed in distiller config",
                default=False),
            statistics_period=ConfigItem(
                help="Emit stats on event counts, etc every "
                "this many seconds",
                default=10),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
            trigger_definitions=ConfigItem(
                required=False,
                help="Name of trigger definitions file "
                "defining trigger conditions and what events to "
                "process for each stream"),
        )

    def __init__(self,
                 config,
                 db=None,
                 stackdistiller=None,
                 trigger_defs=None,
                 time_sync=None):
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])
        if stackdistiller is not None:
            self.distiller = stackdistiller
        else:
            # distiller_config is optional
            if config.contains('distiller_config'):
                dist_config = config.load_file(config['distiller_config'])
                plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                             distiller.DEFAULT_PLUGINMAP)
                self.distiller = distiller.Distiller(
                    dist_config,
                    trait_plugin_map=plugmap,
                    catchall=config['catch_all_notifications'])
        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                self.trigger_definitions = [
                    TriggerDefinition(conf, self.debug_manager)
                    for conf in defs
                ]
        # trigger_map is used to quickly access existing trigger_defs
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
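        # plug_map maps plugin names to simport load strings; entries that
        # fail to import are logged and skipped rather than raising.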
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" %
                             (name, e))
            except (simport.MissingMethodOrFunction, simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" %
                             (name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def save_event(self, event):
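        # 'message_id', 'timestamp' and 'event_type' are required; all other
        # non-None fields are stored as traits. Duplicates are logged and
        # ignored.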
        traits = {}
        try:
            message_id = event['message_id']
            timestamp = event['timestamp']
            event_type = event['event_type']
        except KeyError as e:
            logger.warning("Received invalid event: %s" % e)
            return False
        for key, val in event.items():
            if key not in ('message_id', 'timestamp', 'event_type'):
                if val is not None:
                    traits[key] = val
        try:
            self.db.create_event(message_id, event_type, timestamp, traits)
            self.saved_events += 1
            return True
        except DuplicateError:
            logger.info("Received duplicate event %s, Ignoring." % message_id)
        return False

    def convert_notification(self, notification_body):
        cond = EventCondenser(self.db)
        cond.clear()
        self.received += 1
        if self.distiller.to_event(notification_body, cond):
            if cond.validate():
                return cond.get_event()
            else:
                logger.warning("Received invalid event")
        else:
            event_type = notification_body.get('event_type',
                                               '**no event_type**')
            message_id = notification_body.get('message_id', '**no id**')
            logger.info("Dropping unconverted %s notification %s" %
                        (event_type, message_id))
        return None

    def _log_statistics(self):
        logger.info("Received %s notifications. Saved %s events." %
                    (self.received, self.saved_events))
        self.received = 0
        self.saved_events = 0
        self.last_status = self.current_time()

        self.debug_manager.dump_debuggers()

    def _add_or_create_stream(self, trigger_def, event, dist_traits):
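        # Streams are keyed by trigger name plus the distinguishing traits:
        # an event joins the matching active stream, or a new stream is
        # created with the trigger definition's expiration.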
        stream = self.db.get_active_stream(trigger_def.name, dist_traits,
                                           self.current_time())
        if stream is None:
            trigger_def.debugger.bump_counter("New stream")
            stream = self.db.create_stream(trigger_def.name, event,
                                           dist_traits, trigger_def.expiration)
            logger.debug("Created New stream %s for %s: distinguished by %s" %
                         (stream.id, trigger_def.name, str(dist_traits)))
        else:
            self.db.add_event_stream(stream, event, trigger_def.expiration)
        return stream

    def _ready_to_fire(self, stream, trigger_def):
        timestamp = trigger_def.get_fire_timestamp(self.current_time())
        self.db.stream_ready_to_fire(stream, timestamp)
        trigger_def.debugger.bump_counter("Ready to fire")
        logger.debug("Stream %s ready to fire at %s" % (stream.id, timestamp))

    def add_trigger_definition(self, list_of_triggerdefs, debugger=None):
        if debugger is None:
            debugger = self.debug_manager
        for td in list_of_triggerdefs:
            if td['name'] not in self.trigger_map:
                # Only add if name is unique
                tdef = TriggerDefinition(td, debugger)
                self.trigger_definitions.append(tdef)
                self.trigger_map[td['name']] = tdef

    def delete_trigger_definition(self, trigger_def_name):
        if trigger_def_name in self.trigger_map:
            self.trigger_definitions.remove(
                self.trigger_map.get(trigger_def_name))
            del self.trigger_map[trigger_def_name]

    def add_event(self, event):
        if self.save_event(event):
            for trigger_def in self.trigger_definitions:
                matched_criteria = trigger_def.match(event)
                if matched_criteria:
                    dist_traits = trigger_def.get_distinguishing_traits(
                        event, matched_criteria)
                    stream = self._add_or_create_stream(
                        trigger_def, event, dist_traits)
                    trigger_def.debugger.bump_counter("Added events")
                    if stream.fire_timestamp is None:
                        if trigger_def.should_fire(
                                self.db.get_stream_events(stream)):
                            self._ready_to_fire(stream, trigger_def)

    def add_notification(self, notification_body):
        event = self.convert_notification(notification_body)
        if event:
            self.add_event(event)
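
Putting Example #15 to work, a minimal sketch. The top-level keys follow config_description above; the 'database' sub-keys come from DBInterface.config_description(), so the 'url' entry is an assumption, and the file names are hypothetical:

config = {
    'config_path': ['.'],
    'distiller_config': 'event_definitions.yaml',   # hypothetical file name
    'distiller_trait_plugins': {},
    'catch_all_notifications': False,
    'statistics_period': 10,
    'database': {'url': 'sqlite://'},               # assumed DBInterface key
    'trigger_definitions': 'triggers.yaml',         # hypothetical file name
}

tm = TriggerManager(config)
for body in incoming_notifications:   # e.g. drained from a queue (assumed)
    tm.add_notification(body)
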
Example #16

class PipelineManager(object):

    @classmethod
    def config_description(cls):
        configs = TriggerManager.config_description()
        configs.update(dict(
                    pipeline_handlers=ConfigItem(required=True,
                                 help="dictionary of pipeline handlers to load "
                                       "Classes specified with simport syntax. "
                                       "simport docs for more info"),
                    pipeline_worker_batch_size=ConfigItem(
                                 help="Number of streams for pipeline "
                                      "worker(s) to load at a time",
                                      default=1000),
                    pipeline_worker_delay=ConfigItem(
                                 help="Number of seconds for pipeline worker "
                                      "to sleep when it finds no streams to "
                                      "process", default=10),
                    pipeline_config=ConfigItem(required=True,
                                       help="Name of pipeline config file "
                                            "defining the handlers for each "
                                            "pipeline."),
                    purge_completed_streams=ConfigItem(
                                       help="Delete successfully proccessed "
                                            "streams when finished?",
                                            default=True),
                   ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None):
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])
        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [Pipeline.check_handler_config(conf,
                                                self.pipeline_handlers)
                                              for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [TriggerDefinition(conf, None) for conf in defs]
        self.trigger_map = dict((tdef.name, tdef) for tdef in self.trigger_definitions)

        self.trigger_manager = TriggerManager(self.config, db=self.db,
                                              trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                log.error("Could not load plugin %s: Import failed. %s" % (
                          name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                log.error("Could not load plugin %s: Not found. %s" % (
                          name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _log_statistics(self):
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
                    self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config, self.pipeline_handlers)
            new_events = pipeline.handle_events(events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                          pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error("Stream %s locked while trying to set 'complete' state! "
                             "This should not happen." % stream.id)

    def _error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'expire_error' state! "
                         "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown pipeline %s" % (
                            stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def run(self):
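        # Main worker loop: fire ready streams, expire overdue ones, emit
        # statistics periodically, and sleep when there is nothing to do.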
        while True:
            fire_ct = self.process_ready_streams(self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(self.pipeline_worker_batch_size,
                                                   expire=True)

            if (self.current_time() - self.last_status).seconds > self.statistics_period:
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
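
To drive this version of PipelineManager, the TriggerManager config needs the pipeline keys described above. A minimal sketch; the handler name, module path, and file name are hypothetical, and the commented structure is what the pipeline config file is assumed to load to (one list of handler configs per pipeline):

config.update({
    'pipeline_handlers': {'logger': 'mypipelines.handlers:LoggerHandler'},
    # pipelines.yaml is assumed to load to something like:
    #   {'test_fire_pipeline': [{'name': 'logger', 'params': {}}]}
    'pipeline_config': 'pipelines.yaml',
    'pipeline_worker_batch_size': 1000,
    'pipeline_worker_delay': 10,
    'purge_completed_streams': True,
})

pm = PipelineManager(config)
pm.run()   # blocks: fires and expires ready streams, sleeps when idle
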
Example #17
class PipelineManager(object):
    @classmethod
    def config_description(cls):
        configs = TriggerManager.config_description()
        configs.update(
            dict(
                pipeline_handlers=ConfigItem(
                    required=True,
                    help="dictionary of pipeline handlers to load "
                    "Classes specified with simport syntax. "
                    "simport docs for more info"),
                pipeline_worker_batch_size=ConfigItem(
                    help="Number of streams for pipeline "
                    "worker(s) to load at a time",
                    default=1000),
                pipeline_worker_delay=ConfigItem(
                    help="Number of seconds for pipeline worker "
                    "to sleep when it finds no streams to "
                    "process",
                    default=10),
                pipeline_config=ConfigItem(required=True,
                                           help="Name of pipeline config file "
                                           "defining the handlers for each "
                                           "pipeline."),
                purge_completed_streams=ConfigItem(
                    help="Delete successfully proccessed "
                    "streams when finished?",
                    default=True),
                trim_events=ConfigItem(
                    help="Delete events older than a configurable time.",
                    default=False),
                trim_events_age=ConfigItem(
                    help="Delete events older than this (timex expr).",
                    default="$timestamp - 14d"),
                trim_events_batch_size=ConfigItem(
                    help="Maximum number of events for pipeline "
                    "worker(s) to trim at a time",
                    default=100),
            ))
        return configs

    def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None,
                 time_sync=None,
                 proc_name='pipeline_worker'):
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s" %
                     (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs
            ]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                logger.debug("Loaded trigger definitions %s" % str(defs))
                self.trigger_definitions = [
                    TriggerDefinition(conf, None) for conf in defs
                ]

        self.trigger_manager = TriggerManager(
            self.config,
            db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" %
                             (name, e))
            except (simport.MissingMethodOrFunction, simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" %
                             (name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _log_statistics(self):
        logger.info(
            "Loaded %s streams. Fired %s, Expired %s." %
            (self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" %
                         (pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" %
                             (stream.name, stream.id, pipeline))
                self._error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" %
                    (stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        streams = self.db.get_ready_streams(batch_size,
                                            self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

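        # Shuffle so that concurrent workers loading overlapping batches tend
        # to pick different streams first; collisions surface as the LockError
        # handled in fire_stream()/expire_stream().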
        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def process_trim_events(self):
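        # trim_events_age is a parsed timex expression; calling it resolves
        # the expression and the result carries the cutoff as .timestamp.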
        trim_date = self.trim_events_age().timestamp
        event_ids = self.db.find_older_events(trim_date,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events" % len(event_ids))
        self.db.purge_events(event_ids)
        return len(event_ids)

    def run(self):
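        # Main worker loop: fire ready streams, expire overdue ones, trim old
        # events when enabled, emit statistics periodically, and sleep when
        # idle. Database disconnects are retried; anything else is fatal.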
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size, expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                if ((self.current_time() - self.last_status).seconds >
                        self.statistics_period):
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                logger.warn("Database Connection went away. Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
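
Finally, a minimal sketch of running the Example #17 worker, assuming the config shape sketched earlier plus the trim options; trim_events_age uses timex expression syntax, ts.TimeSync matches the time_sync parameter above, and the proc_name is arbitrary:

config.update({
    'trim_events': True,
    'trim_events_age': '$timestamp - 14d',
    'trim_events_batch_size': 100,
})

pm = PipelineManager(config, time_sync=ts.TimeSync(),
                     proc_name='pipeline_worker_01')
pm.run()   # loops forever; sleeps and retries on DatabaseConnectionError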