class PipelineManager(object):
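    """Worker that fires and expires event streams.

    Wraps a TriggerManager for event intake, loads pipeline handler
    plugins via simport, and runs the configured pipelines against
    streams in the database that are ready to fire or expire.
    """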

    @classmethod
    def config_description(cls):
        configs = TriggerManager.config_description()
        configs.update(dict(
                    pipeline_handlers=ConfigItem(required=True,
                                 help="dictionary of pipeline handlers to load "
                                       "Classes specified with simport syntax. "
                                       "simport docs for more info"),
                    pipeline_worker_batch_size=ConfigItem(
                                 help="Number of streams for pipeline "
                                      "worker(s) to load at a time",
                                      default=1000),
                    pipeline_worker_delay=ConfigItem(
                                 help="Number of seconds for pipeline worker "
                                      "to sleep when it finds no streams to "
                                      "process", default=10),
                    pipeline_config=ConfigItem(required=True,
                                       help="Name of pipeline config file "
                                            "defining the handlers for each "
                                            "pipeline."),
                    purge_completed_streams=ConfigItem(
                                       help="Delete successfully proccessed "
                                            "streams when finished?",
                                            default=True),
                   ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None):
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])
        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [TriggerDefinition(conf, None)
                                        for conf in defs]
        self.trigger_map = dict((tdef.name, tdef)
                                for tdef in self.trigger_definitions)

        self.trigger_manager = TriggerManager(self.config, db=self.db,
                                              trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
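        """Load the plugin classes named in plug_map via simport.

        Plugins that fail to import are logged and skipped rather than
        aborting startup; 'defaults', if given, pre-populates the result.
        """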
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                log.error("Could not load plugin %s: Import failed. %s" % (
                          name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                log.error("Could not load plugin %s: Not found. %s" % (
                          name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _log_statistics(self):
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
                    self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
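        """Run the named pipeline over a stream's events.

        Returns False if the pipeline raises PipelineExecutionError so the
        caller can mark the stream as errored; any new events emitted by
        the handlers are fed back into the trigger manager.
        """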
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config, self.pipeline_handlers)
            new_events = pipeline.handle_events(events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                          pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error("Stream %s locked while trying to set 'complete' state! "
                             "This should not happen." % stream.id)

    def _error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'expire_error' state! "
                         "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
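        """Run the trigger definition's fire pipeline for a ready stream.

        The stream is claimed by moving it to the 'firing' state; a
        LockError means another worker got there first. Pipeline failures
        mark the stream 'error'; on success the stream is completed
        (purged, or marked 'completed', per purge_completed_streams).
        """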
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
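        """Run the trigger definition's expire pipeline for a stream.

        Mirrors fire_stream, but claims the stream via the 'expiring'
        state and marks failures as 'expire_error'.
        """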
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown pipeline %s" % (
                            stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
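        """Load a batch of ready streams and fire or expire each one.

        Returns the number of streams loaded. Streams are shuffled before
        processing, presumably to reduce lock contention when several
        workers poll the same database.
        """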
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def run(self):
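        """Main loop: fire and expire ready streams, log statistics
        periodically, and sleep when there is no work to do.
        """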
        while True:
            fire_ct = self.process_ready_streams(self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(self.pipeline_worker_batch_size,
                                                   expire=True)

            if ((self.current_time() - self.last_status).seconds
                    > self.statistics_period):
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
class PipelineManager(object):
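    """Worker that fires and expires event streams.

    This variant of the class adds a pluggable time source (time_sync),
    a per-worker name for log messages (proc_name), an optional
    trigger-definitions file, trimming of old events, and reconnect
    handling for dropped database connections.
    """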
    @classmethod
    def config_description(cls):
        configs = TriggerManager.config_description()
        configs.update(dict(
            pipeline_handlers=ConfigItem(
                required=True,
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info"),
            pipeline_worker_batch_size=ConfigItem(
                help="Number of streams for pipeline "
                     "worker(s) to load at a time",
                default=1000),
            pipeline_worker_delay=ConfigItem(
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process", default=10),
            pipeline_config=ConfigItem(required=True,
                                       help="Name of pipeline config file "
                                            "defining the handlers for each "
                                            "pipeline."),
            purge_completed_streams=ConfigItem(
                help="Delete successfully proccessed "
                     "streams when finished?",
                default=True),
            trim_events=ConfigItem(
                help="Delete events older than a configurable time.",
                default=False),
            trim_events_age=ConfigItem(
                help="Delete events older than this (timex expr).",
                default="$timestamp - 14d"),
            trim_events_batch_size=ConfigItem(
                help="Maximum number of events for pipeline "
                     "worker(s) to trim at a time",
                default=100),
        ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None, time_sync=None,
                 proc_name='pipeline_worker'):
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s"
                     % (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf,
                                              self.pipeline_handlers)
                for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                logger.debug("Loaded trigger definitions %s" % str(defs))
                self.trigger_definitions = [
                    TriggerDefinition(conf, None) for conf in defs]

        self.trigger_manager = TriggerManager(
            self.config, db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" % (
                             name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" % (
                    name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _log_statistics(self):
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
            self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
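        """Run the named pipeline over a stream's events.

        Returns False if the pipeline raises PipelineExecutionError so the
        caller can mark the stream as errored. Note that, unlike the first
        variant above, the stream itself is also passed to handle_events.
        """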
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" % (
                        stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def process_trim_events(self):
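        """Purge a batch of events older than the configured trim age.

        Evaluates the trim_events_age timex expression to get the cutoff
        timestamp and purges up to trim_events_batch_size events older
        than it; returns the number of events purged.
        """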
        trim_date = self.trim_events_age().timestamp
        event_ids = self.db.find_older_events(trim_date,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events" % len(event_ids))
        self.db.purge_events(event_ids)
        return len(event_ids)

    def run(self):
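        """Main worker loop.

        Fires, expires, and (if enabled) trims events on each pass, and
        sleeps when there is no work. DatabaseConnectionError is absorbed
        so the DB layer can reconnect; any other exception is logged and
        re-raised.
        """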
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size,
                    expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                if ((self.current_time() - self.last_status).seconds
                        > self.statistics_period):
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                logger.warn("Database Connection went away. Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
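
# Usage sketch (illustrative; the config values and handler path below are
# hypothetical examples, not part of the original module):
#
#   config = {
#       'config_path': ['/etc/winchester'],
#       'database': {'url': 'mysql://user:pass@localhost/winchester'},
#       'pipeline_handlers': {
#           'logger': 'winchester.pipeline_handler:LoggingHandler'},
#       'pipeline_config': 'pipelines.yaml',
#       'trigger_definitions': 'triggers.yaml',
#       'statistics_period': 10,
#   }
#
#   manager = PipelineManager(config)
#   manager.run()  # blocks; processes ready streams until interrupted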