class ComponentManager:
    """
    Base class for decisionengine components such as Sources and Channels
    """

    def __init__(self, name):
        """
        :type name: :obj:`str`
        :arg name: Name of source corresponding to this source manager
        """
        self.name = name
        self.state = ProcessingState()
        self.loglevel = multiprocessing.Value("i", logging.WARNING)

    def get_state_value(self):
        return self.state.get_state_value()

    def get_state(self):
        return self.state.get()

    def get_state_name(self):
        return self.get_state().name

    def set_loglevel_value(self, log_level):
        """Assumes log_level is a string corresponding to the
        supported logging-module levels."""
        with self.loglevel.get_lock():
            # Convert from string to int form using technique
            # suggested by logging module
            self.loglevel.value = getattr(logging, log_level)

    def get_loglevel(self):
        with self.loglevel.get_lock():
            return self.loglevel.value

    def take_offline(self):
        """
        Adjust status to offline
        """
        self.state.set(State.OFFLINE)
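
# A minimal usage sketch (not from the decisionengine codebase): a hypothetical
# component built on ComponentManager.  The class name "HeartbeatComponent" and
# its run() loop are illustrative assumptions; only the inherited API
# (state, set_loglevel_value, get_loglevel, take_offline) comes from the class
# above, and the module-level imports (time, logging) are assumed present.
class HeartbeatComponent(ComponentManager):
    def run(self):
        self.state.set(State.ACTIVE)
        while not self.state.should_stop():
            time.sleep(1)  # stand-in for real component work
        self.take_offline()

# Example driver:
#   component = HeartbeatComponent("heartbeat")
#   component.set_loglevel_value("DEBUG")  # string level, per the docstring
#   assert component.get_loglevel() == logging.DEBUG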
def test_wrong_value_on_assignment():
    state = ProcessingState()
    with pytest.raises(Exception, match='Supplied value is not a State variable'):
        state.set(1.3)
class TaskManager:
    """
    Task Manager
    """

    def __init__(self, name, generation_id, channel_dict, global_config):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type generation_id: :obj:`int`
        :arg generation_id: Task Manager generation id provided by caller
        :type channel_dict: :obj:`dict`
        :arg channel_dict: channel configuration
        :type global_config: :obj:`dict`
        :arg global_config: global configuration
        """
        self.id = str(uuid.uuid4()).upper()
        self.dataspace = dataspace.DataSpace(global_config)
        self.data_block_t0 = datablock.DataBlock(self.dataspace,
                                                 name,
                                                 self.id,
                                                 generation_id)  # my current data block
        self.name = name
        self.channel = Channel(channel_dict)
        self.state = ProcessingState()
        self.loglevel = multiprocessing.Value('i', logging.WARNING)
        self.lock = threading.Lock()
        # The rest of this function will go away once the source-proxy
        # has been reimplemented.
        for src_worker in self.channel.sources.values():
            src_worker.worker.post_create(global_config)

    def wait_for_all(self, events_done):
        """
        Wait for all sources or transforms to finish

        :type events_done: :obj:`list`
        :arg events_done: list of events to wait for
        """
        logging.getLogger().info('Waiting for all tasks to run')
        try:
            while not all([e.is_set() for e in events_done]):
                time.sleep(1)
                if self.state.should_stop():
                    break
            for e in events_done:
                e.clear()
        except Exception:  # pragma: no cover
            logging.getLogger().exception("Unexpected error!")
            raise

    def wait_for_any(self, events_done):
        """
        Wait for any sources to finish

        :type events_done: :obj:`list`
        :arg events_done: list of events to wait for
        """
        try:
            while not any([e.is_set() for e in events_done]):
                time.sleep(1)
                if self.state.should_stop():
                    break
            for e in events_done:
                if e.is_set():
                    e.clear()
        except Exception:  # pragma: no cover
            logging.getLogger().exception("Unexpected error!")
            raise

    def run(self):
        """
        Task Manager main loop
        """
        logging.getLogger().setLevel(self.loglevel.value)
        logging.getLogger().info(f'Starting Task Manager {self.id}')
        done_events, source_threads = self.start_sources(self.data_block_t0)
        # This is a boot phase
        # Wait until all sources run at least one time
        self.wait_for_all(done_events)
        logging.getLogger().info('All sources finished')
        if not self.state.has_value(State.BOOT):
            for thread in source_threads:
                thread.join()
            logging.getLogger().error(
                f'Error occurred during initial run of sources. Task Manager {self.name} exits')
            return

        while not self.state.should_stop():
            try:
                self.decision_cycle()
                if not self.state.should_stop():
                    # the last decision_cycle completed without error
                    self.state.set(State.STEADY)
                self.wait_for_any(done_events)
            except Exception:  # pragma: no cover
                logging.getLogger().exception(
                    "Exception in the task manager main loop")
                logging.getLogger().error(
                    'Error occurred. Task Manager %s exits with state %s',
                    self.id, self.get_state_name())
                break
        else:  # we did not use 'break' to exit the loop
            logging.getLogger().info(
                f'Task Manager {self.id} received stop signal and exits')

        for source in self.channel.sources.values():
            source.stop_running.set()
        for transform in self.channel.transforms.values():
            transform.stop_running.set()
        # the run_transform function is performing our .join()
        # for these threads.
        for thread in source_threads:
            thread.join()

    def get_state_value(self):
        return self.state.get().value

    def get_state(self):
        return self.state.get()

    def get_state_name(self):
        return self.get_state().name

    def get_produces(self):
        # FIXME: What happens if a transform and source have the same name?
        produces = {}
        for name, mod in self.channel.sources.items():
            produces[name] = list(mod.worker._produces.keys())
        for name, mod in self.channel.transforms.items():
            produces[name] = list(mod.worker._produces.keys())
        return produces

    def get_consumes(self):
        # FIXME: What happens if a transform and publisher have the same name?
        consumes = {}
        for name, mod in self.channel.transforms.items():
            consumes[name] = list(mod.worker._consumes.keys())
        for name, mod in self.channel.publishers.items():
            consumes[name] = list(mod.worker._consumes.keys())
        return consumes

    def set_loglevel_value(self, log_level):
        """Assumes log_level is a string corresponding to the
        supported logging-module levels."""
        with self.loglevel.get_lock():
            # Convert from string to int form using technique
            # suggested by logging module
            self.loglevel.value = getattr(logging, log_level)

    def get_loglevel(self):
        with self.loglevel.get_lock():
            return self.loglevel.value

    def set_to_shutdown(self):
        self.state.set(State.SHUTTINGDOWN)
        logging.getLogger("decision_engine").debug(
            'Shutting down. Will call shutdown on all publishers')
        for publisher_worker in self.channel.publishers.values():
            publisher_worker.worker.shutdown()
        self.state.set(State.SHUTDOWN)

    def take_offline(self, current_data_block):
        """
        offline and stop task manager
        """
        self.state.set(State.OFFLINE)
        # invalidate data block
        # not implemented yet

    def data_block_put(self, data, header, data_block):
        """
        Put data into data block

        :type data: :obj:`dict`
        :arg data: key, value pairs
        :type header: :obj:`~datablock.Header`
        :arg header: data header
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        if not isinstance(data, dict):
            logging.getLogger().error(
                f'data_block put expecting {dict} type, got {type(data)}')
            return
        logging.getLogger().debug(f'data_block_put {data}')
        with data_block.lock:
            metadata = datablock.Metadata(data_block.taskmanager_id,
                                          state='END_CYCLE',
                                          generation_id=data_block.generation_id)
            for key, product in data.items():
                data_block.put(key, product, header, metadata=metadata)

    def do_backup(self):
        """
        Duplicate current data block and return its copy

        :rtype: :obj:`~datablock.DataBlock`
        """
        with self.lock:
            data_block = self.data_block_t0.duplicate()
            logging.getLogger().debug(f'Duplicated block {data_block}')
        return data_block

    def decision_cycle(self):
        """
        Decision cycle to be run periodically (by trigger)
        """
        data_block_t1 = self.do_backup()
        try:
            self.run_transforms(data_block_t1)
        except Exception:  # pragma: no cover
            logging.getLogger().exception("error in decision cycle(transforms) ")
            # We do not call 'take_offline' here because it has
            # already been called in the run_transform code
            # operating on a separate thread.

        actions_facts = []
        try:
            actions_facts = self.run_logic_engine(data_block_t1)
            logging.getLogger().info('ran all logic engines')
        except Exception:  # pragma: no cover
            logging.getLogger().exception("error in decision cycle(logic engine) ")
            self.take_offline(data_block_t1)

        for a_f in actions_facts:
            try:
                self.run_publishers(a_f['actions'], a_f['newfacts'], data_block_t1)
            except Exception:  # pragma: no cover
                logging.getLogger().exception("error in decision cycle(publishers) ")
                self.take_offline(data_block_t1)

    def run_source(self, src):
        """
        Get the data from source and put it into the data block

        :type src: :obj:`~Worker`
        :arg src: source Worker
        """
        # If task manager is in offline state, do not keep executing sources.
        while not self.state.should_stop():
            try:
                logging.getLogger().info(f'Src {src.name} calling acquire')
                data = src.worker.acquire()
                Module.verify_products(src.worker, data)
                logging.getLogger().info(f'Src {src.name} acquire returned')
                logging.getLogger().info(f'Src {src.name} filling header')
                if data:
                    t = time.time()
                    header = datablock.Header(self.data_block_t0.taskmanager_id,
                                              create_time=t,
                                              creator=src.module)
                    logging.getLogger().info(f'Src {src.name} header done')
                    self.data_block_put(data, header, self.data_block_t0)
                    logging.getLogger().info(f'Src {src.name} data block put done')
                else:
                    logging.getLogger().warning(f'Src {src.name} acquire returned no data')
                src.run_counter += 1
                src.data_updated.set()
                logging.getLogger().info(f'Src {src.name} {src.module} finished cycle')
            except Exception:
                logging.getLogger().exception(f'Exception running source {src.name} ')
                self.take_offline(self.data_block_t0)
            if src.schedule > 0:
                s = src.stop_running.wait(src.schedule)
                if s:
                    logging.getLogger().info(f'received stop_running signal for {src.name}')
                    break
            else:
                logging.getLogger().info(f'source {src.name} runs only once')
                break
        logging.getLogger().info(f'stopped {src.name}')

    def start_sources(self, data_block=None):
        """
        Start sources, each in a separate thread

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        event_list = []
        source_threads = []
        for key, source in self.channel.sources.items():
            logging.getLogger().info(f'starting loop for {key}')
            event_list.append(source.data_updated)
            thread = threading.Thread(target=self.run_source,
                                      name=source.name,
                                      args=(source,))
            source_threads.append(thread)
            # Cannot catch exception from function called in separate thread
            thread.start()
        return (event_list, source_threads)

    def run_transforms(self, data_block=None):
        """
        Run transforms.
        So far in main process.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        logging.getLogger().info('run_transforms')
        logging.getLogger().debug(f'run_transforms: data block {data_block}')
        if not data_block:
            return
        event_list = []
        threads = []
        for key, transform in self.channel.transforms.items():
            logging.getLogger().info(f'starting transform {key}')
            event_list.append(transform.data_updated)
            thread = threading.Thread(target=self.run_transform,
                                      name=transform.name,
                                      args=(transform, data_block))
            threads.append(thread)
            # Cannot catch exception from function called in separate thread
            thread.start()

        self.wait_for_all(event_list)
        for thread in threads:
            thread.join()
        logging.getLogger().info('all transforms finished')

    def run_transform(self, transform, data_block):
        """
        Run a transform

        :type transform: :obj:`~Worker`
        :arg transform: transform Worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        data_to = self.channel.task_manager.get('data_TO', _TRANSFORMS_TO)
        consume_keys = list(transform.worker._consumes.keys())

        logging.getLogger().info('transform: %s expected keys: %s provided keys: %s',
                                 transform.name, consume_keys, list(data_block.keys()))
        loop_counter = 0
        while not self.state.should_stop():
            # Check if data is ready
            if set(consume_keys) <= set(data_block.keys()):
                # data is ready - may run transform()
                logging.getLogger().info('run transform %s', transform.name)
                try:
                    with data_block.lock:
                        data = transform.worker.transform(data_block)
                    logging.getLogger().debug(f'transform returned {data}')
                    t = time.time()
                    header = datablock.Header(data_block.taskmanager_id,
                                              create_time=t,
                                              creator=transform.name)
                    self.data_block_put(data, header, data_block)
                    logging.getLogger().info('transform put data')
                except Exception:  # pragma: no cover
                    logging.getLogger().exception(f'exception from transform {transform.name} ')
                    self.take_offline(data_block)
                break
            s = transform.stop_running.wait(1)
            if s:
                logging.getLogger().info(f'received stop_running signal for {transform.name}')
                break
            loop_counter += 1
            if loop_counter == data_to:
                logging.getLogger().info(f'transform {transform.name} did not get consumes data '
                                         f'in {data_to} seconds. Exiting')
                break
        transform.data_updated.set()

    def run_logic_engine(self, data_block=None):
        """
        Run Logic Engine.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        le_list = []
        if not data_block:
            return
        try:
            for le in self.channel.le_s:
                logging.getLogger().info('run logic engine %s',
                                         self.channel.le_s[le].name)
                logging.getLogger().debug('run logic engine %s %s',
                                          self.channel.le_s[le].name, data_block)
                rc = self.channel.le_s[le].worker.evaluate(data_block)
                le_list.append(rc)
                logging.getLogger().info('run logic engine %s done',
                                         self.channel.le_s[le].name)
                logging.getLogger().info('logic engine %s generated newfacts: %s',
                                         self.channel.le_s[le].name,
                                         rc['newfacts'].to_dict(orient='records'))
                logging.getLogger().info('logic engine %s generated actions: %s',
                                         self.channel.le_s[le].name, rc['actions'])

            # Add new facts to the datablock
            # Add empty dataframe if nothing is available
            if le_list:
                all_facts = pandas.concat([i['newfacts'] for i in le_list],
                                          ignore_index=True)
            else:
                logging.getLogger().info('Logic engine(s) did not return any new facts')
                all_facts = pandas.DataFrame()

            data = {'de_logicengine_facts': all_facts}
            t = time.time()
            header = datablock.Header(data_block.taskmanager_id,
                                      create_time=t,
                                      creator='logicengine')
            self.data_block_put(data, header, data_block)
        except Exception:  # pragma: no cover
            logging.getLogger().exception("Unexpected error!")
            raise
        else:
            return le_list

    def run_publishers(self, actions, facts, data_block=None):
        """
        Run Publishers in main process.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        if not data_block:
            return
        try:
            log = logging.getLogger()
            for action_list in actions.values():
                for action in action_list:
                    publisher = self.channel.publishers[action]
                    name = publisher.name
                    log.info(f'run publisher {name}')
                    log.debug(f'run publisher {name} {data_block}')
                    try:
                        publisher.worker.publish(data_block)
                    except KeyError as e:
                        if self.state.should_stop():
                            log.warning(
                                f"TaskManager stopping, ignore exception {name} publish() call: {e}")
                            continue
                        else:
                            raise
        except Exception:  # pragma: no cover
            logging.getLogger().exception("Unexpected error!")
            raise
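
# A hedged driver sketch for the thread-based TaskManager above (illustrative,
# not from the repo).  It assumes only what the class itself uses: a channel
# configuration dict accepted by Channel() and a global configuration dict
# accepted by dataspace.DataSpace(); the function name and the 60-second
# runtime are arbitrary.
def run_channel_sketch(channel_dict, global_config):
    task_manager = TaskManager("my_channel", 1, channel_dict, global_config)
    runner = threading.Thread(target=task_manager.run, name=task_manager.name)
    runner.start()
    try:
        time.sleep(60)  # let the channel run decision cycles for a while
    finally:
        # take_offline() flips the state so run()'s loop, the sources, and
        # the transforms all wind down; its data-block argument is currently
        # unused (invalidation is "not implemented yet").
        task_manager.take_offline(task_manager.data_block_t0)
        runner.join()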
class Reaper:
    """
    Reaper provides periodic deletion of data older than
    retention_interval days.

    The class attributes indicate a rational set of defaults
    that shouldn't be altered by user configuration.
    """

    MIN_RETENTION_INTERVAL_DAYS = 7
    MIN_SECONDS_BETWEEN_RUNS = 2 * 60 * 59

    def __init__(self, config):
        """
        :type config: :obj:`dict`
        :arg config: Configuration dictionary
        """
        # Validate configuration
        self.logger = structlog.getLogger(LOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=DELOGGER_CHANNEL_NAME)
        self.logger.debug("Initializing a reaper")

        # since we must validate this, have a private store space
        self.__retention_interval = self.MIN_RETENTION_INTERVAL_DAYS
        self.__seconds_between_runs = self.MIN_SECONDS_BETWEEN_RUNS

        if not config.get("dataspace"):
            self.logger.exception("Error in initializing Reaper!")
            raise dataspace.DataSpaceConfigurationError(
                "Invalid dataspace configuration: "
                "dataspace key not found in dictionary")
        elif not isinstance(config.get("dataspace"), dict):
            self.logger.exception("Error in initializing Reaper!")
            raise dataspace.DataSpaceConfigurationError(
                "Invalid dataspace configuration: "
                "dataspace key must correspond to a dictionary")
        try:
            db_driver_name = config["dataspace"]["datasource"]["name"]
            db_driver_module = config["dataspace"]["datasource"]["module"]
            db_driver_config = config["dataspace"]["datasource"]["config"]
            self.retention_interval = config["dataspace"]["retention_interval_in_days"]
            self.seconds_between_runs = config["dataspace"].get("reaper_run_interval",
                                                                24 * 60 * 60)
        except KeyError:
            self.logger.exception("Error in initializing Reaper!")
            raise dataspace.DataSpaceConfigurationError(
                "Invalid dataspace configuration")

        self.datasource = dataspace.DataSourceLoader().create_datasource(
            db_driver_module, db_driver_name, db_driver_config)
        self.thread = None
        self.state = ProcessingState()

    @property
    def retention_interval(self):
        """We have data constraints, so use a property to track"""
        return self.__retention_interval

    @retention_interval.setter
    def retention_interval(self, value):
        if int(value) < self.MIN_RETENTION_INTERVAL_DAYS:
            self.logger.exception("Error in initializing Reaper!")
            raise ValueError(
                f"For safety the data retention interval has to be greater than {self.MIN_RETENTION_INTERVAL_DAYS} days")
        self.logger.debug(f"Reaper setting retention_interval to {value}.")
        self.__retention_interval = int(value)

    @property
    def seconds_between_runs(self):
        """We have data constraints, so use a property to track"""
        return self.__seconds_between_runs

    @seconds_between_runs.setter
    def seconds_between_runs(self, value):
        if int(value) < self.MIN_SECONDS_BETWEEN_RUNS:
            self.logger.exception("Error in initializing Reaper!")
            raise ValueError(
                f"For performance the time between runs has to be greater than {self.MIN_SECONDS_BETWEEN_RUNS} seconds")
        self.logger.debug(f"Reaper setting seconds_between_runs to {value}.")
        self.__seconds_between_runs = int(value)

    def reap(self):
        """
        Actually spawn the query to delete the old records.
        Lock the state as this task doesn't have a cancel option.
        """
        with self.state.lock:
            if self.state.should_stop():
                return
            self.logger.info("Reaper.reap() started.")
            self.state.set(State.ACTIVE)
            self.datasource.delete_data_older_than(self.retention_interval)
            self.state.set(State.STEADY)
            self.logger.info("Reaper.reap() completed.")

    def _reaper_loop(self, delay):
        """
        The thread actually runs this.
        """
        self.logger.debug("Reaper thread started.")
        between_runs = delay
        while not self.state.should_stop():
            try:
                with self.state.lock:
                    if not self.state.should_stop():
                        # The start function will block until the state is changed from BOOT
                        # If we are signaled to stop, don't override that state
                        self.state.set(State.IDLE)
                self.logger.debug(f"Reaper waiting {between_runs} seconds or for a stop.")
                self.state.wait_until(STOPPING_CONDITIONS, timeout=between_runs)
                # in stop state, end wait and get out of the loop ASAP
                if self.state.should_stop():
                    self.logger.debug(f"Reaper received a stop event: {self.state.get()}.")
                else:
                    self.reap()
                # after waiting the initial delay time
                # use the self.seconds_between_runs as the delay
                if between_runs != self.seconds_between_runs:
                    between_runs = self.seconds_between_runs
            except Exception:  # pragma: no cover
                self.state.set(State.ERROR)
                self.logger.exception(f"Reaper.reap() failed: {self.state.get()}.")
                break
        else:  # we did not use 'break' to exit the loop
            self.state.set(State.SHUTDOWN)
            self.logger.info("Reaper shutdown cleanly.")

    def start(self, delay=0):
        """
        Start thread with an optional delay to start the thread in X seconds
        """
        if self.state.should_stop() and not self.state.inactive():
            self.logger.debug(f"Reaper asked to start during stop: {self.state.get()}.")
        if isinstance(self.thread, threading.Thread) and self.thread.is_alive():
            try:
                # threads need to end, else we orphan one and make a new thread
                raise RuntimeError("Reaper asked to start, but it is running already.")
            except RuntimeError as __e:
                self.logger.exception(__e)
                raise
        try:
            # each invocation must be a new thread
            with self.state.lock:
                self.state.set(State.BOOT)
                self.thread = threading.Thread(group=None,
                                               target=self._reaper_loop,
                                               args=(delay,),
                                               name="Reaper_loop_thread")
                self.thread.start()
        except Exception:  # pragma: no cover
            self.logger.exception("Reaper loop thread not started")
        self.state.wait_while(State.BOOT)

    def stop(self):
        """
        Try to stop the reaper, will block if the reaper cannot be interrupted.
        """
        if isinstance(self.thread, threading.Thread):
            if self.thread.is_alive():
                with self.state.lock:
                    if not self.state.should_stop():
                        self.logger.debug("Sending reaper State.SHUTTINGDOWN signal")
                        self.state.set(State.SHUTTINGDOWN)
                if self.state.has_value(State.SHUTTINGDOWN):
                    self.state.wait_while(State.SHUTTINGDOWN)
                self.logger.info(f"Reaper shutdown : {self.state.get()}.")
            else:
                self.logger.debug(
                    f"Reaper tried to stop but is stopped already: {self.state.get()}.")
        else:
            self.logger.debug("Reaper tried to stop but was never started")

    def __repr__(self):  # pragma: no cover
        return (f"Reaper, retention interval {self.retention_interval} days, "
                f"between runs wait {self.seconds_between_runs} seconds, state {self.state.get()}")
def test_inactive():
    state = ProcessingState()
    for set_to in INACTIVE_CONDITIONS:
        state.set(set_to)
        assert state.inactive()


def test_should_stop():
    state = ProcessingState()
    for set_to in STOPPING_CONDITIONS:
        state.set(set_to)
        assert state.should_stop()


def test_probably_running():
    state = ProcessingState()
    for set_to in RUNNING_CONDITIONS:
        state.set(set_to)
        assert state.probably_running()
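
# A companion sketch showing the blocking helpers the Reaper relies on
# (wait_until/wait_while).  The helper thread, the 0.1 s delay, and the 5 s
# timeout are illustrative; threading and time are assumed to be imported
# alongside the fixtures above, and SHUTTINGDOWN is assumed to be among
# STOPPING_CONDITIONS (as Reaper.stop() implies).
def test_wait_until_sketch():
    state = ProcessingState()

    def stopper():
        time.sleep(0.1)
        state.set(State.SHUTTINGDOWN)

    threading.Thread(target=stopper).start()
    # Blocks until the state reaches a stopping condition or the timeout expires.
    state.wait_until(STOPPING_CONDITIONS, timeout=5)
    assert state.should_stop()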
class TaskManager:
    """
    Task manager
    """

    def __init__(self, name, workers, dataspace, expected_products, exchange, broker_url, queue_info):
        """
        :type name: :obj:`str`
        :arg name: Name of channel corresponding to this task manager
        :type workers: :obj:`dict`
        :arg workers: worker objects keyed by 'sources', 'transforms',
                      'logic_engine', and 'publishers'
        :type dataspace: :obj:`~dataspace.DataSpace`
        :arg dataspace: data space backing this task manager's data block
        :arg expected_products: names of the products the sources will provide
        :type broker_url: :obj:`str`
        :arg broker_url: URL of the message broker
        :type queue_info: :obj:`list`
        :arg queue_info: (queue name, routing key) pairs, one per source
        """
        self.name = name
        self.state = ProcessingState()
        self.loglevel = multiprocessing.Value("i", logging.WARNING)
        self.id = str(uuid.uuid4()).upper()
        self.data_block_t0 = datablock.DataBlock(dataspace, name, self.id, 1)  # my current data block
        self.logger = structlog.getLogger(CHANNELLOGGERNAME)
        self.logger = self.logger.bind(module=__name__.split(".")[-1],
                                       channel=self.name)

        # The DE owns the sources
        self.source_workers = workers["sources"]
        self.transform_workers = workers["transforms"]
        self.logic_engine = workers["logic_engine"]
        self.publisher_workers = workers["publishers"]

        self.exchange = exchange
        self.broker_url = broker_url
        self.connection = Connection(self.broker_url)
        self.source_product_cache = SourceProductCache(expected_products, self.logger)
        self.queue_info = queue_info
        self.routing_keys = [info[1] for info in self.queue_info]

    def get_state_value(self):
        return self.state.get_state_value()

    def get_state(self):
        return self.state.get()

    def get_state_name(self):
        return self.get_state().name

    def set_loglevel_value(self, log_level):
        """Assumes log_level is a string corresponding to the
        supported logging-module levels."""
        with self.loglevel.get_lock():
            # Convert from string to int form using technique
            # suggested by logging module
            self.loglevel.value = getattr(logging, log_level)

    def get_loglevel(self):
        with self.loglevel.get_lock():
            return self.loglevel.value

    def take_offline(self):
        """
        Adjust status to stop the decision cycles and bring the task manager offline
        """
        self.state.set(State.SHUTTINGDOWN)

    def run_cycle(self, messages):
        for name, msg_body in messages.items():
            module_spec = msg_body["source_module"]
            module_name = msg_body["class_name"]
            data = msg_body["data"]
            assert data
            if data is State.SHUTDOWN:
                self.logger.info(
                    f"Channel {self.name} has received shutdown flag from source {module_spec} (class {module_name})")
                self.take_offline()
                return

            assert isinstance(data, dict)
            self.logger.debug(f"Data received from {module_name}: {data}")
            data_to_process = self.source_product_cache.update(data)
            if data_to_process is None:
                return

            header = datablock.Header(self.data_block_t0.taskmanager_id,
                                      create_time=time.time(),
                                      creator=module_spec)
            self.logger.info(f"Source {module_name} header done")
            try:
                self.data_block_put(data_to_process, header, self.data_block_t0)
            except Exception:  # pragma: no cover
                self.logger.exception("Exception inserting data into the data block.")
                self.logger.error(
                    f"Could not insert data from the following message\n{msg_body}")
                return
            self.logger.info(f"Source {module_name} data block put done")

            try:
                self.decision_cycle()
                with self.state.lock:
                    if not self.state.should_stop():
                        # If we are signaled to stop, don't override that state
                        # otherwise the last decision_cycle completed without error
                        self.state.set(State.STEADY)
                        CHANNEL_STATE_GAUGE.labels(self.name).set(self.get_state_value())
            except Exception:  # pragma: no cover
                self.logger.exception("Exception in the task manager main loop")
                self.logger.error("Error occurred. Task manager %s exits with state %s",
                                  self.id, self.get_state_name())

    def run_cycles(self):
        """
        Task manager main loop
        """
        self.logger.setLevel(self.loglevel.value)
        self.logger.info(f"Starting task manager {self.id}")
        queues = []
        for queue_name, key in self.queue_info:
            self.logger.debug(f"Creating queue {queue_name} with routing key {key}")
            queues.append(Queue(queue_name,
                                exchange=self.exchange,
                                routing_key=key,
                                auto_delete=True))

        with LatestMessages(queues, self.broker_url) as messages:
            self.state.set(State.ACTIVE)
            self.logger.debug(f"Channel {self.name} is listening for events")
            while not self.state.should_stop():
                msgs = messages.consume()
                if msgs:
                    self.run_cycle(msgs)
        self.logger.info(f"Task manager {self.id} received stop signal and is shutting down")
        self.state.set(State.SHUTTINGDOWN)
        CHANNEL_STATE_GAUGE.labels(self.name).set(self.get_state_value())
        self.logger.debug("Shutting down. Will call shutdown on all publishers")
        for worker in self.publisher_workers.values():
            worker.module_instance.shutdown()
        self.state.set(State.OFFLINE)
        CHANNEL_STATE_GAUGE.labels(self.name).set(self.get_state_value())

    def get_produces(self):
        # FIXME: What happens if a transform and source have the same name?
        produces = {}
        for name, worker in self.source_workers.items():
            produces[name] = list(worker.module_instance._produces.keys())
        for name, worker in self.transform_workers.items():
            produces[name] = list(worker.module_instance._produces.keys())
        return produces

    def get_consumes(self):
        # FIXME: What happens if a transform and publisher have the same name?
        consumes = {}
        for name, worker in self.transform_workers.items():
            consumes[name] = list(worker.module_instance._consumes.keys())
        for name, worker in self.publisher_workers.items():
            consumes[name] = list(worker.module_instance._consumes.keys())
        return consumes

    def data_block_put(self, data, header, data_block):
        """
        Put data into data block

        :type data: :obj:`dict`
        :arg data: key, value pairs
        :type header: :obj:`~datablock.Header`
        :arg header: data header
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        if not isinstance(data, dict):
            self.logger.error(f"data_block put expecting {dict} type, got {type(data)}")
            return
        self.logger.debug(f"data_block_put {data}")
        with data_block.lock:
            metadata = datablock.Metadata(data_block.taskmanager_id,
                                          state="END_CYCLE",
                                          generation_id=data_block.generation_id)
            for key, product in data.items():
                data_block.put(key, product, header, metadata=metadata)

    def decision_cycle(self):
        """
        Decision cycle to be run periodically (by trigger)
        """
        data_block_t1 = self.data_block_t0.duplicate()
        self.logger.debug(f"Duplicated block {self.data_block_t0}")
        try:
            self.run_transforms(data_block_t1)
        except Exception:  # pragma: no cover
            self.logger.exception("Error in decision cycle(transforms) ")
            # We do not call 'take_offline' here because it has
            # already been called during run_transform.

        actions = None
        try:
            actions = self.run_logic_engine(data_block_t1)
            self.logger.info("ran all logic engines")
        except Exception:  # pragma: no cover
            self.logger.exception("Error in decision cycle(logic engine) ")
            self.take_offline()

        if actions is None:
            return

        try:
            self.run_publishers(actions, data_block_t1)
        except Exception:  # pragma: no cover
            self.logger.exception("Error in decision cycle(publishers) ")
            self.take_offline()

    def run_transforms(self, data_block=None):
        """
        Run transforms.
        So far in main process.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        self.logger.info("run_transforms")
        self.logger.debug(f"run_transforms: data block {data_block}")
        if not data_block:
            return
        for key, worker in self.transform_workers.items():
            self.logger.info(f"starting transform {key}")
            self.run_transform(worker, data_block)
        self.logger.info("all transforms finished")

    def run_transform(self, worker, data_block):
        """
        Run a transform

        :type worker: :obj:`~Worker`
        :arg worker: Transform worker
        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        consume_keys = list(worker.module_instance._consumes.keys())
        self.logger.info("transform: %s expected keys: %s provided keys: %s",
                         worker.name, consume_keys, list(data_block.keys()))
        self.logger.info("Run transform %s", worker.name)
        try:
            with TRANSFORM_RUN_HISTOGRAM.labels(self.name, worker.name).time():
                data = worker.module_instance.transform(data_block)
                self.logger.debug(f"transform returned {data}")
                header = datablock.Header(data_block.taskmanager_id,
                                          create_time=time.time(),
                                          creator=worker.name)
                self.data_block_put(data, header, data_block)
                self.logger.info("transform put data")
            TRANSFORM_RUN_GAUGE.labels(self.name, worker.name).set_to_current_time()
        except Exception:  # pragma: no cover
            self.logger.exception(f"exception from transform {worker.name} ")
            self.take_offline()

    def run_logic_engine(self, data_block):
        """
        Run Logic Engine.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        if not data_block:
            raise RuntimeError("Cannot run logic engine on data block that is 'None'.")
        if self.logic_engine is None:
            self.logger.info("No logic engine to run")
            return None

        try:
            actions = new_facts = None
            with LOGICENGINE_RUN_HISTOGRAM.labels(self.name, self.logic_engine.name).time():
                self.logger.info("Run logic engine %s", self.logic_engine.name)
                self.logger.debug("Run logic engine %s %s", self.logic_engine.name, data_block)
                actions, new_facts = self.logic_engine.module_instance.evaluate(data_block)
                self.logger.info("Run logic engine %s done", self.logic_engine.name)
            LOGICENGINE_RUN_GAUGE.labels(self.name, self.logic_engine.name).set_to_current_time()
            self.logger.info("Logic engine %s generated newfacts: %s",
                             self.logic_engine.name,
                             new_facts.to_dict(orient="records"))
            self.logger.info("Logic engine %s generated actions: %s",
                             self.logic_engine.name, actions)

            data = {"de_logicengine_facts": new_facts}
            header = datablock.Header(data_block.taskmanager_id,
                                      create_time=time.time(),
                                      creator="logicengine")
            self.data_block_put(data, header, data_block)
            return actions
        except Exception:  # pragma: no cover
            self.logger.exception("Unexpected logic engine error!")
            raise

    def run_publishers(self, actions, data_block):
        """
        Run Publishers in main process.

        :type data_block: :obj:`~datablock.DataBlock`
        :arg data_block: data block
        """
        if not data_block:
            return
        try:
            for action_list in actions.values():
                for action in action_list:
                    worker = self.publisher_workers[action]
                    name = worker.name
                    self.logger.info(f"Run publisher {name}")
                    self.logger.debug(f"Run publisher {name} {data_block}")
                    try:
                        with PUBLISHER_RUN_HISTOGRAM.labels(self.name, name).time():
                            worker.module_instance.publish(data_block)
                        PUBLISHER_RUN_GAUGE.labels(self.name, name).set_to_current_time()
                    except KeyError as e:
                        if self.state.should_stop():
                            self.logger.warning(
                                f"TaskManager stopping, ignore exception {name} publish() call: {e}")
                            continue
                        raise  # pragma: no cover
        except Exception:  # pragma: no cover
            self.logger.exception("Unexpected error!")
            raise
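
# A hedged construction sketch for this message-based TaskManager (names and
# values below are illustrative assumptions; the (queue name, routing key)
# pair layout is inferred from run_cycles, and the workers dict keys from
# __init__):
#
#   exchange = Exchange("hepcloud_topic_exchange", "topic")  # kombu Exchange; name hypothetical
#   queue_info = [("my_channel.source1", "source1.my_channel")]  # hypothetical
#   workers = {"sources": sources, "transforms": transforms,
#              "logic_engine": logic_engine, "publishers": publishers}
#   task_manager = TaskManager("my_channel", workers, the_dataspace,
#                              expected_products, exchange,
#                              "redis://localhost:6379/0",  # hypothetical broker URL
#                              queue_info)
#   task_manager.run_cycles()  # blocks until a source sends State.SHUTDOWN
#                              # or take_offline() flips the state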