def end_to_end_test_handler(msg):
    """Handle a request to run the end-to-end (E2E) tests.

    Any data left over from a previous test run is cleared and a message is
    returned whose header carries the newly generated dynamic workflow.

    :param msg: incoming message; its optional 'header' dict may already hold a 'process_id'
    :return: message dict with the extended header ('timestamp', 'workflow') and empty contents
    """
    now = datetime.datetime.utcnow()
    # utcnow() returns a naive datetime and timestamp() interprets naive values as local time.
    # Tag the value as UTC for the epoch calculation so the id does not shift with the host timezone.
    # The naive value is kept for isoformat() below so the header format stays unchanged.
    start_timestamp = int(now.replace(microsecond=0, tzinfo=datetime.timezone.utc).timestamp())

    header = msg.get('header', {})
    # Set process id before call to logger.configure
    header['process_id'] = header.get('process_id', f"{start_timestamp}.e2e_test")
    logger.configure(msg, 'E2E Test')

    logger.info("Clear any previous test data")
    e2etest = E2ETest(header['process_id'])
    e2etest.cleartests()

    logger.info("Start E2E Test")
    return {
        'header': {
            **header,
            'timestamp': now.isoformat(),
            'workflow': e2etest.get_workflow(),
        },
        'contents': ''
    }
def distribute_file(conn_info, filename):
    """Copy the checked file to its final location.

    Check and copy is implemented as an indivisible action: if the check is OK
    the file is copied to its final location in one action. The time between
    the check and the copy is kept as short as possible, so no extra workflow
    step (with possible queueing) has been introduced.

    :param conn_info: dict whose 'connection' entry offers copy_object()
    :param filename: name of the file at its temporary (export dir) location
    :return: None
    """
    # Remove export dir from filename to get destination file name.
    # The directory name is escaped so that it is matched literally, even if it
    # contains characters that have a special meaning in a regular expression.
    dst = re.sub(rf'^{re.escape(EXPORT_DIR)}/', '', filename)

    # Copy the file to the destination location
    logger.info(f"Distribute to {dst}")
    conn_info['connection'].copy_object(CONTAINER_BASE, filename, f"{CONTAINER_BASE}/{dst}")

    # Do not delete the file from its temporary location because a re-run would cause missing file errors

    # Cleanup any date files at the destination location
    cleanup_datefiles(conn_info['connection'], CONTAINER_BASE, dst)
def apply_events(storage, last_events, start_after, stats):
    """Apply all events that have not been handled yet to the database

    Events are fetched and applied in batches; each batch is handled by its own
    EventApplicator and the next batch starts after the last event of the previous one.

    :param storage: GOB (events + entities)
    :param last_events: passed on to the event applicator for every event
    :param start_after: the id of the last event that has been applied to the storage
    :param stats: update statistics for this action
    :return:
    """
    batch_size = 10000

    with ActiveGarbageCollection("Apply events"), storage.get_session() as session:
        logger.info("Apply events")
        add_event_tids = set()

        with ProgressTicker("Apply events", batch_size) as progress:
            while True:
                batch = storage.get_events_starting_after(start_after, batch_size)
                if not batch:
                    break

                with EventApplicator(storage) as event_applicator:
                    for event in batch:
                        progress.tick()

                        gob_event, count, _ = event_applicator.apply(event, last_events, add_event_tids)
                        stats.add_applied(gob_event.action, count)
                        start_after = event.eventid

                        # Remove event from session, to avoid trying to update event db object
                        session.expunge(event)

                    event_applicator.apply_all()
def _download_sources(conn_info, directory, filenames) -> List[Tuple[str, str]]:
    """Download the given source files into a local directory.

    :param conn_info: connection info, passed on to _get_file
    :param directory: local directory in which the files are stored; created when missing
    :param filenames: list of tuples (dst_path, src_filename)
    :return: list of tuples (dst_path, path of the downloaded local file)
    """
    # Create missing parent directories too, as is done for the individual files below
    Path(directory).mkdir(exist_ok=True, parents=True)

    src_files = []
    for dst_path, filename in filenames:
        # Only the file contents are needed here, the file info is ignored
        _, src_file = _get_file(conn_info, filename)

        # dst_path may contain sub directories, make sure these exist before writing
        temp_file = os.path.join(directory, dst_path)
        Path(os.path.dirname(temp_file)).mkdir(exist_ok=True, parents=True)

        with open(temp_file, "wb") as f:
            f.write(src_file)
        src_files.append((dst_path, temp_file))

    logger.info(f"{len(src_files)} source files downloaded")
    return src_files
def _propose_check_file(proposals, filename, obj_info, obj):
    """
    Build a proposal to check the given file and register it in proposals

    :param proposals: dict in which the proposal is stored under its proposal key
    :param filename: Name of the file to check
    :param obj_info: Current file object info
    :param obj: Current file object
    :return: None, the proposal is added to proposals
    """
    def _limits(key, value):
        # Translate an analysis value into the margin that is proposed for it
        if key in _MAXIMUM_VALUES:
            return [0, value]
        if key in _MINIMUM_VALUES:
            return [value, None]
        if key in _ABSOLUTE_VALUES:
            return [value]
        # Within limits
        low, high = _get_low_high(value)
        return [low, high]

    # heuristic method to convert variable values to a variable name
    proposal_key = filename
    for variable_name, value_pattern in _REPLACEMENTS.items():
        if re.search(value_pattern, filename):
            proposal_key = re.sub(value_pattern, variable_name, proposal_key)

    # Base the proposal on the analysis of the current file
    analysis = _get_analysis(obj_info, obj)
    analysis["age_hours"] = 24

    proposal = {key: _limits(key, value) for key, value in analysis.items()}

    logger.info(f"Proposal generated for {proposal_key}")
    proposals[proposal_key] = proposal
def _process_events(storage, events, stats):
    """Store the events, provided that the model is in sync with the events

    :param storage: GOB (events + entities)
    :param events: the events to process
    :param stats: update statistics for this action
    :return: the result of _store_events when the model is up to date, else None
    """
    # Get the max eventid of the entities and the last eventid of the events
    entity_max_eventid, last_eventid = get_event_ids(storage)
    logger.info(f"Events are at {last_eventid or 0:,}, model is at {entity_max_eventid or 0:,}")

    # Get all source_id - last_event combinations to check for validity and existence
    with storage.get_session():
        # { source_id: last_event, ... }
        last_events = storage.get_last_events()

    if is_corrupted(entity_max_eventid, last_eventid):
        logger.error("Model is inconsistent! data is more recent than events")
        return None

    if entity_max_eventid != last_eventid:
        logger.warning("Model is out of date, Further processing has stopped")
        return None

    logger.info("Model is up to date")
    # Add new events
    return _store_events(storage, last_events, events, stats)
def _store_events(storage, last_events, events, stats):
    """Store events in GOB

    Each event is offered to an EventCollector. Events that are collected are
    registered as stored in the statistics, all other events as skipped.

    :param storage: GOB (events + entities)
    :param last_events: passed on to the event collector
    :param events: the events to process
    :param stats: update statistics for this action
    :return:
    """
    # Use a session to commit all or rollback on any error
    with ActiveGarbageCollection("Store events"), storage.get_session():
        logger.info("Store events")

        with ProgressTicker("Store events", 10000) as progress, \
                EventCollector(storage, last_events) as event_collector:
            for event in events:
                progress.tick()

                collected = event_collector.collect(event)
                register = stats.store_event if collected else stats.skip_event
                register(event)
def prepare_relate(msg):
    """Entry point of the relate process

    A message that does not specify catalog, collection and attribute is split
    into separate relate jobs. A message that specifies all three gets the
    relation name added to its header and is returned, so that it can be
    forwarded to the next step of the relate process.

    :param msg: a message from the broker containing the catalog and collections (optional)
    :return: the result message of the relate preparation step
    """
    header = msg.get('header', {})
    catalog_name = header.get('catalogue')
    collection_name = header.get('collection')
    attribute_name = header.get('attribute')

    msg["header"] = {
        **header,
        "version": "0.1",
        "source": "GOB",
        "application": "GOBRelate",
        "entity": collection_name,
        "timestamp": datetime.datetime.utcnow().isoformat(),
    }

    logger.configure(msg, "RELATE")

    is_complete = catalog_name and collection_name and attribute_name
    if not is_complete:
        # A job will be split when catalog, collection or attribute are not provided
        logger.info("Splitting relate job")
        _split_job(msg)
        msg['header']['is_split'] = True
        return publish_result(msg, [])

    # The job has all attributes, add the relation name and forward to the next step in the relate process
    logger.info(f"** Relate {catalog_name} {collection_name} {attribute_name}")
    relation_name = get_relation_name(GOBModel(), catalog_name, collection_name, attribute_name)

    msg["header"].update({
        "catalogue": "rel",
        "collection": relation_name,
        "entity": relation_name,
        "original_catalogue": catalog_name,
        "original_collection": collection_name,
        "original_attribute": attribute_name,
    })
    return msg
def result(self):
    """Report the outcome of the quality assurance

    :raises GOBException: when a fatal error has been registered or when duplicates have been found
    :return: None
    """
    if self.fatal:
        raise GOBException(f"Quality assurance failed for {self.entity_name}")

    if self.duplicates:
        duplicate_keys = ', '.join(map(str, self.duplicates))
        raise GOBException(f"Duplicate primary key(s) found in source: "
                           f"[{duplicate_keys}]")

    logger.info("Quality assurance passed")
def _log_intro(self):
    """
    Log an informational message that the file is checked for unique column values

    Nothing is logged when no unique columns have been defined.

    :return:
    """
    if not self.unique_cols:
        return

    column_list = ", ".join(map(str, self.unique_cols))
    logger.info(f"Checking {self.filename} for unique column values in columns {column_list}")
def end_of_workflow(self, msg):
    """End the workflow for the given message

    An optional 'on_workflow_complete' header entry is removed from the header.
    When it is a dict with keys 'exchange' and 'key' the message is published
    to that exchange and key, otherwise an error is logged.

    :param msg: the message for which the workflow has ended
    :return: None
    """
    logger.configure(msg, "WORKFLOW")

    on_complete = msg['header'].pop('on_workflow_complete', None)
    if on_complete is not None:
        is_valid = isinstance(on_complete, dict) and all(key in on_complete for key in ('exchange', 'key'))
        if is_valid:
            publish(on_complete['exchange'], on_complete['key'], msg)
            logger.info(f"Publish on_workflow_complete to {on_complete['exchange']} with {on_complete['key']}")
        else:
            logger.error("on_workflow_complete should be a dict with keys 'exchange' and 'key'")

    logger.info("End of workflow")
    job_end(msg["header"].get("jobid"))
def cleanup_datefiles(connection, container, filename):
    """Delete previous files from ObjectStore.

    Every item in the container whose name matches the cleanup pattern for
    filename is deleted. The file with filename itself is not deleted.

    :param connection: connection to the ObjectStore
    :param container: container in which the files are stored
    :param filename: name of the file for which previous files are removed
    :return: None
    """
    cleanup_pattern = get_cleanup_pattern(filename)
    if cleanup_pattern == filename:
        # No dates in filename, nothing to do
        return

    logger.info(f'Clean previous files for {filename}.')

    for item in get_full_container_list(connection, container):
        is_previous_file = re.match(cleanup_pattern, item['name']) and item['name'] != filename
        if not is_previous_file:
            continue

        delete_object(connection, container, item)
        logger.info(f'File {item["name"]} deleted.')
def handle_import_object_msg(msg):
    """Convert the single object in the message contents into an entity

    :param msg: message with a header (catalogue, entity, entity_id_attr) and the object as contents
    :return: message with the extended header, the logger summary and the converted entity as contents
    """
    logger.configure(msg, "IMPORT OBJECT")
    logger.info("Start import object")

    header = msg['header']
    importer = MappinglessConverterAdapter(
        header.get('catalogue'),
        header.get('entity'),
        header.get('entity_id_attr'),
    )
    entity = importer.convert(msg['contents'])

    result_header = {
        **header,
        'mode': ImportMode.SINGLE_OBJECT.value,
        'collection': header.get('entity'),
    }
    return {
        'header': result_header,
        'summary': logger.get_summary(),
        'contents': [entity]
    }
def handle_brp_regression_test_msg(msg):
    """Run the BRP regression test and write its results to regression_tests/results/brp

    :param msg: the message that requests the regression test
    :return: message with the original header, a new timestamp and the logger summary
    """
    logger.configure(msg, 'BRP Regression test')

    results = BrpRegression(logger).run()
    ObjectstoreResultsWriter(results, 'regression_tests/results/brp').write()
    logger.info("Written test results to Objecstore at regression_tests/results/brp")

    result_header = {
        **msg.get('header', {}),
        'timestamp': datetime.datetime.utcnow().isoformat(),
    }
    return {
        'header': result_header,
        'summary': logger.get_summary(),
    }
def enrich(self, entity):
    """Enrich the entity according to the enrich specification

    For every column in the specification the enricher function of the specified
    type is called. The generated value is stored in the entity, unless the
    specification is a dry run and the value differs from the current value; in
    that case the generated value is only logged.

    :param entity: the entity to enrich, updated in place
    :return: None
    """
    for column, specs in self.enrich_spec.items():
        enrich_func = self.enrichers[specs["type"]]["func"]
        value, log_message = enrich_func(
            storage=self.storage,
            data=entity,
            specs=specs,
            column=column,
            assigned=self.assigned,
        )

        only_report = specs.get("dry_run", False) and value != entity.get(column)
        if only_report:
            logger.info(f"Enrich dry run: Generated value {value} for entity {entity[specs['on']]}")
        else:
            entity[column] = value

        # Log any message that the enricher function has returned
        if log_message:
            logger.info(log_message)
def on_workflow_progress(msg):
    """
    Process a workflow progress message

    The progress report is START, OK or FAIL. For an OK or FAIL report of a
    known step the duration of the step is logged; a FAIL report also logs
    the error and the end of the workflow.

    :param msg: The message that contains the progress info
    :return: None
    """
    status = msg['status']
    step_info = step_status(msg['jobid'], msg['stepid'], status)

    if step_info and status in (STATUS_OK, STATUS_FAIL):
        logger.configure(msg, "WORKFLOW")

        # Report the duration without the fractional seconds
        duration = str(step_info.end - step_info.start).split('.')[0]
        logger.info(f"Duration {duration}")

        if status == STATUS_FAIL:
            logger.error(f"Program error: {msg['info_msg']}")
            logger.info("End of workflow")

    hooks.on_workflow_progress(msg)
def kafka_produce_handler(msg):
    """Produce Kafka events for the catalogue and collection in the message header

    :param msg: message whose header holds 'catalogue' and 'collection' (both required)
    :return: message with the original header and a summary holding the number of produced events
    :raises AssertionError: when catalogue or collection is missing from the header
    """
    logger.configure(msg, "KAFKA_PRODUCE")
    logger.info("Produce Kafka events")

    header = msg.get('header', {})
    catalogue = header.get('catalogue')
    collection = header.get('collection')

    # Raise explicitly instead of using an assert statement: assert statements are removed
    # when Python runs with -O, which would silently skip this validation.
    # The exception type is unchanged for callers.
    if not (catalogue and collection):
        raise AssertionError("Missing catalogue and collection in header")

    event_producer = KafkaEventProducer(catalogue, collection, logger)
    event_producer.produce()

    return {
        'header': msg['header'],
        'summary': {
            'produced': event_producer.total_cnt,
        }
    }
def check_relations(src_catalog_name, src_collection_name, src_field_name):
    """
    Check relations for any dangling relations

    Dangling can be because a relation exists without any bronwaarde
    or the bronwaarde cannot be matched with any referenced entity.

    The check is skipped when all sources of the relation allow empty relations.

    :param src_catalog_name:
    :param src_collection_name:
    :param src_field_name:
    :return: None
    """
    name = f"{src_collection_name} {src_field_name}"

    # Only include sources where not none_allowed
    sources = GOBSources().get_field_relations(src_catalog_name, src_collection_name, src_field_name)
    check_sources = [source['source'] for source in sources if not source.get('none_allowed', False)]

    if not check_sources:
        logger.info(f"All sources for {src_catalog_name} {src_collection_name} {src_field_name} allow empty "
                    f"relations. Skipping check.")
        return

    # Only filter on sources when necessary (i.e. when there are multiple sources with different values for
    # none_allowed)
    if len(sources) == len(check_sources):
        check_sources = None

    # Run the missing check first, followed by the dangling check
    qa_checks = (
        ("missing", QA_CHECK.Sourcevalue_exists),
        ("dangling", QA_CHECK.Reference_exists),
    )
    for query_type, qa_check in qa_checks:
        query = _get_relation_check_query(
            query_type, src_catalog_name, src_collection_name, src_field_name, check_sources)
        _query_missing(query, qa_check, name)
def connect(self):  # noqa: C901
    """Set up the connection to the datastore of the import source

    The first step of every import is a technical step: a connection needs to be
    set up to connect to a database, filesystem, API, ...

    :return:
    """
    # Get manually added config, or config based on application name
    datastore_config = self.source.get('application_config')
    if not datastore_config:
        datastore_config = get_datastore_config(self.source['application'])

    # The import mode always overrides any mode in the configured read config
    read_config = dict(self.source.get('read_config', {}))
    read_config['mode'] = self.mode

    self.datastore = DatastoreFactory.get_datastore(datastore_config, read_config)
    self.datastore.connect()

    logger.info(f"Connection to {self.app} {self.datastore.user} has been made.")
def _check_file(check, filename, stats):
    """
    Test if all checks that have been defined for the given file are OK

    A margin of one value is an exact match, [None, high] is a maximum,
    [low, None] is a minimum and [low, high] is a range (bounds included).
    Checks for which the statistics hold no value are skipped with a warning.

    :param check: Checks (key => margin) to apply onto the statistics
    :param filename: Name of the file to check
    :param stats: Statistics of the file
    :return: True if all checks succeed
    """
    all_ok = True
    _check_uniqueness(check)

    for key, margin in check.items():
        # Get corresponding value for check
        if key not in stats:
            logger.warning(f"Value missing for {key} check in {filename}")
            continue
        value = stats[key]

        if len(margin) == 1:
            passed = value == margin[0]
            formatted_margin = f"= {_fmt(margin[0])}"
        elif margin[0] is None:
            passed = value <= margin[1]
            formatted_margin = f"<= {_fmt(margin[1])}"
        elif margin[1] is None:
            passed = value >= margin[0]
            formatted_margin = f">= {_fmt(margin[0])}"
        else:
            passed = margin[0] <= value <= margin[1]
            formatted_margin = f"{_fmt(margin[0])} - {_fmt(margin[1])}"

        all_ok = all_ok and passed

        # Numbers are reported with thousands separators and without a ".00" fraction
        if type(value) in [float, int]:
            str_value = f"{value:,.2f}".replace(".00", "")
        else:
            str_value = value

        # Report any errors for the given filename as a group
        if passed:
            logger.info("OK", {'id': filename + " OK", 'data': {key: str_value}})
        else:
            logger.error("Check FAIL", {'id': filename, 'data': {key: str_value, 'margin': formatted_margin}})

    return all_ok
def dump_collection(self, schema, catalog_name, collection_name, force_full=False):
    """
    Dump a catalog collection into a remote database in the given schema

    If the dump fails the operation is retried with a maximum of MAX_TRIES
    and a wait between each try of RETRY_TIMEOUT seconds.
    An error is logged when all tries have failed.

    :param schema:
    :param catalog_name:
    :param collection_name:
    :param force_full: passed on to try_dump_collection
    :return:
    """
    for tries in range(1, Dumper.MAX_TRIES + 1):
        logger.info(f"Try {tries}: dump {catalog_name} - {collection_name}")
        if self.try_dump_collection(schema, catalog_name, collection_name, force_full):
            # On Successful dump
            return

        # Wait RETRY_TIMEOUT seconds before next try.
        # There is no next try after the last one, so do not delay the failure report.
        if tries < Dumper.MAX_TRIES:
            time.sleep(self.RETRY_TIMEOUT)

    logger.error(f'Export {catalog_name}-{collection_name} failed after {Dumper.MAX_TRIES}')
def update_materialized_view(msg):
    """Refresh the materialized view of a relation

    The relation is identified by catalog, collection and attribute, or by its relation name.

    Expects a message with headers:
    - catalogue
    - collection (if catalogue is 'rel' this should be the relation_name)
    - attribute (optional if catalogue is 'rel')

    examples of correct headers that are functionally equivalent:

    header = {
        "catalogue": "meetbouten",
        "collection": "meetbouten",
        "attribute": "ligt_in_buurt",
    }
    header = {
        "catalogue": "rel",
        "collection": "mbn_mbt_gbd_brt_ligt_in_buurt",
    }

    :param msg:
    :return: the message, with an updated timestamp in its header
    """
    header = msg.get('header', {})
    catalog_name, collection_name, attribute_name = (
        header.get(key) for key in ('catalogue', 'collection', 'attribute')
    )

    logger.configure(msg, "UPDATE_VIEW")

    storage_handler = GOBStorageHandler()
    view = _get_materialized_view(catalog_name, collection_name, attribute_name)
    view.refresh(storage_handler)
    logger.info(f"Update materialized view {view.name}")

    msg['header']['timestamp'] = datetime.datetime.utcnow().isoformat()
    return msg
def replace_header_references(uniques: list, header: list):
    """
    Replaces column names in a uniques list with column indexes (1-based)

    Example, with header ['A', 'B', 'C', 'D', 'E', 'F']:

        replace_header_references(['A', 'B', 'D'], header) => [1, 2, 4]
        replace_header_references([1, 2, 5], header) => [1, 2, 5]  # Leave as is

    :param uniques: column names and/or column indexes
    :param header: the column names, in column order
    :return: list in which every column name has been replaced by its column index
    """
    replaced = []
    for col in uniques:
        if isinstance(col, str):
            # Column name, translate to its 1-based position in the header
            replaced.append(header.index(col) + 1)
        else:
            replaced.append(col)

    if uniques != replaced:
        logger.info(f"Interpreting columns {str(uniques)} as {str(replaced)}")

    return replaced
def data_consistency_test_handler(msg):
    """Handle a request to run the data consistency test for a collection.

    The test itself has no return value; its results are captured by the logger
    and returned as the summary of the result message.

    :param msg: message whose header holds 'catalogue' and 'collection' (both required)
        and optionally 'application' and 'entity'
    :return: message with the header, a new timestamp and the logger summary
    :raises AssertionError: when catalogue or collection is missing from the header
    """
    catalog = msg['header'].get('catalogue')
    collection = msg['header'].get('collection')
    application = msg['header'].get('application')

    # The entity defaults to the collection
    msg['header']['entity'] = msg['header'].get('entity', collection)
    logger.configure(msg, 'Data consistency test')

    # Raise explicitly instead of using an assert statement: assert statements are removed
    # when Python runs with -O, which would silently skip this validation.
    # The exception type is unchanged for callers.
    if not all([catalog, collection]):
        raise AssertionError("Expecting header attributes 'catalogue' and 'collection'")

    test_id = f"{catalog} {collection} {application or ''}"

    # No return value. Results are captured by logger.
    logger.info(f"Data consistency test {test_id} started")
    try:
        DataConsistencyTest(catalog, collection, application).run()
    except GOBConfigException as e:
        logger.error(f"Dataset connection failed: {str(e)}")
    except (NotImplementedCatalogError, NotImplementedApplicationError, GOBException) as e:
        logger.error(f"Dataset test failed: {str(e)}")
    else:
        logger.info(f"Data consistency test {test_id} ended")

    return {
        'header': {
            **msg.get('header', {}),
            'timestamp': datetime.datetime.utcnow().isoformat(),
        },
        'summary': logger.get_summary(),
    }
def try_dump_collection(self, schema, catalog_name, collection_name, force_full=False):
    """
    Try to dump the given catalog collection in the given schema

    The dump is performed by issuing an API POST request to the GOB API.
    The response is streamed; every received line is logged together with timing info.
    The dump is successful when the last received line starts with 'Export completed'.

    :param schema:
    :param catalog_name:
    :param collection_name:
    :param force_full: sent to the API as "force_full"
    :return: True if the dump succeeded, False otherwise (any exception is logged as a warning)
    """
    url = f"{self.dump_api}/dump/{catalog_name}/{collection_name}/"
    data = {
        "db": self.db_config,
        "schema": schema,
        "include_relations": False,
        "force_full": force_full,
    }
    headers = {
        "Content-Type": "application/json"
    }

    logger.info(f"Dump {catalog_name} - {collection_name} (schema: {schema})")
    start_request = time.time()
    success = False
    result = None
    try:
        result = requests.post(
            url=url,
            json=data,
            headers=self.update_headers(url, headers),
            stream=True
        )

        last_line = ""
        start_line = time.time()
        for line in result.iter_lines(chunk_size=1):
            last_line = line.decode()
            end_line = time.time()
            logger.info(f"{last_line} ({(end_line - start_line):.2f} / {(end_line - start_request):.2f} secs)")
            start_line = time.time()
    except Exception as e:
        # Deliberate best effort: any failure is reported and results in an unsuccessful try
        logger.warning(f'Export {catalog_name}-{collection_name} failed: {str(e)}')
    else:
        success = re.match(r'Export completed', last_line) is not None
        if not success:
            logger.warning(f'Export {catalog_name}-{collection_name} completed with errors')
    finally:
        # A streamed response keeps its connection until all data has been consumed or it is closed.
        # Close it explicitly so the connection is also released when reading failed halfway.
        if result is not None:
            result.close()
        end_request = time.time()
        logger.info(f"Elapsed time: {(end_request - start_request):.2f} secs")

    return success
def process_relate(msg: dict):
    """Run the actual relate process

    The message is checked for completeness, after which the Relater builds the new
    or updated relations. The result is returned in a message that can be compared
    as if it was the result of an import job.

    :param msg: a message from the broker containing the catalog and collections (optional)
    :return: the result message of the relate process
    """
    logger.configure(msg, "RELATE SRC")

    _check_message(msg)
    header = msg.get('header')

    logger.info("Relate table started")

    is_full_update = header.get('mode', "update") == "full"
    if is_full_update:
        logger.info("Full relate requested")

    relater = Relater(header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])
    filename, confirms = relater.update(is_full_update)

    logger.info("Relate table completed")

    relation_name = get_relation_name(
        GOBModel(), header[CATALOG_KEY], header[COLLECTION_KEY], header[ATTRIBUTE_KEY])

    result_header = {
        **msg["header"],
        "catalogue": "rel",
        "collection": relation_name,
        "entity": relation_name,
        "source": "GOB",
        "application": "GOB",
        "version": RELATE_VERSION,
        "timestamp": msg.get("timestamp", datetime.datetime.utcnow().isoformat()),
    }
    return {
        "header": result_header,
        "summary": logger.get_summary(),
        "contents_ref": filename,
        "confirms": confirms,
    }
def full_update(msg):
    """Store the events for the current dataset

    :param msg: message with the events to store as contents
    :return: Result message, without contents but with any confirms of the original message
    """
    logger.configure(msg, "UPDATE")
    logger.info(f"Update to GOB Database {GOBStorageHandler.user_name} started")

    # Interpret the message header
    metadata = ImportMessage(msg).metadata
    storage = GOBStorageHandler(metadata)

    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Store events {model}")

    # Get events from message
    events = msg["contents"]

    # Gather statistics of update process
    stats = UpdateStatistics()
    _process_events(storage, events, stats)

    # Build result message
    results = stats.results()
    stats.log()
    logger.info(f"Store events {model} completed", {'data': results})
    results.update(logger.get_summary())

    # Return the result message, with no log, no contents but pass-through any confirms
    return {
        "header": msg["header"],
        "summary": results,
        "contents": None,
        "confirms": msg.get('confirms')
    }
def check_relation(msg):
    """
    Check for any dangling relations and for relation conflicts

    The relation to check is identified by the original_catalogue, original_collection
    and original_attribute entries of the message header. A failure of the relations
    check itself is logged; the relation conflicts check is always run afterwards.

    :param msg: message whose header identifies the relation to check
    :return: message with the original header, the logger summary and no contents
    :raises AssertionError: when the catalog/collection combination is unknown to the model
    """
    header = msg.get('header', {})
    catalog_name = header.get('original_catalogue')
    collection_name = header.get('original_collection')
    attribute_name = header.get('original_attribute')

    model = GOBModel()

    logger.configure(msg, "RELATE_CHECK")
    logger.info("Relate check started")

    collection = model.get_collection(catalog_name, collection_name)
    # Raise explicitly instead of using an assert statement: assert statements are removed
    # when Python runs with -O, which would silently skip this validation.
    # The exception type is unchanged for callers.
    if collection is None:
        raise AssertionError(f"Invalid catalog/collection combination {catalog_name}/{collection_name}")

    reference = model._extract_references(collection['attributes']).get(attribute_name)

    try:
        # An unknown attribute (reference is None) fails here and is reported by the except clause
        is_very_many = reference['type'] == fully_qualified_type_name(VeryManyReference)
        check_function = check_very_many_relations if is_very_many else check_relations
        check_function(catalog_name, collection_name, attribute_name)
    except Exception as e:
        _log_exception(f"{attribute_name} check FAILED", e)

    logger.info("Relation conflicts check started")
    check_relation_conflicts(catalog_name, collection_name, attribute_name)

    logger.info("Relate check completed")

    return {
        "header": msg["header"],
        "summary": logger.get_summary(),
        "contents": None
    }
def test(catalogue):
    """
    Test export files for a given catalogue

    Every file of every product (including any extra files) in the export
    configuration of the catalogue is read from its temporary export location
    and checked against the check definitions of the catalogue:

    - a file that cannot be found is reported as MISSING
    - a file with a check definition is analysed and checked; only when the check
      succeeds the file is distributed to its final location
    - a file without a check definition is reported as UNCHECKED and is not distributed

    Proposals for check definitions are collected along the way and written out at the end.

    :param catalogue: catalogue to test
    :return: None
    """
    logger.info(f"Test export for catalogue {catalogue}")

    logger.info("Connect to Objectstore")

    config = get_datastore_config(GOB_OBJECTSTORE)
    datastore = DatastoreFactory.get_datastore(config)
    datastore.connect()
    container_name = CONTAINER_BASE

    logger.info(f"Load files from {container_name}")
    conn_info = {
        "connection": datastore.connection,
        "container": container_name
    }

    # Get test definitions for the given catalogue
    checks = _get_checks(conn_info, catalogue)

    # Make proposals for any missing test definitions
    proposals = {}
    # Note: from here on config refers to an export config, no longer to the datastore config
    for config in _export_config[catalogue]:
        resolve_config_filenames(config)

        for name, product in config.products.items():
            # The main file of the product, followed by the files of any extra_files
            filenames = [product['filename']] + [
                product['filename']
                for product in product.get('extra_files', [])
            ]

            for filename in filenames:
                # Check the previously exported file at its temporary location
                obj_info, obj = _get_file(
                    conn_info, f"{EXPORT_DIR}/{catalogue}/{filename}")

                # Clone check so that changes to the check file don't affect other runs
                check = copy.deepcopy(_get_check(checks, filename))

                # Report results with the name of the matched file
                matched_filename = obj_info['name'] if obj_info else filename

                if obj_info is None:
                    logger.error(f"File {filename} MISSING")
                elif check:
                    stats = _get_analysis(obj_info, obj, check)
                    if _check_file(check, matched_filename, stats):
                        logger.info(f"Check {matched_filename} OK")
                        # Copy the file to its final location
                        distribute_file(conn_info, matched_filename)
                    else:
                        logger.info(f"Check {matched_filename} FAILED")
                    # NOTE(review): the reviewed source had lost its indentation; this call is assumed to
                    # run for every checked file (OK and FAILED), not only for FAILED files - TODO confirm
                    _propose_check_file(proposals, filename, obj_info, obj)
                else:
                    logger.warning(f"File {filename} UNCHECKED")
                    # Do not copy unchecked files
                    _propose_check_file(proposals, filename, obj_info, obj)

    # Write out any missing test definitions
    _write_proposals(conn_info, catalogue, checks, proposals)
def compare(msg):
    """Compare new data in msg (contents) with the current data

    When the dependencies of the message are not met, a message without contents is
    returned immediately. Otherwise every entity in the contents is enriched,
    populated and collected:

    - if the storage holds no entities yet, the entities are written directly as
      ADD events (initial load)
    - otherwise the entities are collected in a temporary table, which is then
      compared with the current data

    :param msg: The new data, including header and summary
    :return: result message, holding a reference to the resulting contents (contents_ref) and any confirms
    """
    logger.configure(msg, "COMPARE")
    header = msg.get('header', {})
    # The compare mode defaults to a full upload
    mode = header.get('mode', FULL_UPLOAD)
    logger.info(
        f"Compare (mode = {mode}) to GOB Database {GOBStorageHandler.user_name} started"
    )

    # Parse the message header
    message = ImportMessage(msg)
    metadata = message.metadata

    # Get the model for the collection to be compared
    gob_model = GOBModel()
    entity_model = gob_model.get_collection(metadata.catalogue,
                                            metadata.entity)

    # Initialize a storage handler for the collection
    storage = GOBStorageHandler(metadata)
    model = f"{metadata.source} {metadata.catalogue} {metadata.entity}"
    logger.info(f"Compare {model}")

    stats = CompareStatistics()

    # Only set when the entities are collected in a temporary table (no initial load)
    tmp_table_name = None
    with storage.get_session():
        with ProgressTicker("Collect compare events", 10000) as progress:
            # Check any dependencies
            if not meets_dependencies(storage, msg):
                # Stop here; the summary reports what has been logged so far
                return {
                    "header": msg["header"],
                    "summary": logger.get_summary(),
                    "contents": None
                }

            enricher = Enricher(storage, msg)
            populator = Populator(entity_model, msg)

            # If there are no records in the database all data are ADD events
            initial_add = not storage.has_any_entity()
            if initial_add:
                logger.info("Initial load of new collection detected")
                # Write ADD events directly, without using a temporary table
                contents_writer = ContentsWriter()
                contents_writer.open()
                # NOTE(review): no try/finally is visible here, so contents_writer presumably stays
                # open when an exception occurs before the close() below - TODO confirm
                # Pass a None confirms_writer because only ADD events are written
                collector = EventCollector(contents_writer,
                                           confirms_writer=None,
                                           version=entity_model['version'])
                collect = collector.collect_initial_add
            else:
                # Collect entities in a temporary table
                collector = EntityCollector(storage)
                collect = collector.collect
                tmp_table_name = collector.tmp_table_name

            # Enrich and populate every entity before it is handed to the selected collect function
            for entity in msg["contents"]:
                progress.tick()
                stats.collect(entity)
                enricher.enrich(entity)
                populator.populate(entity)
                collect(entity)

            collector.close()

    if initial_add:
        # The ADD events have already been written, there is nothing to confirm
        filename = contents_writer.filename
        confirms = None
        contents_writer.close()
    else:
        # Compare entities from temporary table
        with storage.get_session():
            diff = storage.compare_temporary_data(tmp_table_name, mode)
            filename, confirms = _process_compare_results(
                storage, entity_model, diff, stats)

    # Build result message
    results = stats.results()
    logger.info(f"Compare {model} completed", {'data': results})
    results.update(logger.get_summary())

    # From here on message is the result message, no longer the parsed ImportMessage
    message = {
        "header": msg["header"],
        "summary": results,
        "contents_ref": filename,
        "confirms": confirms
    }
    return message