def step_impl(context, number_of_snapshots, match_type, snapshot_type):
    s3_qualified_prefix = os.path.join(
        context.mongo_snapshot_path,
        context.test_run_name,
        context.formatted_date,
        snapshot_type,
    )
    topic_names = [
        template_helper.get_topic_name(topic["topic"])
        for topic in context.topics_for_test
    ]
    snapshot_count = int(number_of_snapshots)
    if snapshot_count == 0:
        for result in aws_helper.assert_no_snapshots_in_s3_threaded(
            topic_names, context.mongo_snapshot_bucket, s3_qualified_prefix, 60
        ):
            console_printer.print_info(
                f"Asserted no snapshots created in s3 with key of {result}"
            )
    else:
        snapshot_string = "snapshots" if snapshot_count > 1 else "snapshot"
        for result in aws_helper.assert_snapshots_in_s3_threaded(
            topic_names,
            context.mongo_snapshot_bucket,
            s3_qualified_prefix,
            snapshot_count,
            context.timeout,
            (match_type == "exact"),
        ):
            console_printer.print_info(
                f"Asserted exactly {number_of_snapshots} {snapshot_string} created in s3 with key of {result}"
            )
def dynamodb_clear_ingest_start(context, snapshot_type, topics_list):
    console_printer.print_info("Executing 'dynamodb_clear_ingest_start' fixture")
    updated_topics = message_helper.get_consolidated_topics_list(
        topics_list,
        snapshot_type,
        context.default_topic_list_full_delimited,
        context.default_topic_list_incremental_delimited,
        [
            context.generate_snapshots_topics_override,
            context.send_snapshots_topics_override,
        ],
    )
    correlation_id = (
        snapshots_helper.get_snapshot_run_correlation_id(
            context.test_run_name, snapshot_type
        )
        if not context.send_snapshots_correlation_id_override
        else context.send_snapshots_correlation_id_override
    )
    for topic in updated_topics:
        topic_name = template_helper.get_topic_name(topic)
        export_status_helper.delete_item_in_export_status_table(
            context.dynamo_db_export_status_table_name, topic_name, correlation_id
        )
def get_metadata_for_id_and_timestamp_from_file(
    table_name, file_path, topic_name, wrap_id=False
):
    """Returns a tuple of the id searched for, its timestamp and the metadata found.

    Arguments:
    table_name -- the table name to check
    file_path -- the file containing the id
    topic_name -- the topic name to get metadata for
    wrap_id -- True if the id format should be wrapped with an "id" object (default False)
    """
    console_printer.print_info(
        f"Retrieving metadata for id from file in '{file_path}' in metadata table '{table_name}' with topic name of '{topic_name}'"
    )
    qualified_topic_name = template_helper.get_topic_name(topic_name)
    record_id = file_helper.get_id_object_from_json_file(file_path)
    record_timestamp = file_helper.get_timestamp_as_long_from_json_file(file_path)
    id_string = json.dumps(record_id)
    if wrap_id:
        id_string = json.dumps({"id": id_string})
    id_string_qualified = id_string.replace(" ", "")
    results = get_metadata_for_specific_id_and_timestamp_in_topic(
        table_name, id_string_qualified, record_timestamp, qualified_topic_name
    )
    return (id_string_qualified, record_timestamp, results)
def get_metadata_for_specific_id_and_timestamp_in_topic(
    table_name, id_string, timestamp, topic_name
):
    """Returns the metadata for a given id and timestamp.

    Arguments:
    table_name -- the table name to check
    id_string -- the json dumped id string
    timestamp -- the timestamp as an int
    topic_name -- the topic name to get metadata for
    """
    console_printer.print_info(
        f"Retrieving metadata for id of '{id_string}' in metadata table '{table_name}' with topic name of '{topic_name}' and timestamp of '{str(timestamp)}'"
    )
    qualified_topic_name = template_helper.get_topic_name(topic_name)
    payload_dict = {
        "table-name": table_name,
        "hbase-id-like": id_string,
        "topic-name-equals": qualified_topic_name,
        "hbase-timestamp-equals": timestamp,
    }
    payload_json = json.dumps(payload_dict)
    return invoke_lambda.invoke_ingestion_metadata_query_lambda(payload_json)
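# Illustrative usage of the two helpers above -- a minimal sketch only. The table
# name, file path and topic below are hypothetical placeholders, not values from
# this repository:
#
#   id_string, timestamp, results = get_metadata_for_id_and_timestamp_from_file(
#       "ingestion_metadata",               # hypothetical metadata table name
#       "/tmp/edited/db.database.collection.json",  # hypothetical file with the record id
#       "db.database.collection",           # hypothetical topic name
#       wrap_id=True,
#   )
#   # `results` is whatever the ingestion metadata query lambda returns for the
#   # id/timestamp pair; callers in this module typically assert on len(results).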
def step_impl(context, table, id_format, message_type):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    context.latest_metadata_store_ids = []
    wrap_id_value = id_format == "wrapped"
    table_name = streaming_data_helper.get_metadata_store_table_name(
        table, context.metadata_store_tables
    )
    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        temp_folder_for_topic = os.path.join(context.temp_folder, topic_name)
        full_folder_path = file_helper.generate_edited_files_folder(
            temp_folder_for_topic, folder
        )
        latest_file_path = file_helper.get_file_from_folder_with_latest_timestamp(
            full_folder_path
        )
        (
            record_id,
            record_timestamp,
            results,
        ) = metadata_store_helper.get_metadata_for_id_and_timestamp_from_file(
            table_name, latest_file_path, topic_name, wrap_id_value
        )
        console_printer.print_info(
            f"Received {len(results)} responses in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Actual metadata store results are '{results}' in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Asserting exactly one response received for id of '{record_id}' and timestamp of '{str(record_timestamp)}' in topic '{topic_name}'"
        )
        assert (
            len(results) == 1
        ), "Metadata table result not returned, try restarting the k2hb consumers"
        results_iterator = iter(results.items())
        result_row_key_value_pair = next(results_iterator)
        result_row_key = result_row_key_value_pair[0]
        result_row_value = result_row_key_value_pair[1]
        console_printer.print_info(
            f"Asserting the key value for the result in topic '{topic_name}'"
        )
        assert record_id in result_row_key
        console_printer.print_info(
            f"Asserted key value of '{result_row_key}' contains expected id of '{record_id}' in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Asserting the field values for the result in topic '{topic_name}'"
        )
        assert record_id in result_row_value["hbase_id"]
        assert record_timestamp == result_row_value["hbase_timestamp"]
        assert topic_name == result_row_value["topic_name"]
        context.latest_metadata_store_ids.append(
            [topic_name, record_id, record_timestamp]
        )
def step_impl(context, message_type, date, key):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    topic_prefix = streaming_data_helper.generate_topic_prefix(message_type)
    qualified_key = None if key == "None" else key
    date_qualified = (
        None if date == "None" else datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%f")
    )
    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        # NOTE: input_template and output_template are not defined in this step;
        # they are assumed to be template file names defined at module level.
        generated_files = kafka_data_generator.generate_kafka_files(
            test_run_name=context.test_run_name,
            s3_input_bucket=context.s3_ingest_bucket,
            input_template_name=input_template,
            output_template_name=output_template,
            new_uuid=qualified_key,
            local_files_temp_folder=os.path.join(context.temp_folder, topic_name),
            fixture_files_root=context.fixture_path_local,
            s3_output_prefix=context.s3_temp_output_path,
            record_count=1,
            topic_name=topic["topic"],
            snapshots_output_folder=context.snapshot_files_hbase_records_temp_folder,
            seconds_timeout=context.timeout,
            fixture_data_folder=folder,
            custom_base_timestamp=date_qualified,
        )
        files_to_send_to_kafka_broker = [
            generated_file[0] for generated_file in generated_files
        ]
        aws_helper.send_files_to_kafka_producer_sns(
            dynamodb_table_name=context.dynamo_db_table_name,
            s3_input_bucket=context.s3_ingest_bucket,
            aws_acc_id=context.aws_acc,
            sns_topic_name=context.aws_sns_topic_name,
            fixture_files=files_to_send_to_kafka_broker,
            message_key=key,
            topic_name=topic["topic"],
            topic_prefix=topic_prefix,
            region=context.aws_region_main,
        )
def step_impl(context, id_format, message_type):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        temp_folder_for_topic = os.path.join(context.temp_folder, topic_name)
        full_folder_path = file_helper.generate_edited_files_folder(
            temp_folder_for_topic, folder
        )
        latest_file_path = file_helper.get_file_from_folder_with_latest_timestamp(
            full_folder_path
        )
        wrap_id_value = id_format == "wrapped"
        file_comparer.assert_specific_file_stored_in_hbase(
            topic_name,
            latest_file_path,
            60,
            record_expected_in_hbase=False,
            wrap_id=wrap_id_value,
        )
def step_impl(context, dlq_file_template):
    for topic in context.topics_for_test:
        dlq_file = None
        for dlq_files_and_topic_tuple in context.kafka_generated_dlq_output_files:
            if topic["topic"] == dlq_files_and_topic_tuple[0]:
                for dlq_file_for_topic in dlq_files_and_topic_tuple[1]:
                    if dlq_file_template in dlq_file_for_topic:
                        dlq_file = dlq_file_for_topic
        if dlq_file is None:
            raise AssertionError(
                f"No generated dlq file could be found for dlq template of {dlq_file_template}"
            )
        expected_file_content = file_helper.get_contents_of_file(dlq_file, True)
        id_object = file_helper.get_id_object_from_json_file(dlq_file)
        test_run_topic_name = template_helper.get_topic_name(topic["topic"])
        file_comparer.assert_specific_id_missing_in_hbase(
            test_run_topic_name, id_object, 5, True
        )
def step_impl(context, dlq_file_template, table, id_format):
    wrap_id_value = id_format == "wrapped"
    table_name = streaming_data_helper.get_metadata_store_table_name(
        table, context.metadata_store_tables
    )
    for topic in context.topics_for_test:
        # Resolve the topic name first so the error message below can reference it
        topic_name = template_helper.get_topic_name(topic["topic"])
        dlq_file = None
        for dlq_files_and_topic_tuple in context.kafka_generated_dlq_output_files:
            if topic["topic"] == dlq_files_and_topic_tuple[0]:
                for dlq_file_for_topic in dlq_files_and_topic_tuple[1]:
                    if dlq_file_template in dlq_file_for_topic:
                        dlq_file = dlq_file_for_topic
        if dlq_file is None:
            raise AssertionError(
                f"No generated dlq file could be found for dlq template of {dlq_file_template} in topic '{topic_name}'"
            )
        id_object = file_helper.get_id_object_from_json_file(dlq_file)
        id_string = json.dumps(id_object)
        if wrap_id_value:
            id_string = json.dumps({"id": id_string})
        id_string_qualified = id_string.replace(" ", "")
        results = metadata_store_helper.get_metadata_for_specific_id_in_topic(
            table_name, id_string_qualified, topic_name
        )
        console_printer.print_info(
            f"Received {len(results)} responses in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Actual metadata store results are '{results}' in topic '{topic_name}'"
        )
        console_printer.print_info(
            f"Asserting no response received for id of '{id_string_qualified}' in topic '{topic_name}'"
        )
        assert len(results) == 0
def assert_specific_file_stored_in_hbase_threaded(
    topics, output_folder, timeout, record_expected_in_hbase=True, wrap_id=False
):
    """Checks, using threads, that the specific files are stored in HBase for the
    given topics, and raises assertion errors if not.

    Keyword arguments:
    topics -- full topic names as an array
    output_folder -- the output folder base for the generated historic data
    timeout -- the timeout in seconds
    record_expected_in_hbase -- true if the record should be in HBase and false if it should not (default True)
    wrap_id -- True if the id format should be wrapped with an "id" object (default False)
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_results = []
        for topic in topics:
            console_printer.print_info(f"Looking for HBase data for topic {topic}")
            output_folder_qualified = os.path.join(output_folder, topic)
            topic_qualified = template_helper.get_topic_name(topic)
            for output_file in os.listdir(output_folder_qualified):
                future_results.append(
                    executor.submit(
                        assert_specific_file_stored_in_hbase,
                        topic_qualified,
                        os.path.join(output_folder_qualified, output_file),
                        timeout,
                        record_expected_in_hbase=record_expected_in_hbase,
                        wrap_id=wrap_id,
                    )
                )
        wait(future_results)
        for future in future_results:
            try:
                yield future.result()
            except Exception as ex:
                raise AssertionError(ex)
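# The function above is a generator (it yields each future's result), so the
# assertions only surface when the caller iterates it. A minimal usage sketch,
# with a hypothetical topic list and folder:
#
#   for result in assert_specific_file_stored_in_hbase_threaded(
#       ["db.database.collection"],   # hypothetical full topic names
#       "/tmp/historic-data",         # hypothetical generated-data folder
#       60,
#   ):
#       console_printer.print_info(f"Asserted file stored in HBase: {result}")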
def step_impl(
    context,
    record_count,
    input_file_name,
    output_file_name,
    key_method,
    days_offset,
):
    context.uploaded_id = uuid.uuid4()
    output_template = None if output_file_name == "None" else output_file_name
    for topic in context.topics_for_test:
        key = None
        if key_method.lower() == "static":
            key = context.uploaded_id
        elif key_method.lower() == "topic":
            key = uuid.uuid4()
        topic_name = template_helper.get_topic_name(topic["topic"])
        timestamp_override = (
            datetime.now() + timedelta(days=int(days_offset))
            if days_offset and days_offset.lower() != "none"
            else None
        )
        corporate_data_generator.generate_corporate_data_files(
            context.test_run_name,
            context.corporate_storage_s3_bucket_id,
            input_file_name,
            output_template,
            key,
            os.path.join(context.temp_folder, topic_name),
            context.fixture_path_local,
            context.cdl_data_load_s3_base_prefix_tests,
            record_count,
            topic["topic"],
            context.timeout,
            timestamp_override,
        )
def wait_for_statuses_in_export_status_table(
    timeout, export_status_table_name, topics, correlation_id, desired_statuses
):
    """Returns True if, within the timeout, the items for the given correlation id
    and every topic in the list match one of the desired statuses, else False.

    Keyword arguments:
    timeout -- the timeout in seconds
    export_status_table_name -- the export table name
    topics -- the array of topics to check
    correlation_id -- the correlation id
    desired_statuses -- an array of allowed statuses
    """
    count = 1
    matched_topics = []
    console_printer.print_info(
        f"Checking all export statuses for all topics match one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}'"
    )
    while len(matched_topics) != len(topics) and count <= timeout:
        for topic in topics:
            if topic not in matched_topics:
                topic_name = template_helper.get_topic_name(topic)
                key_dict = {
                    "CorrelationId": {"S": f"{correlation_id}"},
                    "CollectionName": {"S": f"{topic_name}"},
                }
                item_details = aws_helper.get_item_from_dynamodb(
                    export_status_table_name, key_dict
                )
                if "Item" not in item_details:
                    console_printer.print_debug(
                        f"No export status found for key dict of '{key_dict}'"
                    )
                    continue
                collection_status = item_details["Item"]["CollectionStatus"]["S"]
                if collection_status not in desired_statuses:
                    console_printer.print_debug(
                        f"Status was '{collection_status}' which did not match any of '{desired_statuses}'"
                    )
                    continue
                console_printer.print_info(
                    f"Export status matches one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}' and topic '{topic_name}'"
                )
                matched_topics.append(topic)
        time.sleep(1)
        count += 1
    if len(matched_topics) != len(topics):
        console_printer.print_info(
            f"Export statuses for one or more topics did not match one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}' after '{timeout}' seconds"
        )
        return False
    console_printer.print_info(
        f"All export statuses match one of the desired statuses of '{desired_statuses}' for correlation_id of '{correlation_id}'"
    )
    return True
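# A minimal usage sketch for the polling helper above; the table name, topic and
# correlation id are hypothetical placeholders:
#
#   all_exported = wait_for_statuses_in_export_status_table(
#       timeout=300,
#       export_status_table_name="ExportStatus",   # hypothetical table name
#       topics=["db.database.collection"],         # hypothetical topic list
#       correlation_id="test-run-1_full",          # hypothetical correlation id
#       desired_statuses=["Exported", "Sent"],
#   )
#   assert all_exported, "Timed out waiting for export statuses"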
def step_impl(context, snapshot_type):
    topic_names = [
        template_helper.get_topic_name(topic["topic"])
        for topic in context.topics_for_test
    ]
    for topic_name in topic_names:
        console_printer.print_info(f"Checking snapshots for topic '{topic_name}'")
        generated_file = (
            snapshot_data_generator.generate_snapshot_file_from_hbase_records(
                context.test_run_name,
                topic_name,
                context.snapshot_files_hbase_records_temp_folder,
                context.snapshot_files_temp_folder,
            )
        )
        expected_records = snapshots_helper.get_locally_generated_snapshot_file_records(
            generated_file
        )
        expected_records.sort()
        console_printer.print_info(
            f"Checking snapshots from s3 at base path '{context.snapshot_s3_output_path}' "
            + f"with db name of '{context.db_name}', topic name of '{topic_name}' and date of '{context.formatted_date}'"
        )
        s3_qualified_prefix = os.path.join(
            context.mongo_snapshot_path,
            context.test_run_name,
            context.formatted_date,
            snapshot_type,
        )
        console_printer.print_info(
            f"Checking generated snapshots from s3 at path '{s3_qualified_prefix}'"
        )
        generated_snapshots_count = len(
            aws_helper.get_s3_file_object_keys_matching_pattern(
                context.mongo_snapshot_bucket,
                s3_qualified_prefix,
                rf"^{s3_qualified_prefix}/{topic_name}-\d{{3}}-\d{{3}}-\d+\.txt\.gz\.enc$",
            )
        )
        console_printer.print_info(
            f"Found '{generated_snapshots_count}' generated snapshots from s3 at path '{s3_qualified_prefix}'"
        )
        snapshot_s3_full_output_path = (
            snapshots_helper.generate_snapshot_output_s3_path(
                context.snapshot_s3_output_path,
                topic_name,
                context.db_name,
                context.formatted_date,
                snapshot_type,
            )
        )
        console_printer.print_info(
            f"Checking snapshots from s3 at path '{snapshot_s3_full_output_path}'"
        )
        if not snapshots_helper.wait_for_snapshots_to_be_sent_to_s3(
            context.timeout,
            generated_snapshots_count,
            context.snapshot_s3_output_bucket,
            snapshot_s3_full_output_path,
        ):
            raise AssertionError(
                f"Number of snapshots found at '{snapshot_s3_full_output_path}' did not reach the expected minimum of '{generated_snapshots_count}'"
            )
        console_printer.print_info(
            f"Length of the expected record array is '{len(expected_records)}'"
        )
        console_printer.print_info(
            f"Getting hbase records from snapshots from s3 at path '{snapshot_s3_full_output_path}'"
        )
        actual_records = snapshots_helper.retrieve_records_from_snapshots(
            context.snapshot_s3_output_bucket, snapshot_s3_full_output_path
        )
        actual_records.sort()
        console_printer.print_info(
            f"Length of the actual record array is '{len(actual_records)}'"
        )
        console_printer.print_info("Asserting the length of the two record arrays")
        console_printer.print_info(f"Expected: {expected_records}")
        console_printer.print_info(f"Actual: {actual_records}")
        assert len(actual_records) >= len(expected_records)
        missing_snapshots = []
        console_printer.print_info("Asserting the values of the expected snapshots")
        for expected_record_number in range(0, len(expected_records)):
            console_printer.print_info(
                f"Asserting the values of expected record number '{expected_record_number}'"
            )
            expected_json = json.loads(expected_records[expected_record_number])
            record_found = False
            for actual_record_number in range(0, len(actual_records)):
                actual_json = json.loads(actual_records[actual_record_number])
                if expected_json == actual_json:
                    console_printer.print_info(
                        f"Expected json is '{expected_json}' and actual json (record number '{actual_record_number}') is '{actual_json}'"
                    )
                    record_found = True
                    break
            if not record_found:
                missing_snapshots.append(expected_record_number)
        if len(missing_snapshots) > 0:
            console_printer.print_info(
                f"The following snapshots were not found: '{missing_snapshots}'"
            )
        console_printer.print_info("Individual assertions complete")
        console_printer.print_info("Asserting no records are mismatched")
        assert len(missing_snapshots) == 0
def step_impl(context, table):
    if context.data_streaming_tests_skip_reconciling:
        console_printer.print_warning_text(
            f"Not verifying reconciliation due to DATA_STREAMING_TESTS_SKIP_RECONCILING being set to '{str(context.data_streaming_tests_skip_reconciling)}'"
        )
        return
    table_name = streaming_data_helper.get_metadata_store_table_name(
        table, context.metadata_store_tables
    )
    timeout_seconds = 600
    console_printer.print_info(
        f"Checking that all ids are reconciled within '{str(timeout_seconds)}' seconds"
    )
    for latest_metadata_store_id_for_topic in context.latest_metadata_store_ids:
        reconciled = False
        timeout_time = time.time() + timeout_seconds
        while time.time() < timeout_time and not reconciled:
            topic_name = template_helper.get_topic_name(
                latest_metadata_store_id_for_topic[0]
            )
            console_printer.print_info(
                f"Checking that latest id is reconciled for topic '{topic_name}'"
            )
            results = metadata_store_helper.get_metadata_for_specific_id_and_timestamp_in_topic(
                table_name,
                latest_metadata_store_id_for_topic[1],
                latest_metadata_store_id_for_topic[2],
                topic_name,
            )
            assert len(results) == 1
            results_iterator = iter(results.items())
            result_row_key_value_pair = next(results_iterator)
            result_row_key = result_row_key_value_pair[0]
            result_row_value = result_row_key_value_pair[1]
            console_printer.print_info(
                f"Actual metadata store result is '{result_row_value}' in topic '{topic_name}'"
            )
            console_printer.print_info(
                f"Asserting the reconciled field values for the result in topic '{topic_name}'"
            )
            if result_row_value["reconciled_result"] == 1:
                console_printer.print_info(
                    f"Asserting reconciled timestamp is set in topic '{topic_name}'"
                )
                assert result_row_value["reconciled_timestamp"] is not None
                console_printer.print_info(
                    f"Latest id has been reconciled for topic '{topic_name}'"
                )
                reconciled = True
                break
            time.sleep(5)
        if not reconciled:
            raise AssertionError(
                f"Latest id is not reconciled for topic '{topic_name}' after '{str(timeout_seconds)}' seconds"
            )
def step_impl(context, streaming_type, id_format, message_type):
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    manifest_bucket = context.k2hb_manifest_write_s3_bucket
    valid_prefixes = {
        "main": context.k2hb_main_manifest_write_s3_prefix,
        "equalities": context.k2hb_equality_manifest_write_s3_prefix,
        "audit": context.k2hb_audit_manifest_write_s3_prefix,
    }
    manifest_base_prefix = valid_prefixes.get(streaming_type, "NOT_SET")
    if manifest_base_prefix == "NOT_SET":
        raise AssertionError(
            f"Could not find manifest prefix for streaming type of '{streaming_type}'"
        )
    for topic in context.topics_for_test:
        topic_name = template_helper.get_topic_name(topic["topic"])
        temp_folder_for_topic = os.path.join(context.temp_folder, topic_name)
        full_folder_path = file_helper.generate_edited_files_folder(
            temp_folder_for_topic, folder
        )
        latest_file_path = file_helper.get_file_from_folder_with_latest_timestamp(
            full_folder_path
        )
        wrap_id_value = id_format == "wrapped"
        file_pattern = r"^.*_.*_\d+-.*_.*_\d+\.txt$"
        console_printer.print_info(
            f"Looking for manifest files in '{manifest_bucket}' bucket with prefix of '{manifest_base_prefix}' and pattern of '{file_pattern}'"
        )
        manifest_files = aws_helper.retrieve_files_from_s3(
            manifest_bucket,
            manifest_base_prefix,
            file_pattern,
        )
        console_printer.print_info(f"Found '{len(manifest_files)}' manifest files")
        manifest_lines = []
        for manifest_file in manifest_files:
            manifest_lines_in_file = manifest_file.splitlines()
            manifest_lines.extend(
                [
                    manifest_line_in_file.replace('""', '"')
                    for manifest_line_in_file in manifest_lines_in_file
                ]
            )
        record_id = file_helper.get_id_object_from_json_file(latest_file_path)
        record_timestamp = file_helper.get_timestamp_as_long_from_json_file(
            latest_file_path
        )
        expected = streaming_manifests_helper.generate_correct_manifest_line(
            record_id, record_timestamp, topic_name, wrap_id=wrap_id_value
        )
        console_printer.print_info(
            f"Expecting manifest line with data of '{expected}'"
        )
        console_printer.print_info(f"Actual manifest lines were '{manifest_lines}'")
        assert expected in manifest_lines
def step_impl(
    context,
    record_count,
    message_type,
    input_file_name,
    output_file_name,
    dlq_file_name,
    snapshot_record_file_name,
    encrypt_in_sender,
    wait_for_sending,
    key_method,
):
    context.uploaded_id = uuid.uuid4()
    folder = streaming_data_helper.generate_fixture_data_folder(message_type)
    topic_prefix = streaming_data_helper.generate_topic_prefix(message_type)
    skip_encryption = "true" if encrypt_in_sender == "false" else "false"
    output_template = None if output_file_name == "None" else output_file_name
    dlq_template = None if dlq_file_name == "None" else dlq_file_name
    snapshot_record_file_name = (
        None if snapshot_record_file_name == "None" else snapshot_record_file_name
    )
    wait_for_sending_bool = wait_for_sending.lower() == "true"
    message_volume = (
        context.kafka_message_volume if context.kafka_message_volume else "1"
    )
    random_keys = context.kafka_random_key if context.kafka_random_key else "false"
    context.kafka_generated_dlq_output_files = []
    for topic in context.topics_for_test:
        key = None
        if key_method.lower() == "static":
            key = context.uploaded_id
        elif key_method.lower() == "topic":
            key = uuid.uuid4()
        topic_name = template_helper.get_topic_name(topic["topic"])
        generated_files = kafka_data_generator.generate_kafka_files(
            test_run_name=context.test_run_name,
            s3_input_bucket=context.s3_ingest_bucket,
            input_template_name=input_file_name,
            output_template_name=output_template,
            new_uuid=key,
            local_files_temp_folder=os.path.join(context.temp_folder, topic_name),
            fixture_files_root=context.fixture_path_local,
            s3_output_prefix=context.s3_temp_output_path,
            record_count=record_count,
            topic_name=topic["topic"],
            snapshots_output_folder=context.snapshot_files_hbase_records_temp_folder,
            seconds_timeout=context.timeout,
            fixture_data_folder=folder,
            dlq_template_name=dlq_template,
            snapshot_record_template_name=snapshot_record_file_name,
        )
        files_to_send_to_kafka_broker = [
            generated_file[0] for generated_file in generated_files
        ]
        aws_helper.send_files_to_kafka_producer_sns(
            dynamodb_table_name=context.dynamo_db_table_name,
            s3_input_bucket=context.s3_ingest_bucket,
            aws_acc_id=context.aws_acc,
            sns_topic_name=context.aws_sns_topic_name,
            fixture_files=files_to_send_to_kafka_broker,
            message_key=context.uploaded_id,
            topic_name=topic["topic"],
            topic_prefix=topic_prefix,
            region=context.aws_region_main,
            skip_encryption=skip_encryption,
            kafka_message_volume=message_volume,
            kafka_random_key=random_keys,
            wait_for_job_completion=wait_for_sending_bool,
        )
        dlq_files_for_topic = []
        for generated_file in generated_files:
            if len(generated_file) > 3:
                dlq_files_for_topic.append(generated_file[3])
        context.kafka_generated_dlq_output_files.append(
            (topic["topic"], dlq_files_for_topic)
        )
def step_impl(context, snapshot_type):
    topics = message_helper.get_consolidated_topics_list(
        [topic["topic"] for topic in context.topics_for_test],
        snapshot_type,
        context.default_topic_list_full_delimited,
        context.default_topic_list_incremental_delimited,
        [context.send_snapshots_topics_override],
    )
    formatted_date = (
        context.formatted_date
        if not context.send_snapshots_date_override
        else context.send_snapshots_date_override
    )
    correlation_id = (
        snapshots_helper.get_snapshot_run_correlation_id(
            context.test_run_name, snapshot_type
        )
        if not context.send_snapshots_correlation_id_override
        else context.send_snapshots_correlation_id_override
    )
    reprocess_files = context.send_snapshots_reprocess_files
    s3_qualified_prefix = os.path.join(
        context.mongo_snapshot_path, formatted_date, snapshot_type
    )
    console_printer.print_info(
        f"Looking for snapshots present in '{s3_qualified_prefix}'"
    )
    for topic in topics:
        topic_qualified = template_helper.get_topic_name(topic)
        topic_name = template_helper.remove_any_pipe_values_from_topic_name(
            topic_qualified
        )
        snapshot_pattern = (
            rf"^{s3_qualified_prefix}/{topic_name}-\d{{3}}-\d{{3}}-\d+\.txt\.gz\.enc$"
        )
        console_printer.print_info(
            f"Looking for snapshots using pattern '{snapshot_pattern}'"
        )
        generated_snapshot_keys = aws_helper.get_s3_file_object_keys_matching_pattern(
            context.mongo_snapshot_bucket,
            f"{s3_qualified_prefix}/{topic_name}-",
            snapshot_pattern,
        )
        generated_snapshots_count = len(generated_snapshot_keys)
        console_printer.print_info(f"Found '{generated_snapshots_count}' snapshots")
        export_status_helper.update_item_in_export_status_table(
            context.dynamo_db_export_status_table_name,
            topic_name,
            correlation_id,
            "Exported",
            generated_snapshots_count,
            0,
            0,
        )
        for generated_snapshot_key in generated_snapshot_keys:
            message_helper.send_start_snapshot_sending_message(
                context.aws_sqs_queue_snapshot_sender,
                generated_snapshot_key,
                topic_name,
                correlation_id,
                reprocess_files,
                formatted_date,
                snapshot_type,
            )