def step_(context, snapshot_type, step_name):
    context.adg_s3_prefix = os.path.join(context.mongo_snapshot_path, context.test_run_name)
    context.adg_export_date = datetime.now().strftime("%Y-%m-%d")

    payload = {
        CORRELATION_ID: context.test_run_name,
        S3_PREFIX: context.adg_s3_prefix,
        SNAPSHOT_TYPE: snapshot_type,
        EXPORT_DATE: context.adg_export_date,
    }
    payload_json = json.dumps(payload)

    cluster_response = invoke_lambda.invoke_adg_emr_launcher_lambda(payload_json)
    cluster_arn = cluster_response[CLUSTER_ARN]
    cluster_identifier = cluster_arn.split(":")[-1]
    cluster_id = cluster_identifier.split("/")[-1]
    console_printer.print_info(f"Started emr cluster : '{cluster_id}'")

    step = aws_helper.get_emr_cluster_step(step_name, cluster_id)
    context.adg_cluster_id = cluster_id

    # Only poll the step if it was found; previously the None check happened
    # after the step id was already accessed, so it could never take effect
    if step is not None:
        step_id = step["Id"]
        console_printer.print_info(f"Step id for '{step_name}' : '{step_id}'")

        execution_state = aws_helper.poll_emr_cluster_step_status(
            step_id, cluster_id, 2500
        )
        if execution_state != COMPLETED_STATUS:
            raise AssertionError(
                f"'{step_name}' step failed with final status of '{execution_state}'"
            )

def s3_clear_snapshot(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 's3_clear_snapshot' fixture")
    aws_helper.clear_s3_prefix(
        context.mongo_snapshot_bucket,
        os.path.join(context.mongo_snapshot_path, context.test_run_name),
        True,
    )
    aws_helper.clear_s3_prefix(
        context.mongo_snapshot_bucket,
        os.path.join(context.snapshot_s3_status_path, context.test_run_name),
        True,
    )
    context.add_cleanup(print, "Executing 's3_clear_snapshot' cleanup")
    context.add_cleanup(
        aws_helper.clear_s3_prefix,
        context.mongo_snapshot_bucket,
        os.path.join(context.mongo_snapshot_path, context.test_run_name),
        True,
    )
    context.add_cleanup(
        aws_helper.clear_s3_prefix,
        context.mongo_snapshot_bucket,
        os.path.join(context.snapshot_s3_status_path, context.test_run_name),
        True,
    )

def get_metadata_for_id_and_timestamp_from_file(
    table_name, file_path, topic_name, wrap_id=False
):
    """Returns a tuple of the qualified id, the timestamp and the metadata retrieved for them.

    Arguments:
    table_name -- the table name to check
    file_path -- the file containing the id
    topic_name -- the topic name to get metadata for
    wrap_id -- True if the id should be wrapped in an "id" object (default False)
    """
    console_printer.print_info(
        f"Retrieving metadata for id from file in '{file_path}' in metadata table '{table_name}' with topic name of '{topic_name}'"
    )
    qualified_topic_name = template_helper.get_topic_name(topic_name)
    record_id = file_helper.get_id_object_from_json_file(file_path)
    record_timestamp = file_helper.get_timestamp_as_long_from_json_file(file_path)

    id_string = json.dumps(record_id)
    if wrap_id:
        id_string = json.dumps({"id": id_string})

    id_string_qualified = id_string.replace(" ", "")

    results = get_metadata_for_specific_id_and_timestamp_in_topic(
        table_name, id_string_qualified, record_timestamp, qualified_topic_name
    )

    return (id_string_qualified, record_timestamp, results)

def get_metadata_for_specific_id_and_timestamp_in_topic(
    table_name, id_string, timestamp, topic_name
):
    """Returns the metadata for a given id and timestamp.

    Arguments:
    table_name -- the table name to check
    id_string -- the json dumped id string
    timestamp -- the timestamp as an int
    topic_name -- the topic name to get metadata for
    """
    console_printer.print_info(
        f"Retrieving metadata for id of '{id_string}' in metadata table '{table_name}' with topic name of '{topic_name}' and timestamp of '{str(timestamp)}'"
    )
    qualified_topic_name = template_helper.get_topic_name(topic_name)

    payload_dict = {
        "table-name": table_name,
        "hbase-id-like": id_string,
        "topic-name-equals": qualified_topic_name,
        "hbase-timestamp-equals": timestamp,
    }
    payload_json = json.dumps(payload_dict)

    return invoke_lambda.invoke_ingestion_metadata_query_lambda(payload_json)

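# Hypothetical usage sketch for the two metadata lookup helpers above. The table
# name, file path and topic below are illustrative placeholders, not values from
# this test suite:
#
#     id_string, timestamp, results = get_metadata_for_id_and_timestamp_from_file(
#         "ingest-metadata",            # assumed metadata table name
#         "/tmp/expected-record.json",  # assumed file containing the record id
#         "db.core.claimant",           # assumed topic name
#         wrap_id=True,
#     )
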
def generate_script_step(
    emr_cluster_id, script_location, step_type, command_line_arguments=None
):
    """Starts a step of type script and returns its id.

    Keyword arguments:
    emr_cluster_id -- the id of the cluster
    script_location -- the location on the EMR instance (or an S3 URI) of the script to run
    step_type -- the name of the step type being run (e.g. "major compaction")
    command_line_arguments -- the arguments to pass to the script as a string, if any
    """
    arguments_array = [script_location]
    if command_line_arguments is not None:
        arguments_array.extend(command_line_arguments.split())

    console_printer.print_info(
        f"Executing script step type '{step_type}' with arguments of \"{arguments_array}\""
    )

    bash_command = " ".join(arguments_array)
    console_printer.print_info(
        f"Converted arguments array to bash command of '{bash_command}'"
    )

    step_name = f"Automated Script Step - {step_type}"

    return generate_local_step(emr_cluster_id, bash_command, step_name)

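# Hypothetical usage of generate_script_step. The cluster id, script location
# and arguments are illustrative placeholders only:
#
#     step_id = generate_script_step(
#         "j-1ABC2DEF3GHIJ",                      # assumed EMR cluster id
#         "/opt/emr/run_major_compaction.sh",     # assumed script location on the cluster
#         "major compaction",
#         command_line_arguments="--table all",   # passed through as a single string
#     )
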
def s3_clear_historic_data_start(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 's3_clear_historic_data_start' fixture")
    aws_helper.clear_s3_prefix(
        context.s3_ingest_bucket,
        os.path.join(context.ucfs_historic_data_prefix, context.test_run_name),
        True,
    )

def claimant_api_setup(context):
    console_printer.print_info("Executing 'claimant_api_setup' fixture")
    context.execute_steps(
        "given The claimant API 'business' region is set to 'Ireland'"
    )
    context.execute_steps("given The claimant API 'storage' region is set to 'London'")
    context.execute_steps("given The nino salt has been retrieved")

def generate_snapshot_file_from_hbase_records(
    test_run_name, topic, hbase_records_folder, output_folder
):
    """Generates a raw snapshot file from hbase db object records for snapshot sender output comparisons.

    Keyword arguments:
    test_run_name -- unique name for this test run
    topic -- the single topic to generate the snapshot file for
    hbase_records_folder -- location of the files each containing a single hbase record
    output_folder -- output folder for snapshot records
    """
    console_printer.print_info(
        f"Generating snapshot output for topic '{topic}' and folder '{hbase_records_folder}'"
    )

    hbase_records_folder_for_topic = os.path.join(hbase_records_folder, topic)
    snapshot_file_name = f"{topic}-snapshot-{test_run_name}.txt.gz.enc"
    hbase_records = file_helper.get_contents_of_files_in_folder(
        hbase_records_folder_for_topic, False
    )

    snapshot_file = os.path.join(output_folder, snapshot_file_name)
    with open(snapshot_file, "wt") as file_to_write:
        file_to_write.write("\n".join(hbase_records))

    return snapshot_file

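# Hypothetical usage of generate_snapshot_file_from_hbase_records. The run name,
# topic and folders are illustrative placeholders only:
#
#     snapshot_file = generate_snapshot_file_from_hbase_records(
#         "e2e-test-run-1",       # assumed unique test run name
#         "db.core.claimant",     # assumed topic
#         "/tmp/hbase-records",   # assumed folder containing one file per hbase record
#         "/tmp/snapshots",       # assumed output folder
#     )
#     # snapshot_file -> "/tmp/snapshots/db.core.claimant-snapshot-e2e-test-run-1.txt.gz.enc"
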
def historic_data_importer_start_data_load(context, timeout=30, **kwargs):
    console_printer.print_info(
        "Executing 'historic_data_importer_start_data_load' fixture"
    )
    historic_data_importer_start_base(
        context, context.mongo_data_load_prefixes_comma_delimited
    )

def retrieve_assessment_periods_from_claimant_data_file(
    input_data_file_name, fixture_files_root, fixture_data_folder
):
    """Gets all the assessment periods from the given data file and returns them as an array of arrays.

    Keyword arguments:
    input_data_file_name -- the input file name containing the data
    fixture_files_root -- the local path to the feature file to send
    fixture_data_folder -- the folder from the root of the fixture data
    """
    data_file_name = os.path.join(
        fixture_files_root, fixture_data_folder, input_data_file_name
    )
    console_printer.print_info(
        f"Retrieving assessment periods from data file at '{data_file_name}'"
    )

    return_data = []

    with open(data_file_name) as data_file:
        input_data = yaml.safe_load(data_file)

    for item in input_data:
        if "assessment_periods" in item:
            for assessment_period in item["assessment_periods"]:
                return_data.append(assessment_period)

    console_printer.print_info(
        f"Successfully retrieved '{len(return_data)}' assessment periods"
    )

    return return_data

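# Hypothetical illustration of the YAML shape this helper expects and how it is
# called. The file name, paths and field values are illustrative assumptions,
# not taken from the fixture data:
#
#     # claimant-data.yml (shape assumed from the function above)
#     # - some_claimant_field: "..."
#     #   assessment_periods:
#     #     - ["2020-01-01", "2020-01-31"]
#     #     - ["2020-02-01", "2020-02-29"]
#
#     periods = retrieve_assessment_periods_from_claimant_data_file(
#         "claimant-data.yml", "/repo/fixtures", "claimant-api"
#     )
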
def clean_up_role_and_s3_objects(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 'clean_up_role_and_s3_objects' fixture")
    aws_helper.remove_role(
        context.analytical_test_e2e_role, context.analytical_test_e2e_policies
    )
    aws_helper.clear_session()
    aws_helper.set_details_for_role_assumption(
        context.aws_role, context.aws_session_timeout_seconds
    )

    if context.analytical_test_data_s3_location.get("path"):
        aws_helper.remove_file_from_s3_and_wait_for_consistency(
            context.published_bucket,
            os.path.join(
                context.analytical_test_data_s3_location["path"],
                context.analytical_test_data_s3_location["file_name"],
            ),
        )

    if context.analytical_test_data_s3_location.get("paths"):
        for path in context.analytical_test_data_s3_location["paths"]:
            aws_helper.remove_file_from_s3_and_wait_for_consistency(
                context.published_bucket,
                os.path.join(
                    path, context.analytical_test_data_s3_location["file_name"]
                ),
            )

def ingest_ecs_cluster_start(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 'ingestion_ecs_cluster_start' fixture")
    context.last_scaled_asg = aws_helper.scale_asg_if_desired_count_is_not_already_set(
        context.asg_prefix_ingestion_ecs_cluster,
        int(context.asg_max_count_ingestion_ecs_cluster),
    )

def generate_test_run_topics(context):
    """Generates the topics in use for this test run.

    Keyword arguments:
    context -- the behave context object
    """
    console_printer.print_info("Generating the topics for this test run")

    if context.config.userdata.get("IS_SYNTHETIC_DATA_INGESTION"):
        context.topics = template_helper.generate_synthetic_data_topic_names(
            context.synthetic_rawdata_prefix, context.aws_datasets_bucket
        )
    else:
        context.topics = template_helper.generate_topic_names(
            context.test_run_name,
            int(context.number_of_topics_to_use),
            context.db_name,
            False,
        )

    context.topics_delimited = ",".join(context.topics)
    context.topics_unique = template_helper.generate_topic_names(
        context.test_run_name,
        int(context.number_of_topics_to_use),
        context.db_name,
        True,
    )
    context.topics_unique_delimited = ",".join(context.topics_unique)

    console_printer.print_info("Generated the topics for this test run")

def s3_clear_pdm_start(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 's3_clear_pdm_start' fixture")
    aws_helper.clear_s3_prefix(
        context.published_bucket,
        os.path.join(context.fixture_path_local, "pdm-test-data"),
        True,
    )

def s3_clear_published_bucket_pdm_test_output(context, timeout=30, **kwargs):
    console_printer.print_info(
        "Executing 's3_clear_published_bucket_pdm_test_output' fixture"
    )
    aws_helper.clear_s3_prefix(
        context.published_bucket, context.pdm_test_output_s3_prefix, False
    )

def before_scenario(context, scenario):
    global current_scenario

    console_printer.print_info("Executing before scenario hook")

    # Create temp scenario folder
    context.temp_folder = os.path.join(context.root_temp_folder, str(uuid.uuid4()))
    console_printer.print_info(
        f"Creating temp scenario folder at '{context.temp_folder}'"
    )
    os.makedirs(context.temp_folder)

    # Reset topics each scenario
    context.topics_for_test = []
    for topic in context.topics:
        context.topics_for_test.append({"topic": topic, "key": str(uuid.uuid4())})

    context.snapshot_files_temp_folder = os.path.join(context.temp_folder, "snapshots")
    os.makedirs(context.snapshot_files_temp_folder)

    context.snapshot_files_hbase_records_temp_folder = os.path.join(
        context.temp_folder, "hbase-records"
    )
    os.makedirs(context.snapshot_files_hbase_records_temp_folder)

    context.historic_data_generations_count_per_test = 0

    # Set the scenario
    current_scenario = scenario

def step_impl(context, number_of_snapshots, match_type, snapshot_type):
    s3_qualified_prefix = os.path.join(
        context.mongo_snapshot_path,
        context.test_run_name,
        context.formatted_date,
        snapshot_type,
    )
    topic_names = [
        template_helper.get_topic_name(topic["topic"])
        for topic in context.topics_for_test
    ]
    snapshot_count = int(number_of_snapshots)

    if snapshot_count == 0:
        for result in aws_helper.assert_no_snapshots_in_s3_threaded(
            topic_names, context.mongo_snapshot_bucket, s3_qualified_prefix, 60
        ):
            console_printer.print_info(
                f"Asserted no snapshots created in s3 with key of {result}"
            )
    else:
        snapshot_string = "snapshots" if snapshot_count > 1 else "snapshot"
        for result in aws_helper.assert_snapshots_in_s3_threaded(
            topic_names,
            context.mongo_snapshot_bucket,
            s3_qualified_prefix,
            snapshot_count,
            context.timeout,
            (match_type == "exact"),
        ):
            console_printer.print_info(
                f"Asserted exactly {number_of_snapshots} {snapshot_string} created in s3 with key of {result}"
            )

def dynamodb_clear_ingest_start(context, snapshot_type, topics_list):
    console_printer.print_info("Executing 'dynamodb_clear_ingest_start' fixture")

    updated_topics = message_helper.get_consolidated_topics_list(
        topics_list,
        snapshot_type,
        context.default_topic_list_full_delimited,
        context.default_topic_list_incremental_delimited,
        [
            context.generate_snapshots_topics_override,
            context.send_snapshots_topics_override,
        ],
    )
    correlation_id = (
        snapshots_helper.get_snapshot_run_correlation_id(
            context.test_run_name, snapshot_type
        )
        if not context.send_snapshots_correlation_id_override
        else context.send_snapshots_correlation_id_override
    )

    for topic in updated_topics:
        topic_name = template_helper.get_topic_name(topic)
        export_status_helper.delete_item_in_export_status_table(
            context.dynamo_db_export_status_table_name, topic_name, correlation_id
        )

def before_feature(context, feature):
    global current_feature

    console_printer.print_info("Executing before feature hook")

    # Set the feature
    current_feature = feature

def before_all(context):
    # Ensure all required userdata was supplied
    console_printer.set_log_level_info()

    # Set variables that are simply from user data and string manipulations
    environment_helper.set_test_run_common_variables(context)
    environment_helper.set_manifest_variables(context)

    # Assume AWS role before any calls
    aws_helper.set_details_for_role_assumption(
        context.aws_role, context.aws_session_timeout_seconds
    )

    # Generate the topics for this test run
    environment_helper.generate_test_run_topics(context)

    # Clear and delete root temp folder if it exists
    if os.path.exists(context.root_temp_folder):
        console_printer.print_info(
            f"Clearing out root temp folder at '{context.root_temp_folder}'"
        )
        file_helper.clear_and_delete_directory(context.root_temp_folder)

    # Create the root temp folder for test run
    console_printer.print_info(
        f"Creating temp root folder at '{context.root_temp_folder}'"
    )
    os.makedirs(context.root_temp_folder)

def after_feature(context, feature):
    global failed_feature

    console_printer.print_info("Executing after feature hook")

    # Record the feature if it failed
    if feature.status == Status.failed:
        failed_feature = feature

def ucfs_claimant_kafka_consumer_stop(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 'ucfs_claimant_kafka_consumer_stop' fixture")
    aws_helper.scale_ecs_service_if_desired_count_is_not_already_set(
        context.ucfs_claimant_api_kafka_consumer_cluster_name,
        context.ucfs_claimant_api_kafka_consumer_service_name,
        0,
    )

def s3_clear_corporate_data_start(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 's3_clear_corporate_data_start' fixture")
    aws_helper.clear_s3_prefix(
        context.corporate_storage_s3_bucket_id,
        context.cdl_data_load_s3_base_prefix_tests,
        True,
    )

def hbase_clear_ingest_equalities_start(context, timeout=30, **kwargs):
    console_printer.print_info(
        "Executing 'hbase_clear_ingest_equalities_start' fixture"
    )
    for topic in streaming_data_helper.generate_topics_override(
        "kafka_equalities", context.topics
    ):
        aws_helper.truncate_hbase_table(topic["topic"])

def step_impl(context, module_name):
    schema_config = context.kickstart_schema_config[module_name]

    if schema_config["record_layout"].lower() == "csv":
        for collection in schema_config["schema"].keys():
            s3_result_key = os.path.join(
                context.kickstart_hive_result_path, f"e2e_{collection}.csv"
            )
            console_printer.print_info(f"S3 Request Location: {s3_result_key}")
            file_content = aws_helper.get_s3_object(
                None, context.published_bucket, s3_result_key
            ).decode("utf-8")
            actual_content = (
                file_content.replace("\t", ",")
                .replace("NULL", "None")
                .strip()
                .splitlines()
            )
            expected_file_name = [
                file
                for file in context.kickstart_current_run_input_files
                if collection in file
            ][0]
            console_printer.print_info(f"Expected File Name: {expected_file_name}")
            expected_content = file_helper.get_contents_of_file(
                expected_file_name, False
            ).splitlines()[1:]
            for actual_line, expected_line in zip(actual_content, expected_content):
                assert (
                    actual_line.lower() == expected_line.lower()
                ), f"Actual result '{actual_line}' does not match expected '{expected_line}' for collection {collection}"
    elif schema_config["record_layout"].lower() == "json":
        for collection in schema_config["schema"].keys():
            s3_result_key = os.path.join(
                context.kickstart_hive_result_path, f"e2e_{collection}.csv"
            )
            console_printer.print_info(f"S3 Request Location: {s3_result_key}")
            file_content = aws_helper.get_s3_object(
                None, context.published_bucket, s3_result_key
            ).decode("utf-8")
            actual_content = file_content.replace("NULL", "None").strip().splitlines()
            console_printer.print_info(
                f"Local input files for this run: {context.kickstart_current_run_input_files}"
            )
            expected_file_name = [
                file
                for file in context.kickstart_current_run_input_files
                if f"{module_name}-{collection}" in file
            ][0]
            console_printer.print_info(f"Expected File Name: {expected_file_name}")
            expected_json = json.loads(
                file_helper.get_contents_of_file(expected_file_name, False)
            )["data"]
            expected_content = "\n".join(
                [
                    "\t".join([str(record[field]) for field in record])
                    for record in expected_json
                ]
            ).splitlines()
            for actual_line, expected_line in zip(actual_content, expected_content):
                assert (
                    actual_line.lower() == expected_line.lower()
                ), f"Actual result '{actual_line}' does not match expected '{expected_line}' for collection {collection}"

def generate_historic_load_data(
    s3_bucket,
    s3_prefix,
    method,
    file_count,
    record_count,
    topic,
    input_template,
    encrypted_key,
    plaintext_key,
    master_key,
    input_folder,
    max_worker_count,
    static_key=None,
):
    """Generates the required historic data files from the files in the given folder.

    Keyword arguments:
    s3_bucket -- the s3 bucket to send input files to
    s3_prefix -- the s3 prefix to send input files to
    method -- 'single', 'different', 'file' or 'record' for the key method to use for this data
    file_count -- the number of files to create for the collection
    record_count -- the number of records per file
    topic -- the topic (contains db.collection)
    input_template -- the name and location of the input template json file
    encrypted_key -- the encrypted version of the plaintext key
    plaintext_key -- the plaintext data key for encrypting the data file
    master_key -- the master key used to encrypt the data key
    input_folder -- the folder to store the generated input files in
    max_worker_count -- the maximum number of worker threads
    static_key -- the key to use for the messages or None
    """
    global key_method
    key_method = method

    _generate_keys(file_count, record_count, static_key)

    console_printer.print_info(
        f"Generating {str(file_count)} load files with {str(record_count)} records in each using key method {key_method} for topic {topic}"
    )

    short_topic = template_helper.get_short_topic_name(topic)

    for result_input in _generate_input_data_files_threaded(
        s3_bucket,
        s3_prefix,
        file_count,
        record_count,
        short_topic,
        input_template,
        encrypted_key,
        plaintext_key,
        master_key,
        input_folder,
        max_worker_count,
    ):
        console_printer.print_info(f"Generated input file {result_input}")

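# Hypothetical invocation of generate_historic_load_data. The bucket, prefix,
# key material and paths are illustrative placeholders, not values from the
# test configuration:
#
#     generate_historic_load_data(
#         "ingest-bucket",                    # assumed s3 bucket
#         "historic/e2e-test-run-1",          # assumed s3 prefix
#         "different",                        # key method: 'single', 'different', 'file' or 'record'
#         file_count=2,
#         record_count=100,
#         topic="db.core.claimant",
#         input_template="fixtures/input_template.json",
#         encrypted_key="<encrypted data key>",
#         plaintext_key="<plaintext data key>",
#         master_key="<master key id>",
#         input_folder="/tmp/historic-input",
#         max_worker_count=10,
#     )
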
def generate_input_file(
    file_number,
    input_base_content,
    input_folder,
    input_file,
    record_count,
    file_count,
    encrypted_key,
    plaintext_key,
    master_key,
    initialisation_vector,
):
    """Creates a local, encrypted input file for historic data.

    Keyword arguments:
    file_number -- the number of the current file per topic
    input_base_content -- the content of the input template file
    input_folder -- the location to create input files in
    input_file -- the filename to create
    record_count -- the number of records per file
    file_count -- the number of desired files
    encrypted_key -- the encrypted version of the plaintext key
    plaintext_key -- the plaintext data key for encrypting the data file
    master_key -- the master key used to encrypt the data key
    initialisation_vector -- the initialisation vector to use for the encryption
    """
    global keys
    global key_method

    console_printer.print_info(f"Generating input file number {str(file_number)}")

    local_keys = keys
    file_contents = ""
    file_full_path = os.path.join(input_folder, input_file)

    for record_number in range(1, int(record_count) + 1):
        current_key_index = get_current_key_index(
            key_method, file_number, record_number, record_count
        )
        key_for_record = local_keys[current_key_index][0]
        (timestamp, timestamp_string) = date_helper.add_milliseconds_to_timestamp(
            _base_datetime_timestamp, file_number + record_number, True
        )
        db_object = generate_uncrypted_record(
            timestamp_string, input_base_content, key_for_record, plaintext_key
        )
        file_contents += json.dumps(json.loads(db_object)) + "\n"

    encrypted_contents = generate_encrypted_record(
        initialisation_vector, file_contents, plaintext_key, True
    )

    with open(file_full_path, "wb") as data:
        data.write(encrypted_contents)

def step_impl(context):
    for step in context.kickstart_step_ids:
        console_printer.print_info(
            f"Checking whether step with id '{step}' has completed"
        )
        execution_state = aws_helper.poll_emr_cluster_step_status(
            step, context.kickstart_adg_cluster_id, 1200
        )
        if execution_state != COMPLETED_STATUS:
            raise AssertionError(
                f"The step Id {step} failed with final status of '{execution_state}'"
            )

def step_impl(context, setting, skip_existing):
    run_import_value = setting in ("import", "import and manifest")
    run_manifest_value = setting in ("manifest", "import and manifest")
    skip_existing_value = (
        context.historic_data_ingestion_skip_existing_records_override is not None
        and context.historic_data_ingestion_skip_existing_records_override.lower()
        == "true"
    )
    skip_earlier_than = (
        None
        if not context.historic_data_ingestion_skip_earlier_than_override
        else context.historic_data_ingestion_skip_earlier_than_override
    )
    skip_later_than = (
        None
        if not context.historic_data_ingestion_skip_later_than_override
        else context.historic_data_ingestion_skip_later_than_override
    )

    console_printer.print_info(
        "UC historic data import prefixes: "
        + f"{context.mongo_data_load_prefixes_comma_delimited}"
    )

    all_prefixes = template_helper.get_historic_data_importer_prefixes(
        context.mongo_data_load_prefixes_comma_delimited,
        context.historic_importer_use_one_message_per_path,
    )
    correlation_id = (
        context.test_run_name
        if not context.historic_importer_correlation_id_override
        else context.historic_importer_correlation_id_override
    )

    for prefix in all_prefixes:
        console_printer.print_info(
            f"Sending work to historic importer for {prefix} in {context.s3_ingest_bucket}"
        )
        message_helper.send_start_import_message(
            context.aws_sqs_queue_historic_data_importer,
            prefix,
            skip_earlier_than,
            skip_later_than,
            context.test_run_name,
            run_import=run_import_value,
            generate_manifest=run_manifest_value,
            skip_existing_records=skip_existing_value,
            correlation_id=correlation_id,
        )

def s3_clear_k2hb_manifests_main(context, timeout=30, **kwargs):
    console_printer.print_info("Executing 's3_clear_k2hb_manifests_main' fixture")
    console_printer.print_info(
        f"Clearing manifests from '{context.k2hb_manifest_write_s3_bucket}/{context.k2hb_main_manifest_write_s3_prefix}'"
    )
    aws_helper.clear_s3_prefix(
        context.k2hb_manifest_write_s3_bucket,
        context.k2hb_main_manifest_write_s3_prefix,
        True,
        False,
    )