def main(clear):
    allowed_entities = config.allowed_entities(env=os.environ.get("ENVIRONMENT"))
    log.info(log_title(message="Migration Step: additional_data"))
    log.info(
        log_title(
            message=f"Source: {db_config['source_schema']}, Target: {db_config['target_schema']}, Chunk Size: {db_config['chunk_size']}"
        )
    )
    log.info(log_title(message=f"Enabled entities: {', '.join(allowed_entities)}"))
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    version_details = helpers.get_json_version()
    log.info(
        f"Using JSON def version '{version_details['version_id']}' last updated {version_details['last_modified']}"
    )
    if "additional_data" not in allowed_entities:
        log.info("additional_data entity not enabled, exiting")
        return False
    # Strip the last 5 characters (assumed to be a ".json" extension) to get bare file names
    all_files = [x[:-5] for x in helpers.get_all_additional_data_files()]
    if clear:
        clear_tables(db_config=db_config, files=all_files)
    for file in all_files:
        insert_additional_data_records(
            db_config=db_config, additional_data_file_name=file
        )

def main(clear, team):
    allowed_entities = config.allowed_entities(env=os.environ.get("ENVIRONMENT"))
    log.info(
        log_title(message="Integration Step: Apply Sirius business rules to Staging DB")
    )
    log.info(
        log_title(
            message=f"Source: {db_config['source_schema']}, Target: {db_config['target_schema']}, Chunk Size: {db_config['chunk_size']}"
        )
    )
    log.info(log_title(message=f"Enabled entities: {', '.join(allowed_entities)}"))
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    if clear:
        clear_tables(db_engine=target_db_engine, db_config=db_config)
    insert_unique_uids(db_config=db_config, target_db_engine=target_db_engine)
    if environment == "local":
        check_row_counts.count_rows(
            connection_string=db_config["db_connection_string"],
            destination_schema=db_config["target_schema"],
            enabled_entities=allowed_entities,
            team=team,
        )

def main(clear): log.info(log_title(message="Integration Step: Load to Staging")) log.info( log_title( message=f"Source: {db_config['source_schema']} Target: {db_config['target_schema']}" ) ) log.info( log_title( message=f"Enabled entities: {', '.join(k for k, v in config.ENABLED_ENTITIES.items() if v is True)}" ) ) log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}") work = [base_data, inserts] if clear: work.insert(0, clear_tables) if environment == "local": work.append(update) for item in work: thread = threading.Thread(target=item) thread.start() thread.join() log.debug(f"Result: {result}")
def setup_logging(log, verbose, log_title, bucket_name):
    try:
        log.setLevel(verbosity_levels[verbose])
        log.info(f"{verbosity_levels[verbose]} logging enabled")
    except KeyError:
        log.setLevel("INFO")
        log.info(f"{verbose} is not a valid verbosity level")
        log.info("INFO logging enabled")
    log.info(log_title(message="Load CasRec: CSV to DB transfer"))
    log.info(log_title(message=f"s3 bucket: {bucket_name}"))
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")

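# Hypothetical sketch (not from the source): setup_logging() above indexes a
# verbosity_levels mapping that is not shown in this excerpt. Assuming `verbose`
# is a click-style "-v" count, the mapping might look something like this:
verbosity_levels = {
    0: "INFO",
    1: "DEBUG",
}
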
def main(): log.info(log_title(message="Post migration db tasks")) log.info(log_title(message=f"Target: sirius.{db_config['target_schema']}")) log.info(f"Working in environment: {os.environ.get('ENVIRONMENT')}") jobs = [reset_sequences, reset_uid_sequences, reindex] for job in jobs: thread = threading.Thread(target=job) thread.start() thread.join() log.debug(f"Result: {result}")
def main(clear):
    allowed_entities = [k for k, v in config.ENABLED_ENTITIES.items() if v is True]
    log.info(log_title(message="Integration Step: Reindex migrated data based on Sirius ids"))
    log.info(
        log_title(
            message=f"Source: {db_config['source_schema']} Target: {db_config['target_schema']}"
        )
    )
    log.info(log_title(message=f"Enabled entities: {', '.join(allowed_entities)}"))
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    log.info(f"Creating schema '{db_config['target_schema']}' if it doesn't exist")
    create_schema(
        target_db_connection=db_config["db_connection_string"],
        schema_name=db_config["target_schema"],
    )
    if clear:
        clear_tables(db_config)
    enabled_tables = table_helpers.get_enabled_table_details()
    if "additional_data" not in allowed_entities:
        log.info("additional_data entity not enabled, skipping its extra tables")
        enabled_extra_tables = {}
    else:
        enabled_extra_tables = table_helpers.get_enabled_table_details(
            file_name="additional_data_tables"
        )
    all_enabled_tables = {**enabled_tables, **enabled_extra_tables}
    log.info(
        f"Moving data from '{db_config['source_schema']}' schema to '{db_config['target_schema']}' schema"
    )
    move_all_tables(db_config=db_config, table_list=all_enabled_tables)
    log.info("Merging new data with existing data in Sirius")
    match_existing_data(db_config=db_config, table_details=all_enabled_tables)
    log.info("Reindexing all primary keys")
    update_pks(db_config=db_config, table_details=enabled_tables)
    log.info("Reindexing all foreign keys")
    update_fks(db_config=db_config, table_details=all_enabled_tables)

def main(audit):
    log.info(log_title(message="Load to Target Step: AKA do the migration already"))
    log.info(
        log_title(
            message=f"Source: {db_config['source_schema']} Target: sirius.{db_config['target_schema']}"
        )
    )
    log.info(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    if environment != "preproduction":
        amend_dev_data(db_engine=target_db_engine)
    tables_dict = table_helpers.get_enabled_table_details()
    tables_list = table_helpers.get_table_list(tables_dict)
    if audit == "True":
        log.info("Running Pre-Audit - Table Copies")
        run_audit(target_db_engine, source_db_engine, "before", log, tables_list)
        log.info("Finished Pre-Audit - Table Copies")
    for i, table in enumerate(tables_list):
        log.debug(f"This is table number {i + 1} of {len(tables_list)}")
        insert_data_into_target(
            db_config=db_config,
            source_db_engine=source_db_engine,
            target_db_engine=target_db_engine,
            table_name=table,
            table_details=tables_dict[table],
        )
        update_data_in_target(
            db_config=db_config,
            source_db_engine=source_db_engine,
            table=table,
            table_details=tables_dict[table],
        )
        completed_tables.append(table)
        if environment == "local":
            update_progress(module_name="load_to_sirius", completed_items=completed_tables)
    if audit == "True":
        log.info("Running Post-Audit - Table Copies and Comparisons")
        run_audit(target_db_engine, source_db_engine, "after", log, tables_list)
        log.info("Finished Post-Audit - Table Copies and Comparisons")

def runner(target_db, db_config):
    """
    | Name      | Running Order | Requires |
    | --------- | ------------- | -------- |
    | persons   | 1             |          |
    | addresses | 2             | persons  |
    """
    entity_name = "clients"
    if not check_entity_enabled(entity_name):
        return False
    log.info(log_title(message=entity_name))
    log.debug("insert_persons_clients")
    insert_persons_clients(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_addresses_clients")
    insert_addresses_clients(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_phonenumbers_clients")
    insert_phonenumbers_clients(
        target_db=target_db,
        db_config=db_config,
    )

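# Hypothetical sketch (not the source implementation): each entity runner gates itself
# on check_entity_enabled(). Assuming entities are toggled through config.ENABLED_ENTITIES,
# as the transform and load-to-staging steps suggest, a minimal version could be:
def check_entity_enabled(entity_name, extra_entities=None):
    required = [entity_name] + (extra_entities or [])
    disabled = [e for e in required if not config.ENABLED_ENTITIES.get(e)]
    if disabled:
        log.info(f"Skipping '{entity_name}': requires disabled entities {', '.join(disabled)}")
        return False
    return True
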
def run_data_tests(verbosity_level="INFO"):
    t = time.process_time()
    log.info(log_title(message="Migration Step: Test Transformed Casrec Data"))
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    current_path = Path(os.path.dirname(os.path.realpath(__file__)))
    test_path = str(current_path / "data_tests")
    pytest_args = [test_path, "--disable-warnings", "-r N"]
    if verbosity_level == "INFO":
        pytest_args += ["--tb=line"]
    else:
        pytest_args += ["--tb=long", "-v", "-s"]
    log.info(
        f"Running data tests on {config.SAMPLE_PERCENTAGE}% of data with at "
        f"least {config.MIN_PERCENTAGE_FIELDS_TESTED}% of fields tested"
    )
    exit_code = pytest.main(pytest_args)
    if exit_code == 0:
        log.info("All tests passed")
    else:
        log.error("Tests failed")
    log.info(f"Total test time: {round(time.process_time() - t, 2)}")

def main(verbose, ignore_schemas):
    set_logging_level(verbose)
    log.info(log_title(message="Prepare Target"))
    log.info("Perform Sirius DB Housekeeping")
    conn_target = psycopg2.connect(config.get_db_connection_string("target"))
    conn_source = psycopg2.connect(config.get_db_connection_string("migration"))
    delete_all_schemas(log=log, conn=conn_source, ignore_schemas=ignore_schemas)
    log.info("Deleted Schemas")
    log.debug(
        "(operations which need to be performed on Sirius DB ahead of the final Casrec Migration)"
    )
    execute_sql_file(sql_path, "prepare_sirius.sql", conn_target)
    log.info("Roll back previous migration")
    if environment in ("local", "development"):
        max_orig_person_id = result_from_sql_file(
            sql_path, "get_max_orig_person_id.sql", conn_target
        )
        execute_generated_sql(
            sql_path,
            "rollback_fixtures.template.sql",
            "{max_orig_person_id}",
            max_orig_person_id,
            conn_target,
        )
    conn_target.close()

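# Hypothetical sketch (assumed, not shown in this excerpt): result_from_sql_file()
# above returns a single scalar from a .sql file, e.g. the max person id fed into the
# rollback template. A minimal psycopg2-based version could be:
def result_from_sql_file(sql_path, file_name, conn):
    with open(os.path.join(sql_path, file_name)) as sql_file:
        statement = sql_file.read()
    with conn.cursor() as cursor:
        cursor.execute(statement)
        row = cursor.fetchone()
    return row[0] if row else None
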
def runner(target_db, db_config):
    """
    | Name         | Running Order | Requires           |
    | ------------ | ------------- | ------------------ |
    | client_death | 1             | persons (clients)  |
    | deputy_death | 2             | persons (deputies) |
    """
    entity_name = "death"
    extra_entities = ["clients", "deputies"]
    if not check_entity_enabled(entity_name, extra_entities):
        return False
    log.info(log_title(message=entity_name))
    log.info("Inserting client_death_notifications")
    insert_client_death_notifications(
        target_db=target_db,
        db_config=db_config,
    )
    log.info("Inserting deputy_death_notifications")
    insert_deputy_death_notifications(
        target_db=target_db,
        db_config=db_config,
    )

def main(verbose, team, clear):
    set_logging_level(verbose)
    log.info(log_title(message="Filter Data"))
    conn_source = psycopg2.connect(config.get_db_connection_string("migration"))
    if team:
        if lay_team:
            log.info(f"Lay Team filtering specified in env vars: Team {lay_team}")
            log.info(f"Overriding with Lay Team requested at runtime: Team {team}")
        else:
            log.info(f"Lay Team filtering requested at runtime: Team {team}")
    elif lay_team:
        team = lay_team
        log.info(f"Lay Team filtering specified in env vars: Team {team}")
    else:
        log.info("No filtering requested, proceed with migrating ALL.")
    if team:
        team = "T" + team
        log.info(f"Deleting data not associated with {team}")
        execute_generated_sql(
            sql_path,
            "delete_filtered_source_data.template.sql",
            "{team}",
            team,
            conn_source,
        )

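# Hypothetical sketch (assumed, not the source implementation): execute_generated_sql()
# substitutes one placeholder token into a SQL template and runs the result. Based on
# how it is called in this step and in the prepare-target step, it could be roughly:
def execute_generated_sql(sql_path, template_name, placeholder, value, conn):
    with open(os.path.join(sql_path, template_name)) as template_file:
        statement = template_file.read().replace(placeholder, str(value))
    with conn.cursor() as cursor:
        cursor.execute(statement)
    conn.commit()
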
def runner(target_db, db_config):
    """
    | Name | Running Order | Requires |
    | ---- | ------------- | -------- |
    |      |               |          |
    """
    entity_name = "warnings"
    extra_entities = ["clients", "deputies"]
    if not check_entity_enabled(entity_name, extra_entities):
        return False
    log.info(log_title(message=entity_name))
    log.debug("insert_client_violent_warnings")
    insert_client_violent_warnings(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_client_special_warnings")
    insert_client_special_warnings(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_client_saarcheck_warnings")
    insert_client_saarcheck_warnings(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_client_nodebtchase_warnings")
    insert_client_nodebtchase_warnings(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_client_person_warning")
    insert_client_person_warning(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_deputy_violent_warnings")
    insert_deputy_violent_warnings(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_deputy_special_warnings")
    insert_deputy_special_warnings(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_deputy_person_warning")
    insert_deputy_person_warning(
        target_db=target_db,
        db_config=db_config,
    )

def main(staging):
    log.info(helpers.log_title(message="Validation"))
    global is_staging
    is_staging = staging
    set_validation_target()
    pre_validation()
    log.info("RUN VALIDATION")
    execute_sql_file(
        sql_path_temp, validation_sqlfile, conn_target, config.schemas["public"]
    )
    post_validation()
    log.info("Adding sql files to bucket...\n")
    # s3 = get_s3_session(session, environment, host)
    if ci != "true":
        for file in os.listdir(sql_path_temp):
            file_path = f"{sql_path_temp}/{file}"
            s3_file_path = f"validation/sql/{file}"
            if file.endswith(".sql"):
                upload_file(bucket_name, file_path, s3, log, s3_file_path)
    if get_exception_count() > 0:
        log.info("Exceptions WERE found: override / continue anyway\n")
        # exit(1)
    else:
        log.info("No exceptions found: continue...\n")

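# Hypothetical sketch (assumed, matching the call signature above): upload_file()
# pushes each generated validation .sql file to S3. With a boto3 client it could be:
def upload_file(bucket_name, file_path, s3, log, s3_file_path):
    try:
        s3.upload_file(file_path, bucket_name, s3_file_path)
        log.debug(f"Uploaded {file_path} to s3://{bucket_name}/{s3_file_path}")
    except Exception as e:
        log.error(f"Upload of {file_path} failed: {e}")
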
def main(clear, include_tests, chunk_size):
    log.info(log_title(message="Migration Step: Transform Casrec Data"))
    log.info(
        log_title(
            message=f"Source: {db_config['source_schema']} Target: {db_config['target_schema']}"
        )
    )
    log.info(
        log_title(
            message=f"Enabled entities: {', '.join(k for k, v in config.ENABLED_ENTITIES.items() if v is True)}"
        )
    )
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    version_details = helpers.get_json_version()
    log.info(
        f"Using JSON def version '{version_details['version_id']}' last updated {version_details['last_modified']}"
    )
    db_config["chunk_size"] = chunk_size if chunk_size else 10000
    log.info(f"Chunking data at {db_config['chunk_size']} rows")
    log.debug(f"allowed_entities: {allowed_entities}")
    if clear:
        clear_tables(db_config=db_config)
    clients.runner(target_db=target_db, db_config=db_config)
    cases.runner(target_db=target_db, db_config=db_config)
    bonds.runner(target_db=target_db, db_config=db_config)
    supervision_level.runner(target_db=target_db, db_config=db_config)
    deputies.runner(target_db=target_db, db_config=db_config)
    death.runner(target_db=target_db, db_config=db_config)
    events.runner(target_db=target_db, db_config=db_config)
    finance.runner(target_db=target_db, db_config=db_config)
    remarks.runner(target_db=target_db, db_config=db_config)
    reporting.runner(target_db=target_db, db_config=db_config)
    tasks.runner(target_db=target_db, db_config=db_config)
    teams.runner(target_db=target_db, db_config=db_config)
    visits.runner(target_db=target_db, db_config=db_config)
    warnings.runner(target_db=target_db, db_config=db_config)
    if include_tests:
        run_data_tests(verbosity_level="DEBUG")
    if environment == "local":
        update_progress(module_name="transform", completed_items=files_used)
    log.debug(f"Number of mapping docs used: {len(files_used)}")

def main(verbose): set_logging_level(verbose) log.info(log_title(message="Fixtures")) conn_migration = psycopg2.connect(config.get_db_connection_string("migration")) conn_target = psycopg2.connect(config.get_db_connection_string("target")) log.info("Add fixtures into Sirius DB to replicate Skeleton Clients") log.info("- Clients") client.load_fixtures(config, conn_migration, conn_target) log.info("- Addresses") address.load_fixtures(config, conn_target)
def main(clear): log.info( log_title(message="Integration Step: Apply Sirius business rules to Staging DB") ) log.info( log_title( message=f"Source: {db_config['source_schema']} Target: {db_config['target_schema']}" ) ) log.info( log_title( message=f"Enabled entities: {', '.join(k for k, v in config.ENABLED_ENTITIES.items() if v is True)}" ) ) log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}") if clear: clear_tables(db_engine=target_db_engine, db_config=db_config) insert_unique_uids(db_config=db_config, target_db_engine=target_db_engine)
def count_rows(connection_string, destination_schema, enabled_entities, team=""):
    log.info(
        helpers.log_title(
            message=f"Checking row counts for schema '{destination_schema}', team: '{team if team != '' else 'all'}'"
        )
    )
    current_dir = get_current_directory()
    conn = psycopg2.connect(connection_string)
    cursor = conn.cursor()
    with open(f"{current_dir}/dev_row_counts.json", "r") as row_count_json:
        row_count_dict = json.load(row_count_json)
    for item in row_count_dict:
        for entity_name, entity_details in item.items():
            if entity_name in enabled_entities:
                for query_details in entity_details:
                    query = query_details["query"]
                    query_schema = query.replace("{schema}", destination_schema)
                    if team != "":
                        expected_row_count = query_details["row_counts"][f"team_{team}"]
                    else:
                        expected_row_count = query_details["row_counts"]["all"]
                    try:
                        cursor.execute(query_schema)
                        row_count = cursor.fetchall()[0][0]
                        if row_count != expected_row_count:
                            raise IncorrectRowCount
                    except IncorrectRowCount:
                        log.error(
                            f"'{entity_name} {query_details['table_name']}' row counts do not match: "
                            f"expected {expected_row_count}, actual {row_count}"
                        )
                    except psycopg2.DatabaseError as e:
                        log.error(e)
                    except Exception as e:
                        log.error(e)
    log.info("All row counts checked")

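# Illustrative structure only (entity, table and count values are made up): count_rows()
# above expects dev_row_counts.json to be a list of {entity: [query specs]} objects,
# where each spec carries a query with a {schema} placeholder, a table name, and
# expected counts keyed by "all" plus "team_<n>", e.g.:
#
# [
#   {
#     "clients": [
#       {
#         "table_name": "persons",
#         "query": "SELECT COUNT(*) FROM {schema}.persons",
#         "row_counts": {"all": 100, "team_1": 10}
#       }
#     ]
#   }
# ]
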
def main(verbose):
    try:
        log.setLevel(verbosity_levels[verbose])
        log.info(f"{verbosity_levels[verbose]} logging enabled")
    except KeyError:
        log.setLevel("INFO")
        log.info(f"{verbose} is not a valid verbosity level")
        log.info("INFO logging enabled")
    log.info(log_title(message="Validation Step: check things that are not just data"))
    log.debug(f"Working in environment: {os.environ.get('ENVIRONMENT')}")
    enabled_table_dict = table_helpers.get_enabled_table_details()
    table_list = table_helpers.get_table_list(enabled_table_dict)
    sequence_list = table_helpers.get_sequences_list(enabled_table_dict)
    uid_sequence_list = table_helpers.get_uid_sequences_list(enabled_table_dict)
    tests = []
    sequences = check_sequences(sequences=sequence_list, db_config=db_config)
    tests.append({"name": "Sequences Reset", "result": sequences})
    uid_sequences = check_uid_sequences(sequences=uid_sequence_list, db_config=db_config)
    tests.append({"name": "UID Sequences Reset", "result": uid_sequences})
    continuous_ids = check_continuous(table_list=table_list, db_config=db_config)
    tests.append({"name": "Continuous IDs", "result": continuous_ids})
    duplicate_uids = get_duplicate_uids(uid_sequence_list=uid_sequence_list, db_config=db_config)
    tests.append({"name": "Unique UIDs", "result": duplicate_uids})
    # This should be in data validation - once it's added there, remove it here
    address_line_format = check_address_line_format(db_config=db_config)
    tests.append({"name": "Address Line Formatting", "result": address_line_format})
    report = format_report(tests)
    print(report)

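# Hypothetical sketch (assumed, not shown in this excerpt): format_report() renders the
# name/result pairs collected above. Assuming each result is truthy on success, a
# minimal version could be:
def format_report(tests):
    lines = ["Post-migration checks:"]
    for test in tests:
        status = "PASS" if test["result"] else "FAIL"
        lines.append(f"  {test['name']}: {status}")
    return "\n".join(lines)
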
def runner(target_db, db_config):
    """
    | Name | Running Order | Requires |
    | ---- | ------------- | -------- |
    |      |               |          |
    """
    entity_name = "visits"
    if not check_entity_enabled(entity_name):
        return False
    log.info(log_title(message=entity_name))
    log.debug("Not currently implemented")

def runner(target_db, db_config):
    """
    | Name         | Running Order | Requires |
    | ------------ | ------------- | -------- |
    | persons      | 1             |          |
    | phonenumbers | 2             | persons  |
    """
    entity_name = "deputies"
    extra_entities = ["clients", "cases"]
    if not check_entity_enabled(entity_name, extra_entities):
        return False
    log.info(log_title(message=entity_name))
    log.debug("insert_persons_deputies")
    insert_persons_deputies(target_db=target_db, db_config=db_config)
    log.debug("insert_phonenumbers_deputies")
    insert_phonenumbers_deputies_daytime(
        target_db=target_db,
        db_config=db_config,
    )
    insert_phonenumbers_deputies_evening(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_addresses_deputies")
    insert_addresses_deputies(
        target_db=target_db,
        db_config=db_config,
    )
    log.debug("insert_order_deputies")
    insert_order_deputies(
        target_db=target_db,
        db_config=db_config,
    )

def runner(db_config, target_db):
    """
    | Name                  | Running Order | Requires |
    | --------------------- | ------------- | -------- |
    | supervision_level_log | 1             | cases    |
    """
    entity_name = "supervision_level"
    extra_entities = ["cases"]
    if not check_entity_enabled(entity_name, extra_entities):
        return False
    log.info(log_title(message=entity_name))
    log.debug("insert_supervision_level_log")
    insert_supervision_level_log(
        db_config,
        target_db,
    )

def runner(target_db, db_config):
    """
    | Name          | Running Order | Requires     |
    | ------------- | ------------- | ------------ |
    | notes         | 1             |              |
    | caseitem_note | 2             | notes, cases |
    """
    entity_name = "remarks"
    extra_entities = ["cases"]
    if not check_entity_enabled(entity_name, extra_entities):
        return False
    log.info(log_title(message=entity_name))
    log.debug("insert_notes")
    insert_notes(target_db=target_db, db_config=db_config)
    log.debug("insert_caseitem_note")
    insert_caseitem_note(target_db=target_db, db_config=db_config)

def main(verbose): set_logging_level(verbose) log.info(log_title(message="Integration")) log.info("Create an integration schema for use with this step") copy_schema( log=log, sql_path=shared_sql_path, from_config=config.db_config["migration"], from_schema=config.schemas["post_transform"], to_config=config.db_config["migration"], to_schema=config.schemas["integration"], ) log.info("Modify new schema") conn = psycopg2.connect(config.get_db_connection_string("migration")) execute_generated_sql( local_sql_path, "sirius_id_cols.template.sql", "{schema}", config.schemas["integration"], conn, ) conn.close()
def runner(db_config, target_db):
    """
    | Name            | Running Order | Requires               |
    | --------------- | ------------- | ---------------------- |
    | cases           | 1             |                        |
    | person_caseitem | 2             | cases, clients_persons |
    """
    entity_name = "cases"
    extra_entities = ["clients"]
    if not check_entity_enabled(entity_name, extra_entities):
        return False
    log.info(log_title(message=entity_name))
    log.debug("insert_cases")
    insert_cases(target_db=target_db, db_config=db_config)
    log.debug("insert_person_caseitem")
    insert_person_caseitem(
        target_db=target_db,
        db_config=db_config,
    )