def anonymize_database(self, database_strategy):
    """
    Run the anonymization pass over a restored database.

    In order: create and fill the seed table (only when the strategy has
    fake-update qualifiers), run the "before" scripts, apply every table
    strategy, run the "after" scripts, drop the seed table and finally pause
    for a short moment.

    :param database_strategy: a strategy.DatabaseStrategy configuration
    :raises UnsupportedTableStrategyError: when a table strategy's type is not
        TRUNCATE, DELETE or UPDATE_COLUMNS
    :return:
    """
    seed_columns = database_strategy.fake_update_qualifier_map
    seed_column_count = len(seed_columns)

    # The seed table is only built when there is at least one qualifier.
    if seed_column_count > 0:
        self.logger.info("creating seed table with %d columns", seed_column_count)
        self.__runner.db_execute(
            query_factory.get_create_seed_table(SEED_TABLE_NAME, seed_columns)
        )

        self.logger.info("Inserting seed data")
        self.__seed(seed_columns)

    self.__run_scripts(database_strategy.before_scripts, "before")

    strategies = database_strategy.table_strategies
    table_count = len(strategies)
    self.logger.info("Anonymizing %d tables", table_count)

    with tqdm(desc="Anonymizing database", total=table_count) as bar:
        for strategy in strategies:
            # A schema on the table strategy is not honoured here; warn and carry on.
            if strategy.schema is not None:
                self.logger.warning(
                    "%s: MySQL provider does not support table schema. This option will be ignored.",
                    strategy.table_name
                )

            kind = strategy.strategy_type

            if kind == TableStrategyTypes.TRUNCATE:
                bar.set_description("Truncating {}".format(strategy.table_name))
                truncate_sql = query_factory.get_truncate_table(strategy.table_name)
                self.__runner.db_execute(truncate_sql)

            elif kind == TableStrategyTypes.DELETE:
                bar.set_description("Deleting {}".format(strategy.table_name))
                delete_sql = query_factory.get_delete_table(strategy.table_name)
                self.__runner.db_execute(delete_sql)

            elif kind == TableStrategyTypes.UPDATE_COLUMNS:
                bar.set_description("Anonymizing {}".format(strategy.table_name))
                self.__runner.db_execute(
                    query_factory.get_update_table(SEED_TABLE_NAME, strategy)
                )

            else:
                raise UnsupportedTableStrategyError(strategy)

            # One tick per table strategy that was handled.
            bar.update()

    self.__run_scripts(database_strategy.after_scripts, "after")

    self.logger.info("dropping seed table")
    drop_seed_sql = query_factory.get_drop_seed_table(SEED_TABLE_NAME)
    self.__runner.db_execute(drop_seed_sql)

    # The pause length is arbitrary: it keeps this step from interacting with
    # transactional dump operations.
    self.logger.debug("Waiting for trailing operations to complete...")
    sleep(0.2)
def test_get_create_seed_table_no_columns():
    """
    get_create_seed_table should raise ValueError when presented with no columns
    """
    # The exception info is never inspected, so it is not bound to a name.
    with pytest.raises(ValueError):
        query_factory.get_create_seed_table("seed_table", {})
def test_get_create_seed_table(qualifier_column_map):
    """
    get_create_seed_table should produce the expected CREATE TABLE statement
    for the supplied qualifier_column_map (presumably a pytest fixture)
    """
    expected_sql = "CREATE TABLE `seed_table` (`first_name` TEXT,`last_name` INT,`first_name_test_arg_5` TEXT);"
    actual_sql = query_factory.get_create_seed_table("seed_table", qualifier_column_map)
    assert actual_sql == expected_sql