Exemplo n.º 1
0
    def anonymize_database(self, database_strategy):
        """
        Anonymize a restored database using the passed database strategy.

        Steps, in order: create and fill the seed table (skipped when the
        strategy's ``fake_update_qualifier_map`` is empty), run the "before"
        scripts, apply every table strategy, run the "after" scripts, drop the
        seed table, then pause briefly.

        :param database_strategy: a strategy.DatabaseStrategy configuration
        :raises UnsupportedTableStrategyError: if a table strategy's type is not
            TRUNCATE, DELETE or UPDATE_COLUMNS
        :return: None
        """
        seed_qualifiers = database_strategy.fake_update_qualifier_map
        seed_column_count = len(seed_qualifiers)

        # Seed-table setup is skipped entirely when the qualifier map is empty.
        if seed_column_count > 0:
            self.logger.info("creating seed table with %d columns", seed_column_count)
            self.__runner.db_execute(
                query_factory.get_create_seed_table(SEED_TABLE_NAME, seed_qualifiers)
            )

            self.logger.info("Inserting seed data")
            self.__seed(seed_qualifiers)

        self.__run_scripts(database_strategy.before_scripts, "before")

        strategies = database_strategy.table_strategies
        strategy_count = len(strategies)
        self.logger.info("Anonymizing %d tables", strategy_count)

        with tqdm(desc="Anonymizing database", total=strategy_count) as bar:
            for strategy in strategies:
                # A configured schema is reported and otherwise ignored.
                if strategy.schema is not None:
                    self.logger.warning(
                        "%s: MySQL provider does not support table schema. This option will be ignored.",
                        strategy.table_name,
                    )

                # Each branch labels the progress bar, then builds the SQL for
                # its strategy type; execution happens once, below the chain.
                kind = strategy.strategy_type
                if kind == TableStrategyTypes.TRUNCATE:
                    bar.set_description("Truncating {}".format(strategy.table_name))
                    sql = query_factory.get_truncate_table(strategy.table_name)
                elif kind == TableStrategyTypes.DELETE:
                    bar.set_description("Deleting {}".format(strategy.table_name))
                    sql = query_factory.get_delete_table(strategy.table_name)
                elif kind == TableStrategyTypes.UPDATE_COLUMNS:
                    bar.set_description("Anonymizing {}".format(strategy.table_name))
                    sql = query_factory.get_update_table(SEED_TABLE_NAME, strategy)
                else:
                    raise UnsupportedTableStrategyError(strategy)

                self.__runner.db_execute(sql)
                bar.update()

        self.__run_scripts(database_strategy.after_scripts, "after")

        # NOTE(review): the drop is issued even when no seed table was created
        # above; presumably the generated statement tolerates a missing table -
        # confirm in query_factory.
        self.logger.info("dropping seed table")
        drop_seed_table_sql = query_factory.get_drop_seed_table(SEED_TABLE_NAME)
        self.__runner.db_execute(drop_seed_table_sql)

        # Pause for an arbitrary, short time so this step does not interact
        # with transactional dump operations.
        self.logger.debug("Waiting for trailing operations to complete...")
        sleep(0.2)
Exemplo n.º 2
0
    def anonymize_database(self, database_strategy):
        """
        Anonymize a restored database using the passed database strategy.

        Steps, in order: create and fill the seed table (skipped when the
        strategy's ``fake_update_qualifier_map`` is empty), run the "before"
        scripts, apply every table strategy, run the "after" scripts, and drop
        the seed table.

        :param database_strategy: a strategy.DatabaseStrategy configuration
        :raises UnsupportedTableStrategyError: if a table strategy's type is not
            TRUNCATE, DELETE or UPDATE_COLUMNS
        :return: None
        """
        seed_qualifiers = database_strategy.fake_update_qualifier_map
        seed_column_count = len(seed_qualifiers)

        # Seed-table setup is skipped entirely when the qualifier map is empty.
        if seed_column_count > 0:
            self.logger.info("creating seed table with %d columns",
                             seed_column_count)
            self.__runner.db_execute(
                query_factory.get_create_seed_table(SEED_TABLE_NAME,
                                                    seed_qualifiers))

            self.logger.info("Inserting seed data")
            self.__seed(seed_qualifiers)

        self.__run_scripts(database_strategy.before_scripts, "before")

        strategies = database_strategy.table_strategies
        strategy_count = len(strategies)
        self.logger.info("Anonymizing %d tables", strategy_count)

        with tqdm(desc="Anonymizing database", total=strategy_count) as bar:
            for strategy in strategies:
                # Each branch labels the progress bar with the table's
                # qualified name, then builds the SQL for its strategy type;
                # execution happens once, below the chain.
                kind = strategy.strategy_type
                if kind == TableStrategyTypes.TRUNCATE:
                    bar.set_description(
                        "Truncating {}".format(strategy.qualified_name))
                    sql = query_factory.get_truncate_table(strategy)
                elif kind == TableStrategyTypes.DELETE:
                    bar.set_description(
                        "Deleting {}".format(strategy.qualified_name))
                    sql = query_factory.get_delete_table(strategy)
                elif kind == TableStrategyTypes.UPDATE_COLUMNS:
                    bar.set_description(
                        "Anonymizing {}".format(strategy.qualified_name))
                    sql = query_factory.get_update_table(SEED_TABLE_NAME,
                                                         strategy)
                else:
                    raise UnsupportedTableStrategyError(strategy)

                self.__runner.db_execute(sql)
                bar.update()

        self.__run_scripts(database_strategy.after_scripts, "after")

        # NOTE(review): the drop is issued even when no seed table was created
        # above; presumably the generated statement tolerates a missing table -
        # confirm in query_factory.
        self.logger.info("dropping seed table")
        drop_seed_table_sql = query_factory.get_drop_seed_table(SEED_TABLE_NAME)
        self.__runner.db_execute(drop_seed_table_sql)
Exemplo n.º 3
0
    def anonymize_database(self, database_strategy):
        """
        Anonymize a restored database using the passed database strategy.

        Steps, in order: create and fill the seed table (skipped when the
        strategy's ``fake_update_qualifier_map`` is empty), run the "before"
        scripts, apply every table strategy, run the "after" scripts, and drop
        the seed table. Statements are built inline with bracket-quoted
        identifiers and an optional ``[schema].`` prefix.

        :param database_strategy: a strategy.DatabaseStrategy configuration
        :raises UnsupportedTableStrategyError: if a table strategy's type is not
            TRUNCATE, DELETE or UPDATE_COLUMNS
        :return: None
        """
        seed_qualifiers = database_strategy.fake_update_qualifier_map
        seed_column_count = len(seed_qualifiers)

        # Seed-table setup is skipped entirely when the qualifier map is empty.
        if seed_column_count > 0:
            self.logger.info("creating seed table with %d columns", seed_column_count)
            self.__create_seed_table(seed_qualifiers)

            self.logger.info("Inserting seed data")
            self.__seed(seed_qualifiers)

        self.__run_scripts(database_strategy.before_scripts, "before")

        strategies = database_strategy.table_strategies
        strategy_count = len(strategies)
        self.logger.info("Anonymizing %d tables", strategy_count)

        with tqdm(desc="Anonymizing database", total=strategy_count) as bar:
            for strategy in strategies:
                table = strategy.table_name

                # A falsy schema (None or empty) yields an unprefixed table.
                if strategy.schema:
                    prefix = f"[{strategy.schema}]."
                else:
                    prefix = ""

                kind = strategy.strategy_type

                if kind == TableStrategyTypes.TRUNCATE:
                    bar.set_description("Truncating {}".format(table))
                    self.__db_execute("TRUNCATE TABLE {}[{}];".format(prefix, table))

                elif kind == TableStrategyTypes.DELETE:
                    bar.set_description("Deleting {}".format(table))
                    self.__db_execute("DELETE FROM {}[{}];".format(prefix, table))

                elif kind == TableStrategyTypes.UPDATE_COLUMNS:
                    bar.set_description("Anonymizing {}".format(table))
                    grouped = strategy.group_by_where()
                    group_total = len(grouped)

                    # One UPDATE statement is issued per entry returned by
                    # group_by_where(); a falsy key means no WHERE clause.
                    for position, (where, columns) in enumerate(grouped.items(), start=1):
                        set_clause = ",".join(
                            "[{}] = {}".format(column_name, self.__get_column_subquery(column))
                            for column_name, column in columns.items()
                        )
                        where_clause = f" WHERE {where}" if where else ""
                        bar.set_description(
                            "Anonymizing {}: w[{}/{}]".format(table, position, group_total)
                        )
                        self.__db_execute(
                            "UPDATE {}[{}] SET {}{};".format(prefix, table, set_clause, where_clause)
                        )

                else:
                    raise UnsupportedTableStrategyError(strategy)

                bar.update()

        self.__run_scripts(database_strategy.after_scripts, "after")

        self.logger.info("Dropping seed table")
        self.__drop_seed_table()
Exemplo n.º 4
0
    def anonymize_database(self, database_strategy):
        """
        Anonymize a restored database using the passed database strategy.

        Steps, in order: create and fill the seed table (skipped when the
        strategy's ``fake_update_qualifier_map`` is empty), run the "before"
        scripts, apply every table strategy, run the "after" scripts, and drop
        the seed table.

        Each table strategy is processed inside its own ``try`` block: a
        failure is logged and recorded, and processing continues with the next
        table. If any strategy failed, an exception is raised once all tables
        have been attempted; in that case the "after" scripts are not run and
        the seed table is not dropped.

        :param database_strategy: a strategy.DatabaseStrategy configuration
        :raises Exception: if one or more table strategies failed. The message
            reports how many failed and the first recorded error is attached
            as ``__cause__``. An unsupported strategy type is also reported
            this way, because UnsupportedTableStrategyError is caught by the
            per-table handler.
        :return: None
        """
        qualifier_map = database_strategy.fake_update_qualifier_map

        # Seed-table setup is skipped entirely when the qualifier map is empty.
        if len(qualifier_map) > 0:
            self.logger.info("creating seed table with %d columns", len(qualifier_map))
            self.__create_seed_table(qualifier_map)

            self.logger.info("Inserting seed data")
            self.__seed(qualifier_map)

        self.__run_scripts(database_strategy.before_scripts, "before")

        table_strategies = database_strategy.table_strategies
        self.logger.info("Anonymizing %d tables", len(table_strategies))

        # One entry per failed table strategy, in processing order.
        anonymization_errors = []

        with tqdm(
            desc="Anonymizing database", total=len(table_strategies)
        ) as progressbar:
            for table_strategy in table_strategies:
                try:
                    table_name = table_strategy.table_name
                    # A falsy schema (None or empty) yields an unprefixed table.
                    schema_prefix = (
                        f"[{table_strategy.schema}]." if table_strategy.schema else ""
                    )

                    if table_strategy.strategy_type == TableStrategyTypes.TRUNCATE:
                        progressbar.set_description("Truncating {}".format(table_name))
                        self.__db_execute(
                            "TRUNCATE TABLE {}[{}];".format(schema_prefix, table_name)
                        )

                    elif table_strategy.strategy_type == TableStrategyTypes.DELETE:
                        progressbar.set_description("Deleting {}".format(table_name))
                        self.__db_execute(
                            "DELETE FROM {}[{}];".format(schema_prefix, table_name)
                        )

                    elif (
                        table_strategy.strategy_type
                        == TableStrategyTypes.UPDATE_COLUMNS
                    ):
                        progressbar.set_description("Anonymizing {}".format(table_name))
                        where_grouping = table_strategy.group_by_where()
                        total_wheres = len(where_grouping)

                        # One UPDATE statement is issued per entry returned by
                        # group_by_where(); a falsy key means no WHERE clause.
                        for i, (where, column_map) in enumerate(
                            where_grouping.items(), start=1
                        ):
                            column_assignments = ",".join(
                                "[{}] = {}".format(
                                    name,
                                    self.__get_column_subquery(
                                        column, table_name, name
                                    ),
                                )
                                for name, column in column_map.items()
                            )
                            where_clause = f" WHERE {where}" if where else ""
                            progressbar.set_description(
                                "Anonymizing {}: w[{}/{}]".format(
                                    table_name, i, total_wheres
                                )
                            )
                            # Disable ANSI_WARNINGS to allow oversized fake data to be truncated without error
                            self.__db_execute(
                                "SET ANSI_WARNINGS off; UPDATE {}[{}] SET {}{}; SET ANSI_WARNINGS on;".format(
                                    schema_prefix,
                                    table_name,
                                    column_assignments,
                                    where_clause,
                                )
                            )

                    else:
                        raise UnsupportedTableStrategyError(table_strategy)

                except Exception as e:
                    # Deliberate best-effort: record the failure and keep going
                    # so that every table is attempted before giving up.
                    anonymization_errors.append(e)
                    self.logger.exception(
                        "Error while anonymizing table %s",
                        table_strategy.qualified_name,
                    )

                progressbar.update()

        if anonymization_errors:
            # Surface how many strategies failed and chain the first recorded
            # error so the root cause is visible in the traceback, not only in
            # the log output.
            raise Exception(
                "Error during anonymization: {} of {} table strategies failed".format(
                    len(anonymization_errors), len(table_strategies)
                )
            ) from anonymization_errors[0]

        self.__run_scripts(database_strategy.after_scripts, "after")

        self.logger.info("Dropping seed table")
        self.__drop_seed_table()
Exemplo n.º 5
0
def test_unsupported_table_strategy():
    error = UnsupportedTableStrategyError("error message")