def test_safe_name(self):
    """Test safe_name database name sanitiser.

    Names made only of letters, digits and underscores are expected to
    come back unchanged; the punctuation and spaces in the other samples
    are expected to be removed.
    """
    # (raw name, expected sanitised name) pairs, checked in order.
    cases = (
        ('Fred', 'Fred'),
        ('Fred_123456789', 'Fred_123456789'),
        ('Fred!@%&***', 'Fred'),
        ('Fred;drop postgres;', 'Freddroppostgres'),
    )
    for raw_name, expected in cases:
        self.assertEqual(dbutil.safe_name(raw_name), expected)
def __init__(self, db1, db2, schema1='public', schema2='public',
             verbosity=0, output=sys.stdout):
    """Prepare a comparison between two database connections.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.

    Keyword Arguments:
        schema1: The schema to be used for the first database (db1),
            defaults to 'public'.
        schema2: The schema to be used for the second database (db2),
            defaults to 'public'.
        verbosity: Amount of output generated if a difference is
            detected.
            0 -- No output, just the return value.
            1 -- Missing columns, mismatched primary keys, one line
                notification of table content differences.
            2 -- As above, but prints the details of the first
                MAX_DIFFERENCES content differences in each table.
            3 -- As above, but prints the details of all differences.
            Defaults to 0.
        output: Where the output goes. This is assumed to be a file
            object. Defaults to sys.stdout.
    """

    # Keep the connections' current autocommit settings in
    # self.old_autocommit, then switch both connections to autocommit.
    self.old_autocommit = (db1.autocommit, db2.autocommit)
    for connection in (db1, db2):
        connection.autocommit = True

    # Schema names are sanitised before use, just in case.
    self.schema1 = dbutil.safe_name(schema1)
    self.schema2 = dbutil.safe_name(schema2)

    # The wrappers give access to the database structure queries.
    self.db1 = ComparisonWrapper(db1, self.schema1)
    self.db2 = ComparisonWrapper(db2, self.schema2)

    # Start from the plain database names ...
    self.db1_name = self.db1.database_name()
    self.db2_name = self.db2.database_name()

    # ... and prefix the schema name whenever it is not 'public'.
    if self.schema1 != 'public':
        self.db1_name = '.'.join((self.schema1, self.db1_name))
    if self.schema2 != 'public':
        self.db2_name = '.'.join((self.schema2, self.db2_name))

    self.report = Reporter(self.db1_name, self.db2_name,
                           verbosity, output)
def __init__(self, db1, db2, schema1="public", schema2="public", verbosity=0, output=sys.stdout):
    """Build the wrappers and reporter used to compare two databases.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.

    Keyword Arguments:
        schema1: The schema to be used for the first database (db1),
            defaults to 'public'.
        schema2: The schema to be used for the second database (db2),
            defaults to 'public'.
        verbosity: Amount of output generated if a difference is
            detected.
            0 -- No output, just the return value.
            1 -- Missing columns, mismatched primary keys, one line
                notification of table content differences.
            2 -- As above, but prints the details of the first
                MAX_DIFFERENCES content differences in each table.
            3 -- As above, but prints the details of all differences.
            Defaults to 0.
        output: Where the output goes. This is assumed to be a file
            object. Defaults to sys.stdout.
    """
    # Record the incoming autocommit settings before overriding them.
    previous_settings = (db1.autocommit, db2.autocommit)
    self.old_autocommit = previous_settings
    db1.autocommit = db2.autocommit = True

    # Sanitise each schema name, then wrap its connection so that the
    # database structure queries become available.
    self.schema1 = dbutil.safe_name(schema1)
    self.schema2 = dbutil.safe_name(schema2)
    self.db1 = ComparisonWrapper(db1, self.schema1)
    self.db2 = ComparisonWrapper(db2, self.schema2)

    # Database names, qualified by schema unless the schema is 'public'.
    self.db1_name = self.db1.database_name()
    self.db2_name = self.db2.database_name()
    if self.schema1 != "public":
        self.db1_name = ".".join([self.schema1, self.db1_name])
    if self.schema2 != "public":
        self.db2_name = ".".join([self.schema2, self.db2_name])

    self.report = Reporter(self.db1_name, self.db2_name, verbosity, output)
def __init__(self, db1, db2, schema1, schema2):
    """Prepare a comparison between two database connections.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.
        schema1: The schema to be used for the first database (db1).
        schema2: The schema to be used for the second database (db2).
    """

    # Keep the connections' current autocommit settings in
    # self.old_autocommit, then switch both connections to autocommit.
    self.old_autocommit = (db1.autocommit, db2.autocommit)
    for connection in (db1, db2):
        connection.autocommit = True

    # Schema names are sanitised before use, just in case.
    self.schema1 = dbutil.safe_name(schema1)
    self.schema2 = dbutil.safe_name(schema2)

    # The wrappers give access to the database structure queries.
    self.db1 = ComparisonWrapper(db1, self.schema1)
    self.db2 = ComparisonWrapper(db2, self.schema2)

    # Start from the plain database names ...
    self.db1_name = self.db1.database_name()
    self.db2_name = self.db2.database_name()

    # ... and prefix the schema name whenever it is not 'public'.
    if self.schema1 != 'public':
        self.db1_name = '.'.join((self.schema1, self.db1_name))
    if self.schema2 != 'public':
        self.db2_name = '.'.join((self.schema2, self.db2_name))

    # Input, output and expected directories for this module and suite.
    # Not used yet.
    module_name = "tilecompare"
    suite_name = "TileCompare"
    self.input_dir = dbutil.input_directory(module_name, suite_name)
    self.output_dir = dbutil.output_directory(module_name, suite_name)
    self.expected_dir = dbutil.expected_directory(module_name, suite_name)
def compare_tables(db1, db2, table, schema1='public', schema2='public',
                   ignore_columns=None, verbosity=0, output=sys.stdout):
    """Compares tables from two databases, returns True if identical.

    This function compares the tables named 'table' in two databases.
    It checks that non-ignored columns are present in both tables, and
    it checks that the contents match. It does not care about column
    ordering, but does use the primary key to order the rows for the
    content check. It will regard different primary keys as a
    significant difference, and will report an error if either table
    does not have a primary key.

    Using the primary key for ordering means that the results are
    dependent on the order of insertion for records which have an
    auto-generated key. This is a limitation of the current
    implementation.

    Preconditions:
        db1 and db2 are open database connections. These are assumed
            to be psycopg2 connections to PostgreSQL databases.
        table is the name of a table present in both databases. This
            table has a primary key in both databases.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.
        table: The name of the table to be compared.

    Keyword Arguments:
        schema1: The schema to be used for the first database (db1),
            defaults to 'public'.
        schema2: The schema to be used for the second database (db2),
            defaults to 'public'.
        ignore_columns: A list (or other Python iterable) of columns
            to be ignored. These may optionally be qualified by table
            e.g. 'dataset.datetime_processed'. Qualified column names
            for tables other than table may be present in this list,
            but will have no effect. The contents of the ignored
            columns will not be compared, and the comparison will not
            care if they are only in the table in one database and not
            the other. Defaults to an empty list.
        verbosity: Amount of output generated if a difference is
            detected.
            0 -- No output, just the return value.
            1 -- Missing columns, mismatched primary keys, one line
                notification of table content differences.
            2 -- As above, but prints the details of the first
                MAX_DIFFERENCES content differences in each table.
            3 -- As above, but prints the details of all differences.
            Defaults to 0.
        output: Where the output goes. This is assumed to be a file
            object. Defaults to sys.stdout.

    Return Value:
        Returns True if the tables 'table' are identical in both
        databases, excepting columns specified as ignored by the
        arguments. Returns False otherwise.

    Raises:
        AssertionError: If the (sanitised) table name cannot be found
            in either database.

    Postconditions:
        This function should have no side effects, except for the
        output generated if verbosity is greater than 0. Once the
        Comparison object has been built, its restore_autocommit()
        method is always called, including when the table check or the
        comparison itself raises.
    """

    if ignore_columns is None:
        ignore_columns = []

    table = dbutil.safe_name(table)

    comparison = Comparison(db1, db2, schema1=schema1, schema2=schema2,
                            verbosity=verbosity, output=output)

    try:
        # Raised explicitly rather than through `assert` so that the
        # checks are not stripped when Python runs with -O. The
        # exception type and message are the same as before.
        if not comparison.db1.table_exists(table):
            raise AssertionError(
                "Could not find table '%s' in database '%s'." %
                (table, comparison.db1_name))
        if not comparison.db2.table_exists(table):
            raise AssertionError(
                "Could not find table '%s' in database '%s'." %
                (table, comparison.db2_name))

        return comparison.compare_tables(table, ignore_columns)
    finally:
        # Comparison.__init__ switched the connections to autocommit;
        # undo that on every exit path, not only on success.
        comparison.restore_autocommit()
def __init__(self, conn, default_schema='public'):
    """Wrap a connection and remember the schema to use by default.

    Arguments:
        conn: The connection handed on to
            dbutil.ConnectionWrapper.__init__.
        default_schema: Schema name, passed through dbutil.safe_name
            before being stored as self.default_schema. Defaults to
            'public'.
    """
    sanitised_schema = dbutil.safe_name(default_schema)
    self.default_schema = sanitised_schema
    dbutil.ConnectionWrapper.__init__(self, conn)
def compare_tables(
    db1, db2, table, schema1="public", schema2="public", ignore_columns=None, verbosity=0, output=sys.stdout
):
    """Compares tables from two databases, returns True if identical.

    This function compares the tables named 'table' in two databases.
    It checks that non-ignored columns are present in both tables, and
    it checks that the contents match. It does not care about column
    ordering, but does use the primary key to order the rows for the
    content check. It will regard different primary keys as a
    significant difference, and will report an error if either table
    does not have a primary key.

    Using the primary key for ordering means that the results are
    dependent on the order of insertion for records which have an
    auto-generated key. This is a limitation of the current
    implementation.

    Preconditions:
        db1 and db2 are open database connections. These are assumed
            to be psycopg2 connections to PostgreSQL databases.
        table is the name of a table present in both databases. This
            table has a primary key in both databases.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.
        table: The name of the table to be compared.

    Keyword Arguments:
        schema1: The schema to be used for the first database (db1),
            defaults to 'public'.
        schema2: The schema to be used for the second database (db2),
            defaults to 'public'.
        ignore_columns: A list (or other Python iterable) of columns
            to be ignored. These may optionally be qualified by table
            e.g. 'dataset.datetime_processed'. Qualified column names
            for tables other than table may be present in this list,
            but will have no effect. The contents of the ignored
            columns will not be compared, and the comparison will not
            care if they are only in the table in one database and not
            the other. Defaults to an empty list.
        verbosity: Amount of output generated if a difference is
            detected.
            0 -- No output, just the return value.
            1 -- Missing columns, mismatched primary keys, one line
                notification of table content differences.
            2 -- As above, but prints the details of the first
                MAX_DIFFERENCES content differences in each table.
            3 -- As above, but prints the details of all differences.
            Defaults to 0.
        output: Where the output goes. This is assumed to be a file
            object. Defaults to sys.stdout.

    Return Value:
        Returns True if the tables 'table' are identical in both
        databases, excepting columns specified as ignored by the
        arguments. Returns False otherwise.

    Raises:
        AssertionError: If the (sanitised) table name cannot be found
            in either database.

    Postconditions:
        This function should have no side effects, except for the
        output generated if verbosity is greater than 0. Once the
        Comparison object has been built, its restore_autocommit()
        method is always called, including when the table check or the
        comparison itself raises.
    """
    if ignore_columns is None:
        ignore_columns = []

    table = dbutil.safe_name(table)

    comparison = Comparison(db1, db2, schema1=schema1, schema2=schema2, verbosity=verbosity, output=output)

    try:
        # Check db1 first, then db2. The error is raised explicitly
        # rather than through `assert` so the check is not stripped when
        # Python runs with -O; type and message are the same as before.
        candidates = (
            (comparison.db1, comparison.db1_name),
            (comparison.db2, comparison.db2_name),
        )
        for wrapper, database_name in candidates:
            if not wrapper.table_exists(table):
                raise AssertionError("Could not find table '%s' in database '%s'." % (table, database_name))

        tables_match = comparison.compare_tables(table, ignore_columns)
    finally:
        # Comparison.__init__ switched the connections to autocommit;
        # undo that on every exit path, not only on success.
        comparison.restore_autocommit()

    return tables_match
def __init__(self, conn, default_schema="public"):
    """Store the sanitised default schema, then initialise the base wrapper.

    Arguments:
        conn: The connection passed to
            dbutil.ConnectionWrapper.__init__.
        default_schema: Schema name; the value returned by
            dbutil.safe_name for it is kept in self.default_schema.
            Defaults to 'public'.
    """
    # The schema attribute is set before the base initialiser runs.
    self.default_schema = dbutil.safe_name(default_schema)

    base_init = dbutil.ConnectionWrapper.__init__
    base_init(self, conn)