def test_save(self):
    """Test database saving (and reload)."""
    self.dbname1 = dbutil.random_name('test_save_db')
    self.filename1 = self.dbname1 + ".sql"

    # Create a new database, then dump it back to disk.
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)
    dbutil.TESTSERVER.save(self.dbname1, self.SAVE_DIR, self.filename1,
                           'processing_level')

    # Reload the dump file as a second database...
    self.dbname2 = dbutil.random_name('test_save_db_copy')
    dbutil.TESTSERVER.create(self.dbname2, self.SAVE_DIR, self.filename1)

    # ...and verify the copy is visible from the maintenance database.
    maint_conn = dbutil.TESTSERVER.connect(self.MAINTENANCE_DB,
                                           superuser=True)
    try:
        maint_conn = dbutil.MaintenanceWrapper(maint_conn)
        self.assertTrue(maint_conn.exists(self.dbname2),
                        "Saved and reloaded database "
                        "does not seem to be there.")
    finally:
        maint_conn.close()
def test_save(self):
    """Test database saving (and reload)."""
    # Build a database and dump it to an SQL file...
    self.dbname1 = dbutil.random_name('test_save_db')
    self.filename1 = self.dbname1 + ".sql"
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)
    dbutil.TESTSERVER.save(self.dbname1, self.SAVE_DIR, self.filename1,
                           'processing_level')

    # ...then load that dump under a fresh random name.
    self.dbname2 = dbutil.random_name('test_save_db_copy')
    dbutil.TESTSERVER.create(self.dbname2, self.SAVE_DIR, self.filename1)

    # The reloaded copy must exist on the server.
    maint_conn = dbutil.TESTSERVER.connect(self.MAINTENANCE_DB,
                                           superuser=True)
    try:
        maint_conn = dbutil.MaintenanceWrapper(maint_conn)
        exists = maint_conn.exists(self.dbname2)
        self.assertTrue(exists,
                        "Saved and reloaded database "
                        "does not seem to be there.")
    finally:
        maint_conn.close()
def test_copy_table_between_databases(self):
    """Test copy of a table from one database to another database."""
    self.dbname1 = dbutil.random_name('test_copy_db')
    self.dbname2 = dbutil.random_name('test_copy_db')
    self.filename1 = self.dbname1 + ".sql"

    # Create both databases from the same saved dump.
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)
    dbutil.TESTSERVER.create(self.dbname2, self.SAVE_DIR, self.SAVE_FILE)

    # Connect to each database and wrap the connections so the
    # table-existence helpers are available.
    conn1 = dbutil.TESTSERVER.connect(self.dbname1, superuser=True)
    conn2 = dbutil.TESTSERVER.connect(self.dbname2, superuser=True)
    conn1 = dbcompare.ComparisonWrapper(conn1)
    conn2 = dbcompare.ComparisonWrapper(conn2)

    # Create a dummy table in Database 1 only.
    table_name = 'some_dummy_table_name'
    sql = ("CREATE TABLE " + table_name + " AS " + "\n" +
           "SELECT * FROM tile_type;")
    with conn1.cursor() as cur:
        cur.execute(sql)

    # The table must now exist in Database 1 and not in Database 2.
    if not conn1.table_exists(table_name):
        self.fail('Table ' + table_name + ' should exist on Database 1')
    if conn2.table_exists(table_name):
        self.fail('Table ' + table_name + ' should not exist in Database 2')

    # Copy the table across, then re-check Database 2.
    dbutil.TESTSERVER.copy_table_between_databases(self.dbname1,
                                                   self.dbname2,
                                                   table_name)
    if not conn2.table_exists(table_name):
        self.fail('Table ' + table_name + ' should exist')
def test_random_name(self):
    """Test random_name random database name generator."""
    basename = 'Fred'
    rnd_name1 = dbutil.random_name(basename)
    rnd_name2 = dbutil.random_name(basename)
    # Each generated name must follow the basename_<9 digits> pattern...
    for generated in (rnd_name1, rnd_name2):
        self.assertRegexpMatches(
            generated, r'^Fred_[\d]{9}',
            "Random name has unexpected format '%s'" % generated)
    # ...and two successive names must differ.
    self.assertNotEqual(rnd_name1, rnd_name2,
                        "Random names are equal: '%s'" % rnd_name1)
def test_random_name(self):
    """Test random_name random database name generator."""
    basename = 'Fred'
    rnd_name1 = dbutil.random_name(basename)
    rnd_name2 = dbutil.random_name(basename)
    # Precompute the expected pattern and failure messages.
    name_pattern = r'^Fred_[\d]{9}'
    msg1 = "Random name has unexpected format '%s'" % rnd_name1
    msg2 = "Random name has unexpected format '%s'" % rnd_name2
    self.assertRegexpMatches(rnd_name1, name_pattern, msg1)
    self.assertRegexpMatches(rnd_name2, name_pattern, msg2)
    self.assertNotEqual(rnd_name1, rnd_name2,
                        "Random names are equal: '%s'" % rnd_name1)
def test_drop(self):
    """Test ability to drop a database."""
    self.dbname1 = dbutil.random_name('test_drop_db')
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)

    # Connect to the new database to confirm it is there (this also
    # creates a pgbouncer pool for it).
    conn = dbutil.TESTSERVER.connect(self.dbname1)
    try:
        conn = dbutil.ConnectionWrapper(conn)
        self.assertEqual(conn.database_name(), self.dbname1)
    finally:
        conn.close()

    # Drop the database, then verify it is gone.
    dbutil.TESTSERVER.drop(self.dbname1)
    maint_conn = dbutil.TESTSERVER.connect(self.MAINTENANCE_DB,
                                           superuser=True)
    try:
        maint_conn = dbutil.MaintenanceWrapper(maint_conn)
        self.assertFalse(maint_conn.exists(self.dbname1),
                         "Dropped database still seems to be there.")
    finally:
        maint_conn.close()
def test_recreate(self):
    """Test ability to recreate a database on top of itself."""
    self.dbname1 = dbutil.random_name('test_recreate_db')
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)

    # Connect to confirm the database exists (this also creates a
    # pgbouncer pool for it).
    conn = dbutil.TESTSERVER.connect(self.dbname1)
    try:
        conn = dbutil.ConnectionWrapper(conn)
        self.assertEqual(conn.database_name(), self.dbname1)
    finally:
        conn.close()

    # Recreate on top of the existing database...
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)

    # ...and check that it still exists.
    maint_conn = dbutil.TESTSERVER.connect(self.MAINTENANCE_DB,
                                           superuser=True)
    try:
        maint_conn = dbutil.MaintenanceWrapper(maint_conn)
        self.assertTrue(maint_conn.exists(self.dbname1),
                        "Recreated database does not seem to be there.")
    finally:
        maint_conn.close()
def setUp(self):
    """Create a scratch database for each test and connect to it."""
    self.conn = None
    self.dbname = dbutil.random_name('test_wrapper_db')
    dbutil.TESTSERVER.create(self.dbname, self.SAVE_DIR,
                             self.TEST_DB_FILE)
    # Connect, then re-wrap the connection so the comparison helper
    # methods are available to the tests.
    self.conn = dbutil.TESTSERVER.connect(self.dbname)
    self.conn = dbcompare.ComparisonWrapper(self.conn)
def setUp(self):
    """Per-test fixture: set up logging, an empty database, and an ingester."""
    #
    # Parse out the name of the test case and use it to name a logfile
    #
    match = re.search(r'\.([^\.]+)$', self.id())
    if match:
        name = match.group(1)
    else:
        name = 'TestIngester'

    logfile_name = "%s.log" % name
    self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
    self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
    # When regenerating benchmarks, write the log directly into the
    # expected-results directory instead of the output directory.
    if self.POPULATE_EXPECTED:
        self.logfile_path = os.path.join(self.EXPECTED_DIR, logfile_name)

    #
    # Set up a handler to log to the logfile, and attach it to the
    # root logger.
    #
    #logging.basicConfig()
    self.handler = logging.FileHandler(self.logfile_path, mode='w')
    self.handler.setLevel(logging.INFO)
    self.handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(self.handler)

    # Add a streamhandler to write output to console
    self.stream_handler = logging.StreamHandler(stream=sys.stdout)
    self.stream_handler.setLevel(logging.INFO)
    self.stream_handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(self.stream_handler)

    # Create an empty database
    self.test_conn = None
    self.test_dbname = dbutil.random_name("test_tile_record")
    LOGGER.info('Creating %s', self.test_dbname)
    dbutil.TESTSERVER.create(self.test_dbname,
                             self.INPUT_DIR, "hypercube_empty.sql")

    # Set the datacube configuration file to point to the empty database
    configuration_dict = {'dbname': self.test_dbname,
                          'temp_dir': self.TEMP_DIR,
                          'tile_root': self.TILE_ROOT_DIR}
    config_file_path = dbutil.update_config_file2(configuration_dict,
                                                  self.INPUT_DIR,
                                                  self.OUTPUT_DIR,
                                                  "test_datacube.conf")

    # Set an instance of the datacube and pass it to an ingester instance
    test_args = TestArgs()
    test_args.config_file = config_file_path
    test_args.debug = False
    test_datacube = IngesterDataCube(test_args)
    self.ingester = TestIngester(datacube=test_datacube)
    self.collection = self.ingester.collection
def test_exists(self):
    """Test database existence check."""
    # The maintenance database is always present on the server.
    self.assertTrue(
        dbutil.TESTSERVER.exists(self.MAINTENANCE_DB),
        "Unable to verify existance of the "
        "maintenance database '%s'." % self.MAINTENANCE_DB)
    # A freshly generated random name must not exist.
    dummy_dbname = dbutil.random_name('dummy')
    self.assertFalse(
        dbutil.TESTSERVER.exists(dummy_dbname),
        "Dummy database '%s' reported as existing." % dummy_dbname)
def setUp(self):
    """Per-test fixture: set up logging, an empty database, and an ingester."""
    #
    # Parse out the name of the test case and use it to name a logfile
    #
    match = re.search(r'\.([^\.]+)$', self.id())
    if match:
        name = match.group(1)
    else:
        name = 'TestIngester'

    logfile_name = "%s.log" % name
    self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
    self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)
    # When regenerating benchmarks, log straight into the expected-results
    # directory rather than the output directory.
    if self.POPULATE_EXPECTED:
        self.logfile_path = os.path.join(self.EXPECTED_DIR, logfile_name)

    #
    # Set up a handler to log to the logfile, and attach it to the
    # root logger.
    #
    #logging.basicConfig()
    self.handler = logging.FileHandler(self.logfile_path, mode='w')
    self.handler.setLevel(logging.INFO)
    self.handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(self.handler)

    # Add a streamhandler to write output to console
    self.stream_handler = logging.StreamHandler(stream=sys.stdout)
    self.stream_handler.setLevel(logging.INFO)
    self.stream_handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(self.stream_handler)

    # Create an empty database
    self.test_conn = None
    self.test_dbname = dbutil.random_name("test_tile_record")
    LOGGER.info('Creating %s', self.test_dbname)
    dbutil.TESTSERVER.create(self.test_dbname,
                             self.INPUT_DIR, "hypercube_empty.sql")

    # Set the datacube configuration file to point to the empty database
    configuration_dict = {'dbname': self.test_dbname,
                          'temp_dir': self.TEMP_DIR,
                          'tile_root': self.TILE_ROOT_DIR}
    config_file_path = dbutil.update_config_file2(configuration_dict,
                                                  self.INPUT_DIR,
                                                  self.OUTPUT_DIR,
                                                  "test_datacube.conf")

    # Set an instance of the datacube and pass it to an ingester instance
    test_args = TestArgs()
    test_args.config_file = config_file_path
    test_args.debug = False
    test_datacube = IngesterDataCube(test_args)
    self.ingester = TestIngester(datacube=test_datacube)
    self.collection = self.ingester.collection
def test_output_directory_1(self):
    """Test test output directory finder/creator, test 1.

    Verifies that output_directory() builds and creates the expected
    per-user path under TEST_RESOURCES_ROOT.
    """
    dummy_user = dbutil.random_name('user')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_user,
                                 'output', 'module', 'suite')
    path = None
    try:
        path = dbutil.output_directory('module', 'suite', user=dummy_user)
        self.check_directory(path, expected_path)
    finally:
        # BUG FIX: if output_directory() raised, 'path' was unbound and
        # the cleanup itself raised NameError, masking the real error.
        if path is not None:
            os.removedirs(path)
def test_exists(self):
    """Test database existence check."""
    # Precompute failure messages, then make the two assertions.
    maint_msg = ("Unable to verify existance of the "
                 "maintenance database '%s'." % self.MAINTENANCE_DB)
    self.assertTrue(dbutil.TESTSERVER.exists(self.MAINTENANCE_DB),
                    maint_msg)
    dummy_dbname = dbutil.random_name('dummy')
    dummy_msg = ("Dummy database '%s' reported as existing."
                 % dummy_dbname)
    self.assertFalse(dbutil.TESTSERVER.exists(dummy_dbname), dummy_msg)
def test_resources_directory(self):
    """Test test resources directory finder/creator.

    Verifies that resources_directory() builds and creates the expected
    per-user path under TEST_RESOURCES_ROOT.
    """
    dummy_user = dbutil.random_name('user')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_user,
                                 'test', 'module', 'suite')
    path = None
    try:
        path = dbutil.resources_directory(dummy_user,
                                          'test', 'module', 'suite')
        self.check_directory(path, expected_path)
    finally:
        # BUG FIX: guard the cleanup — 'path' is unbound when
        # resources_directory() raises, and the resulting NameError
        # would mask the original failure.
        if path is not None:
            os.removedirs(path)
def test_expected_directory_3(self):
    """Test test expected directory finder/creator, test 3.

    Uses an explicit user with version='user' and checks the resulting
    path under TEST_RESOURCES_ROOT.
    """
    dummy_user = dbutil.random_name('user')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_user,
                                 'expected', 'module', 'suite')
    path = None
    try:
        path = dbutil.expected_directory('module', 'suite',
                                         version='user', user=dummy_user)
        self.check_directory(path, expected_path)
    finally:
        # BUG FIX: 'path' is unbound when expected_directory() raises;
        # guard so the cleanup cannot mask the original error.
        if path is not None:
            os.removedirs(path)
def test_input_directory(self):
    """Test test input directory finder/creator.

    Verifies that input_directory() builds and creates the expected
    per-version path under TEST_RESOURCES_ROOT.
    """
    dummy_version = dbutil.random_name('version')
    input_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_version,
                              'input', 'module', 'suite')
    path = None
    try:
        path = dbutil.input_directory('module', 'suite',
                                      version=dummy_version)
        self.check_directory(path, input_path)
    finally:
        # BUG FIX: 'path' is unbound when input_directory() raises;
        # guard so the cleanup cannot mask the original error.
        if path is not None:
            os.removedirs(path)
def test_expected_directory_4(self):
    """Test test expected directory finder/creator, test 4.

    Uses an explicit version string and checks the resulting path under
    TEST_RESOURCES_ROOT.
    """
    dummy_version = dbutil.random_name('version')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT,
                                 dummy_version,
                                 'expected', 'module', 'suite')
    path = None
    try:
        path = dbutil.expected_directory('module', 'suite',
                                         version=dummy_version)
        self.check_directory(path, expected_path)
    finally:
        # BUG FIX: 'path' is unbound when expected_directory() raises;
        # guard so the cleanup cannot mask the original error.
        if path is not None:
            os.removedirs(path)
def xxxtest_create_database(self):
    """Test creation of a database from an SQL dump file.

    (Disabled: the 'xxx' prefix stops unittest from collecting it.
    The original docstring was a copy-paste from test_random_name.)
    """
    self.dbname1 = 'hypercube_v0'
    self.dbname2 = dbutil.random_name('test_create_database')
    LOGGER.info('Creating database %s', self.dbname2)
    dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                             'hypercube_test_ingest.sql')
    self.conn1 = dbutil.TESTSERVER.connect(self.dbname1)
    self.conn2 = dbutil.TESTSERVER.connect(self.dbname2)
    LOGGER.info('About to create database from file')
    # BUG FIX: was self.dbname, which is never assigned in this test and
    # would raise AttributeError; the intended target is the random
    # name generated above.
    dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                             'hypercube_test_ingest.sql')
    LOGGER.info('.done')
def test_expected_directory_2(self):
    """Test test expected directory finder/creator, test 2.

    Sets $USER to a dummy value and checks that version='user' picks
    up the environment variable.
    """
    dummy_user = dbutil.random_name('user')
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT, dummy_user,
                                 'expected', 'module', 'suite')
    old_user = os.environ['USER']
    path = None
    try:
        os.environ['USER'] = dummy_user
        path = dbutil.expected_directory('module', 'suite',
                                         version='user')
        self.check_directory(path, expected_path)
    finally:
        # Always restore the environment first.
        os.environ['USER'] = old_user
        # BUG FIX: 'path' is unbound when expected_directory() raises;
        # guard so the cleanup cannot mask the original error.
        if path is not None:
            os.removedirs(path)
def test_expected_directory_5(self):
    """Test test expected directory finder/creator, test 5.

    Sets $DATACUBE_VERSION and checks that expected_directory() uses it
    as the default version.
    """
    dummy_version = dbutil.random_name('version')
    old_version = os.environ.get('DATACUBE_VERSION', None)
    expected_path = os.path.join(dbutil.TEST_RESOURCES_ROOT,
                                 dummy_version,
                                 'expected', 'module', 'suite')
    path = None
    try:
        os.environ['DATACUBE_VERSION'] = dummy_version
        path = dbutil.expected_directory('module', 'suite')
        self.check_directory(path, expected_path)
    finally:
        # Restore the environment exactly as it was.
        if old_version is None:
            del os.environ['DATACUBE_VERSION']
        else:
            os.environ['DATACUBE_VERSION'] = old_version
        # BUG FIX: 'path' is unbound when expected_directory() raises;
        # guard so the cleanup cannot mask the original error.
        if path is not None:
            os.removedirs(path)
def test_create(self):
    """Test database creation and loading."""
    self.dbname1 = dbutil.random_name('test_create_db')
    dbutil.TESTSERVER.create(self.dbname1, self.SAVE_DIR, self.SAVE_FILE)

    # The new database should be visible from the maintenance database.
    maint_conn = dbutil.TESTSERVER.connect(self.MAINTENANCE_DB,
                                           superuser=True)
    try:
        maint_conn = dbutil.MaintenanceWrapper(maint_conn)
        created = maint_conn.exists(self.dbname1)
        self.assertTrue(created,
                        "New database does not seem to be there.")
    finally:
        maint_conn.close()
def setUp(self):
    """Per-test fixture: prepare a logfile handler, an empty database,
    and an ingester instance for dataset_record tests."""
    #
    # Parse out the name of the test case and use it to name a logfile
    #
    match = re.search(r'\.([^\.]+)$', self.id())
    if match:
        name = match.group(1)
    else:
        name = 'TestIngester'

    logfile_name = "%s.log" % name
    self.logfile_path = os.path.join(self.OUTPUT_DIR, logfile_name)
    self.expected_path = os.path.join(self.EXPECTED_DIR, logfile_name)

    #
    # Set up a handler to log to the logfile, and attach it to the
    # root logger.
    #
    # NOTE(review): unlike the sibling setUp methods, this handler is
    # configured but never passed to LOGGER.addHandler(), so nothing is
    # written to the logfile — confirm whether that is intentional.
    self.handler = logging.FileHandler(self.logfile_path, mode='w')
    self.handler.setLevel(logging.INFO)
    self.handler.setFormatter(logging.Formatter('%(message)s'))

    # Create an empty database
    self.test_conn = None
    print 'Create an empty database'
    self.test_dbname = dbutil.random_name("test_dataset_record")
    print 'Creating %s' %self.test_dbname
    dbutil.TESTSERVER.create(self.test_dbname,
                             self.INPUT_DIR, "hypercube_empty.sql")

    # Set the datacube configuration file to point to the empty database
    configuration_dict = {'dbname': self.test_dbname}
    config_file_path = dbutil.update_config_file2(configuration_dict,
                                                  self.INPUT_DIR,
                                                  self.OUTPUT_DIR,
                                                  "test_datacube.conf")

    # Set an instance of the datacube and pass it to an ingester instance
    test_args = TestArgs()
    test_args.config_file = config_file_path
    test_args.debug = False
    test_datacube = IngesterDataCube(test_args)
    self.ingester = TestIngester(datacube=test_datacube)
    self.collection = self.ingester.collection
def __init__(self, test_name, config):
    """Set up one system test: directories, scenes, database, config.

    Any AssertionError raised by a setup helper is caught; the test is
    marked 'ERROR' and the message recorded instead of propagating.
    """
    self.test_name = test_name
    self.result = ''
    self.error_message = None
    try:
        self.test_dir = self.create_dir('.', test_name)
        self.scenes_dir = self.create_dir(self.test_dir, 'scenes')
        self.remove_old_links()
        if config.has_option(test_name, 'scenes'):
            scene_list = self.link_scenes(test_name, config)
        else:
            scene_list = None
        # export the list of scenes as environment variables for use by
        # called shell script.
        # BUG FIX: when the config has no 'scenes' option, scene_list is
        # None and len(None) raised TypeError; skip the export instead.
        if scene_list is not None:
            for iscene in range(len(scene_list)):
                os.environ['SCENE_DIR%d' % iscene] = scene_list[iscene]
            os.environ['Nscenes'] = str(len(scene_list))
        os.environ['SYSTEST_DIR'] = self.test_dir
        self.temp_dir = self.create_dir(self.test_dir, 'temp')
        self.tile_dir = self.create_dir(self.test_dir, 'tiles')
        self.dbname = dbutil.random_name(test_name)
        self.load_initial_database(test_name, config)
        self.make_datacube_config(test_name, config)
        self.command = None
        if config.has_option(test_name, 'command'):
            self.command = config.get(test_name, 'command')
        os.environ['DATACUBE_ROOT'] = \
            config.get(test_name, 'datacube_root')
        self.logfile = self.open_logfile(test_name)
    except AssertionError as e:
        self.result = 'ERROR'
        self.error_message = e.message
def xxxtest_create_tile_acqusition_info(self):
    """Exercise creation and comparison of the tile_acquisition_info table.

    (Disabled: the 'xxx' prefix stops unittest from collecting it.)
    """
    self.dbname1 = 'hypercube_test'
    self.dbname2 = dbutil.random_name('test_tilecompare')
    LOGGER.info('Creating database %s', self.dbname2)
    dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                             'hypercube_test_ingest.sql')
    # Non-autocommit connections to the benchmark and fresh databases.
    self.conn1 = dbutil.TESTSERVER.connect(self.dbname1, autocommit=False)
    self.conn2 = dbutil.TESTSERVER.connect(self.dbname2, autocommit=False)
    LOGGER.info('About to create comparision pair')
    pair = tilecompare.TileComparisonPair(self.conn1, self.conn2,
                                          'public', 'public')
    fresh_ingest_info_table = 'fresh_ingest_info'
    comparison_table = 'ingest_comparison'
    LOGGER.info('About to create table from fresh ingest')
    tilecompare._copy_ingest_tile_acquisition_info(
        pair, fresh_ingest_info_table)
    LOGGER.info('About to create comparison table')
    tilecompare._create_comparison_table(pair, fresh_ingest_info_table,
                                         comparison_table)
    LOGGER.info('About to compare the tile contents')
    tilecompare._compare_tile_contents(pair, comparison_table)
def test_compare_tile_stores(self): "Test creation of tile_acquisition_info table." "" self.dbname1 = 'hypercube_test2_v0' self.dbname2 = dbutil.random_name('test_tilecompare') LOGGER.info('Creating database %s', self.dbname2) dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR, 'hypercube_test_ingest.sql') #Temp #print 'Loading production database %s' %self.dbname1 #dbutil.TESTSERVER.create(self.dbname1, '/g/data/v10/test_resources/databases', # 'hypercube_v0.sql') #self.dbname1 = 'hypercube_test2_v0' #print 'Loading production database %s' %self.dbname1 #dbutil.TESTSERVER.create(self.dbname1, '/g/data/v10/test_resources/databases', # 'hypercube_v0.sql') #return #Temp self.conn1 = dbutil.TESTSERVER.connect(self.dbname1, autocommit=False) self.conn2 = dbutil.TESTSERVER.connect(self.dbname2, autocommit=False) LOGGER.info('About to call compare_tile_stores') fout = open( os.path.join(self.OUTPUT_DIR, 'tile_comparison_output.txt'), 'w') fout = sys.stdout #temp difference_pairs = tilecompare.compare_tile_stores(self.conn1, self.conn2, output=fout) LOGGER.info('Finished calling compare_tile_stores') if difference_pairs != []: report_string = "Fresh ingest tile content differs from the " \ "benchmark:\n" for pair in difference_pairs: report_string = report_string + "Benchmark tile:\n%s\nFresh" \ "Ingest tile:\n%s\n" %(pair[0], pair[1]) self.fail(report_string) else: print 'Everything passed'
def xxxtest_create_tile_acqusition_info(self):
    """Exercise creation and comparison of the tile_acquisition_info table.

    (Disabled: the 'xxx' prefix stops unittest from collecting it.)
    """
    self.dbname1 = 'hypercube_test'
    self.dbname2 = dbutil.random_name('test_tilecompare')
    LOGGER.info('Creating database %s', self.dbname2)
    dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR,
                             'hypercube_test_ingest.sql')
    # Connect to both databases without autocommit.
    self.conn1 = dbutil.TESTSERVER.connect(self.dbname1, autocommit=False)
    self.conn2 = dbutil.TESTSERVER.connect(self.dbname2, autocommit=False)
    LOGGER.info('About to create comparision pair')
    pair = tilecompare.TileComparisonPair(self.conn1, self.conn2,
                                          'public', 'public')
    fresh_ingest_info_table = 'fresh_ingest_info'
    comparison_table = 'ingest_comparison'
    LOGGER.info('About to create table from fresh ingest')
    tilecompare._copy_ingest_tile_acquisition_info(
        pair, fresh_ingest_info_table)
    LOGGER.info('About to create comparison table')
    tilecompare._create_comparison_table(pair, fresh_ingest_info_table,
                                         comparison_table)
    LOGGER.info('About to compare the tile contents')
    tilecompare._compare_tile_contents(pair, comparison_table)
def test_compare_tile_stores(self): "Test creation of tile_acquisition_info table.""" self.dbname1 = 'hypercube_test2_v0' self.dbname2 = dbutil.random_name('test_tilecompare') LOGGER.info('Creating database %s', self.dbname2) dbutil.TESTSERVER.create(self.dbname2, self.INPUT_DIR, 'hypercube_test_ingest.sql') #Temp #print 'Loading production database %s' %self.dbname1 #dbutil.TESTSERVER.create(self.dbname1, '/g/data/v10/test_resources/databases', # 'hypercube_v0.sql') #self.dbname1 = 'hypercube_test2_v0' #print 'Loading production database %s' %self.dbname1 #dbutil.TESTSERVER.create(self.dbname1, '/g/data/v10/test_resources/databases', # 'hypercube_v0.sql') #return #Temp self.conn1 = dbutil.TESTSERVER.connect(self.dbname1, autocommit=False) self.conn2 = dbutil.TESTSERVER.connect(self.dbname2, autocommit=False) LOGGER.info('About to call compare_tile_stores') fout = open(os.path.join(self.OUTPUT_DIR, 'tile_comparison_output.txt'), 'w') fout = sys.stdout #temp difference_pairs = tilecompare.compare_tile_stores(self.conn1, self.conn2, output=fout) LOGGER.info('Finished calling compare_tile_stores') if difference_pairs != []: report_string = "Fresh ingest tile content differs from the " \ "benchmark:\n" for pair in difference_pairs: report_string = report_string + "Benchmark tile:\n%s\nFresh" \ "Ingest tile:\n%s\n" %(pair[0], pair[1]) self.fail(report_string) else: print 'Everything passed'
def test_landsat_tiler(self):
    """Test the cataloging and tiling of Landsat scences and compare
    resulting database and tile contents with an ingestion benchmark

    NOTE(review): self.mode appears to select how much of the pipeline
    runs -- mode != 1 performs ingestion/tiling, mode 1 reloads a
    previously saved output database, mode 2 additionally fails the
    test when the databases differ. Inferred from the branches below;
    confirm against the test-suite driver.
    """
    # This test is intended as an example, and so is extensively
    # commented.
    # Open a log file
    if self.mode not in [0, 1, 2, 3]:
        self.skipTest(
            'Skipping test_landsat_tiler since flag is not in [0, 1, 2, 3]'
        )
    logfile_path = os.path.join(self.OUTPUT_DIR, "test_landsat_tiler.log")
    self.logfile = open(logfile_path, "w")

    #
    # Create the initial database
    #

    # Randomise the name to avoid collisions with other users.
    self.test_dbname = dbutil.random_name("test_tiler")

    # Create the database.
    print 'About to create dbase from %s' \
        % (os.path.join(self.INPUT_DIR, "hypercube_empty.sql"))
    if self.mode != 1:
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

    #
    # Run dbupdater on the test database and save the result
    #

    # Create updated datacube_conf file with the new dbname and tile_root
    tile_root = os.path.join(self.OUTPUT_DIR, "tiles")
    configuration_dict = {'dbname': self.test_dbname,
                          'tile_root': tile_root}
    config_file_path = dbutil.update_config_file2(configuration_dict,
                                                  self.INPUT_DIR,
                                                  self.OUTPUT_DIR,
                                                  "test_datacube.conf")

    # Run dbupdater
    ingest_dir = os.path.join(self.INPUT_DIR, 'tiler_testing')
    dbupdater_cmd = ["python", "dbupdater.py", "--debug",
                     "--config=%s" % config_file_path,
                     "--source=%s" % ingest_dir,
                     "--removedblist", "--followsymlinks"]
    if self.mode != 1:
        subprocess.check_call(dbupdater_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

    # Run landsat_tiler
    landsat_tiler_cmd = ["python", "landsat_tiler.py",
                         "--config=%s" % config_file_path]
    if self.mode != 1:
        subprocess.check_call(landsat_tiler_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

    # Save the updated database
    if self.mode != 1:
        dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                               "tiler_testing.sql")

    #
    # If an expected result exists then load it and compare
    #

    # Check for expected result
    if self.mode > 0 and os.path.isfile(
            os.path.join(self.EXPECTED_DIR, "tiler_testing.sql")):
        print 'starting to check differences'
        #MPHtemp create the output database
        if self.mode == 1:
            self.test_dbname = dbutil.random_name("tiler_testing")
            dbutil.TESTSERVER.create(self.test_dbname,
                                     self.OUTPUT_DIR, "tiler_testing.sql")
        #END MPHtemp

        # Create a randomised name...
        self.expected_dbname = dbutil.random_name("expected_tiler_testing")

        # load the database...
        dbutil.TESTSERVER.create(self.expected_dbname,
                                 self.EXPECTED_DIR, "tiler_testing.sql")

        # create database connections...
        self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
        self.expected_conn = \
            dbutil.TESTSERVER.connect(self.expected_dbname)

        # and compare.
        dbases_agree = dbcompare.compare_databases(self.test_conn,
                                                   self.expected_conn,
                                                   output=self.logfile,
                                                   verbosity=3)

        if self.mode == 2:
            #Compare databases and fail test if they differ
            assert dbases_agree, "Databases do not match."

        #Compare data within corresponding files of the EXPECTED_DIR and
        #OUTPUT_DIR. Get list of tile pathnames from EXPECTED and OUTPUT
        #databases' repsective tile tables. There is an assumption here
        #that, within each of the expected and output databases,
        #the tile basename uniquely defines both the tile_type_id and the
        #full tile pathname. However, if the tile type table has put ORTHO
        #bands to be of a new tile_type, then
        #corresponding tiles in expected and output may be of different
        #tile_type. So we need to have self.bands_expected and
        #self.bands_output
        expected_tile_dict, output_tile_dict = \
            self.get_tile_pathnames(self.expected_conn, self.test_conn)
        tiles_expected = set(expected_tile_dict.keys())
        tiles_output = set(output_tile_dict.keys())
        tiles_expected_or_output = tiles_expected | tiles_output

        #Construct band source table as per datacube module
        self.bands_expected = \
            self.construct_bands_source_dict(self.expected_conn)
        self.bands_output = \
            self.construct_bands_source_dict(self.test_conn)

        #file_pattern to parse file name for information
        file_pattern = [r'(?P<sat>\w+)_(?P<sensor>\w+)_',
                        r'(?P<processing_level>\w+)_',
                        r'(?P<xindex>-*\d+)_(?P<yindex>-*\d+)_'
                        r'(?P<year>\d+)-(?P<month>\d+)-'
                        r'(?P<day>\d+)T(?P<hour>\d+)-(?P<minute>\d+)-',
                        r'(?P<second_whole>\d+)\.(?P<second_fraction>\d+)'
                        r'\.(?P<file_extension>.+)']
        pattern = re.compile(''.join(file_pattern))

        #Set up dictionary of pixel counts to be accumulated per
        #(procesing_level, tile_layer) over all tiles
        #0: total_pixel_count_expected
        #1: total_pixel_count_output
        #2: total_pixel_count_both
        #3: total_pixel_count_expected_not_output
        #4: total_pixel_count_output_not_expected
        pixel_count_dict = {}

        #Set up nested dicts of differece counts
        difference_count_dict = {}

        #For each tile in EXPECTED_DIR and OUTPUT_DIR, get pixel counts and
        #difference histograms
        #There are five dictionaries involved:
        ### tile_name_dict {'sat': LS5, 'sensor': TM, ...}
        ### bands_dict_expected: those bands from self.bands_expected
        ###                      corresponding to current tile's
        ###                      tile_type_id and (satellite, sensor)
        ### bands_dict_output: output database's correspondent to
        ###                    bands_dict_expected
        ### level_dict_expected: those bands from bands_dict_expected
        ###                      for which the processing level
        ###                      matches that for the current tile
        ### level_dict_output: output database's correspondent to
        ###                    level_dict_expected
        ### all_levels_info_dict [level_dict_expected,
        ####                      level_dict_output]
        ###                      for each processing level
        all_levels_info_dict = {}
        for tile_name in tiles_expected_or_output:
            print 'processing tile %s' % tile_name
            tile_type_id_expected = None
            tile_type_id_output = None
            fname_expected = None
            fname_output = None
            #If tile is in either database, extract tile_type and pathname
            if tile_name in tiles_expected:
                tile_type_id_expected, fname_expected = \
                    expected_tile_dict[tile_name]
            if tile_name in tiles_output:
                tile_type_id_output, fname_output = \
                    output_tile_dict[tile_name]
            #Extract information from the tile name and select
            #nested dictionary for this tile from bands table,
            #given the (sat, sensor) [or("DERIVED', 'PQA') for PQA],
            #which will be common to expected and output tiles, and the
            #tile_type_id, which may be different for expected and output
            matchobj = re.match(pattern, tile_name)
            tile_name_dict = matchobj.groupdict()
            full_key_expected = \
                self.get_tiletype_sat_sens_level(tile_type_id_expected,
                                                 tile_name_dict)
            full_key_output = \
                self.get_tiletype_sat_sens_level(tile_type_id_output,
                                                 tile_name_dict)
            #Following will raise assertion error if a tile's
            #tile_type_id has changed since benchmark ingestion
            full_key = self.check_equal_or_null(full_key_expected,
                                                full_key_output)
            level_dict_expected = {}
            level_dict_output = {}
            #full_key is (tile_type, sat, sensor, processing_level)
            if full_key in all_levels_info_dict:
                (level_dict_expected, level_dict_output) = \
                    all_levels_info_dict[full_key]
            if level_dict_expected == {} and full_key_expected != None:
                level_dict_expected = \
                    self.collect_source_bands(self.bands_expected,
                                              full_key)
            if level_dict_output == {} and full_key_output != None:
                level_dict_output = \
                    self.collect_source_bands(self.bands_output,
                                              full_key)
            if full_key not in all_levels_info_dict:
                all_levels_info_dict[full_key] = [level_dict_expected,
                                                  level_dict_output]
            if all_levels_info_dict[full_key][0] == {} and \
                    level_dict_expected != {}:
                all_levels_info_dict[full_key][0] = level_dict_expected
            if all_levels_info_dict[full_key][1] == {} and \
                    level_dict_output != {}:
                all_levels_info_dict[full_key][1] = level_dict_output
            #Check that the number of bands is as expected, adding
            #singleton dimension if only one band
            ([data_expected, data_output], number_layers) = \
                self.load_and_check(fname_expected, fname_output,
                                    level_dict_expected,
                                    level_dict_output)
            assert bool(fname_expected) == (data_expected != None) and \
                bool(fname_output) == (data_output != None), \
                "data array should exist if and only if fname exists"
            for ilayer in range(number_layers):
                #Define expected and output band data
                band_expected, dtype_expected = \
                    self.get_band_data(data_expected, ilayer)
                band_output, dtype_output = \
                    self.get_band_data(data_output, ilayer)
                assert (band_expected == None) == (dtype_expected == None)\
                    and (band_output == None) == (dtype_output == None), \
                    "band data should exist if and only if dtype exists"
                dtype_this = self.check_equal_or_null(dtype_expected,
                                                      dtype_output)
                #calculate the number of bins required to store the
                #histogram of differences from this datatype
                if tile_name_dict['processing_level'] == 'PQA':
                    #possible difference values are 0 through 16,
                    #(number of tests which differ)
                    bin_count = 16 + 1
                else:
                    #possible difference vals are min through max of dtype
                    bin_count = numpy.iinfo(dtype_this).max - \
                        numpy.iinfo(dtype_this).min + 1
                assert bin_count < 66000, "datatype is more than 16" \
                    "bits, need to add code to coarsen the" \
                    "histogram bins or use apriori max and" \
                    "min values of the data"
                #The histograms are per (level, layer).
                #Could have one histogram per (sat, sensor, level, layer)
                #and then, depending on verbosity, aggregate during report.
                #But for now, just key by (level, layer).
                result_key = (full_key[3], ilayer + 1)
                if result_key not in pixel_count_dict:
                    pixel_count_dict[result_key] = numpy.zeros(
                        shape=(5), dtype=numpy.uint64)
                    difference_count_dict[result_key] = \
                        numpy.zeros(shape=(bin_count),
                                    dtype=numpy.uint64)
                pixel_count = pixel_count_dict[result_key]
                difference_count = difference_count_dict[result_key]
                if tile_name_dict['processing_level'] == 'PQA':
                    if band_expected is None:
                        band_expected = 0
                    if band_output is None:
                        band_output = 0
                    #define index as those pixels with contiguity bit set
                    index_expected = \
                        numpy.bitwise_and(band_expected,
                                          1 << self.PQA_CONTIGUITY_BIT) > 0
                    index_output = \
                        numpy.bitwise_and(band_output,
                                          1 << self.PQA_CONTIGUITY_BIT) > 0
                else:
                    #For NBAR and ORTHO use nodata_value
                    nodata_value = \
                        level_dict_output[ilayer + 1]['nodata_value']
                    if band_expected is None:
                        band_expected = nodata_value
                    if band_output is None:
                        band_output = nodata_value
                    index_expected = band_expected != nodata_value
                    index_output = band_output != nodata_value
                pixel_count[0] += numpy.count_nonzero(index_expected)
                pixel_count[1] += numpy.count_nonzero(index_output)
                pixel_count[2] += \
                    numpy.count_nonzero(numpy.logical_and(index_expected,
                                                          index_output))
                pixel_count[3] += \
                    numpy.count_nonzero(numpy.logical_and
                                        (index_expected, ~index_output))
                pixel_count[4] += \
                    numpy.count_nonzero(numpy.logical_and
                                        (~index_expected, index_output))
                #Only want to calculate differences at common pixels
                index_both = numpy.logical_and(index_expected,
                                               index_output)
                if numpy.count_nonzero(index_both) == 0:
                    continue
                valid_data_expected = band_expected[index_both].ravel()
                valid_data_output = band_output[index_both].ravel()
                #Calculate difference histogram and add to running total
                if tile_name_dict['processing_level'] == 'PQA':
                    difference = \
                        self.count_bitwise_diffs(valid_data_expected,
                                                 valid_data_output)
                else:
                    difference = abs(
                        valid_data_output.astype(numpy.int64) -
                        valid_data_expected.astype(numpy.int64))
                hist, dummy_bin_edges = \
                    numpy.histogram(difference,
                                    numpy.array(range(bin_count + 1),
                                                dtype=numpy.uint64))
                difference_count += hist
                #dereference band data
                band_expected = None
                band_output = None
                difference = None
            #end of layer loop
            #dereference tile data
            data_expected = None
            data_output = None

        #Output
        #for sat_sen, band_dict in all_bands_dict:
        fp = open(os.path.join(self.OUTPUT_DIR,
                               'Histogram_output.txt'), 'w')
        fp.writelines('##### COMPARISON OF TILED DATA IN FOLLOWING '
                      'DIRECTORES\n%s\n%s\n' % (self.EXPECTED_DIR,
                                                self.OUTPUT_DIR))
        result_keys_processed = []
        for full_key in all_levels_info_dict.keys():
            dummy, dummy, dummy, processing_level = full_key
            top_layer_result_key = (processing_level, 1)
            if top_layer_result_key in result_keys_processed:
                continue
            fp.writelines('#### Processing Level: %s\n' %
                          processing_level)
            level_dict_expected, level_dict_output = \
                all_levels_info_dict[full_key]
            assert set(level_dict_expected.keys()) == \
                set(level_dict_output.keys()), "different key sets"
            number_layers = len(level_dict_output.keys())
            for this_layer in range(1, number_layers + 1):
                result_key = (processing_level, this_layer)
                result_keys_processed.append(result_key)
                fp.writelines('### tile_layer = %d\n' % this_layer)
                for key, val in level_dict_expected[this_layer].items():
                    if key == 'tile_layer' or key == 'level_name':
                        continue
                    outline = '# %s = %s' % (key, val)
                    if str(level_dict_output[this_layer][key]) != str(val):
                        outline = '%s (%s in output database)' \
                            % (outline,
                               level_dict_output[this_layer][key])
                    fp.writelines('%s\n' % outline)
                #get key for pixel_count_dict and difference_count_dict
                #Print counts of pixels with valid data
                fp.writelines('#Valid data counts\n')
                pixel_count = pixel_count_dict[result_key]
                count_desc = ['Expected\t', 'Output\t\t', 'Common\t\t',
                              'Missing\t\t', 'Extra\t\t']
                for desc, num in zip(count_desc, pixel_count):
                    fp.writelines('\t\t%s%d\n' % (desc, num))
                #Print histograms of differences in valid data
                fp.writelines('#Histogram of differences in valid data\n')
                difference_count = difference_count_dict[result_key]
                index_nonzero_bins = difference_count > 0
                for bin_no in range(len(difference_count)):
                    if index_nonzero_bins[bin_no]:
                        fp.writelines('\t\tDifference of %d: %d\n'
                                      % (bin_no,
                                         difference_count[bin_no]))
        fp.close()
    else:
        if self.mode > 0:
            self.skipTest("Expected database save file not found.")
def test_landsat_tiler(self):
    """Test the cataloging and tiling of Landsat scenes and compare
    resulting database and tile contents with an ingestion benchmark.

    Modes (self.mode): 0 = ingest only, 1 = compare only (reload a
    previously saved output database), 2 = ingest and fail on database
    mismatch, 3 = ingest and compare without failing on DB mismatch.
    Skips unless an expected benchmark save file is present.
    """
    # This test is intended as an example, and so is extensively
    # commented.
    # Open a log file
    if self.mode not in [0, 1, 2, 3]:
        self.skipTest('Skipping test_landsat_tiler since flag is not in '
                      '[0, 1, 2, 3]')
    logfile_path = os.path.join(self.OUTPUT_DIR, "test_landsat_tiler.log")
    self.logfile = open(logfile_path, "w")

    #
    # Create the initial database
    #

    # Randomise the name to avoid collisions with other users.
    self.test_dbname = dbutil.random_name("test_tiler")

    # Create the database.
    print('About to create dbase from %s'
          % os.path.join(self.INPUT_DIR, "hypercube_empty.sql"))
    if self.mode != 1:
        dbutil.TESTSERVER.create(self.test_dbname,
                                 self.INPUT_DIR, "hypercube_empty.sql")

    #
    # Run dbupdater on the test database and save the result
    #

    # Create updated datacube_conf file with the new dbname and tile_root
    tile_root = os.path.join(self.OUTPUT_DIR, "tiles")
    configuration_dict = {'dbname': self.test_dbname,
                          'tile_root': tile_root}
    config_file_path = dbutil.update_config_file2(configuration_dict,
                                                  self.INPUT_DIR,
                                                  self.OUTPUT_DIR,
                                                  "test_datacube.conf")

    # Run dbupdater
    ingest_dir = os.path.join(self.INPUT_DIR, 'tiler_testing')
    dbupdater_cmd = ["python",
                     "dbupdater.py",
                     "--debug",
                     "--config=%s" % config_file_path,
                     "--source=%s" % ingest_dir,
                     "--removedblist",
                     "--followsymlinks"]
    if self.mode != 1:
        subprocess.check_call(dbupdater_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

    # Run landsat_tiler
    landsat_tiler_cmd = ["python",
                         "landsat_tiler.py",
                         "--config=%s" % config_file_path]
    if self.mode != 1:
        subprocess.check_call(landsat_tiler_cmd, stdout=self.logfile,
                              stderr=subprocess.STDOUT)

    # Save the updated database
    if self.mode != 1:
        dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                               "tiler_testing.sql")

    #
    # If an expected result exists then load it and compare
    #

    # Check for expected result
    if self.mode > 0 and os.path.isfile(os.path.join(self.EXPECTED_DIR,
                                                     "tiler_testing.sql")):
        print('starting to check differences')

        #MPHtemp create the output database
        if self.mode == 1:
            self.test_dbname = dbutil.random_name("tiler_testing")
            dbutil.TESTSERVER.create(self.test_dbname,
                                     self.OUTPUT_DIR, "tiler_testing.sql")
        #END MPHtemp

        # Create a randomised name...
        self.expected_dbname = \
            dbutil.random_name("expected_tiler_testing")

        # load the database...
        dbutil.TESTSERVER.create(self.expected_dbname,
                                 self.EXPECTED_DIR, "tiler_testing.sql")

        # create database connections...
        self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
        self.expected_conn = \
            dbutil.TESTSERVER.connect(self.expected_dbname)

        # and compare.
        dbases_agree = dbcompare.compare_databases(self.test_conn,
                                                   self.expected_conn,
                                                   output=self.logfile,
                                                   verbosity=3)

        if self.mode == 2:
            #Compare databases and fail test if they differ
            assert dbases_agree, "Databases do not match."

        #Compare data within corresponding files of the EXPECTED_DIR and
        #OUTPUT_DIR. Get list of tile pathnames from EXPECTED and OUTPUT
        #databases' respective tile tables. There is an assumption here
        #that, within each of the expected and output databases,
        #the tile basename uniquely defines both the tile_type_id and the
        #full tile pathname. However, if the tile type table has put ORTHO
        #bands to be of a new tile_type, then corresponding tiles in
        #expected and output may be of different tile_type. So we need to
        #have self.bands_expected and self.bands_output
        expected_tile_dict, output_tile_dict = \
            self.get_tile_pathnames(self.expected_conn, self.test_conn)
        tiles_expected = set(expected_tile_dict.keys())
        tiles_output = set(output_tile_dict.keys())
        tiles_expected_or_output = tiles_expected | tiles_output

        #Construct band source table as per datacube module
        self.bands_expected = \
            self.construct_bands_source_dict(self.expected_conn)
        self.bands_output = \
            self.construct_bands_source_dict(self.test_conn)

        #file_pattern to parse file name for information
        file_pattern = [r'(?P<sat>\w+)_(?P<sensor>\w+)_',
                        r'(?P<processing_level>\w+)_',
                        r'(?P<xindex>-*\d+)_(?P<yindex>-*\d+)_'
                        r'(?P<year>\d+)-(?P<month>\d+)-'
                        r'(?P<day>\d+)T(?P<hour>\d+)-(?P<minute>\d+)-',
                        r'(?P<second_whole>\d+)\.(?P<second_fraction>\d+)'
                        r'\.(?P<file_extension>.+)']
        pattern = re.compile(''.join(file_pattern))

        #Set up dictionary of pixel counts to be accumulated per
        #(procesing_level, tile_layer) over all tiles
        #0: total_pixel_count_expected
        #1: total_pixel_count_output
        #2: total_pixel_count_both
        #3: total_pixel_count_expected_not_output
        #4: total_pixel_count_output_not_expected
        pixel_count_dict = {}

        #Set up nested dicts of differece counts
        difference_count_dict = {}

        #For each tile in EXPECTED_DIR and OUTPUT_DIR, get pixel counts
        #and difference histograms.
        #There are five dictionaries involved:
        ### tile_name_dict {'sat': LS5, 'sensor': TM, ...}
        ### bands_dict_expected: those bands from self.bands_expected
        ###                     corresponding to current tile's
        ###                     tile_type_id and (satellite, sensor)
        ### bands_dict_output: output database's correspondent to
        ###                    bands_dict_expected
        ### level_dict_expected: those bands from bands_dict_expected
        ###                      for which the processing level
        ###                      matches that for the current tile
        ### level_dict_output: output database's correspondent to
        ###                    level_dict_expected
        ### all_levels_info_dict [level_dict_expected,
        ####                      level_dict_output]
        ###                      for each processing level
        all_levels_info_dict = {}
        for tile_name in tiles_expected_or_output:
            print('processing tile %s' % tile_name)
            tile_type_id_expected = None
            tile_type_id_output = None
            fname_expected = None
            fname_output = None
            #If tile is in either database, extract tile_type and pathname
            if tile_name in tiles_expected:
                tile_type_id_expected, fname_expected = \
                    expected_tile_dict[tile_name]
            if tile_name in tiles_output:
                tile_type_id_output, fname_output = \
                    output_tile_dict[tile_name]

            #Extract information from the tile name and select
            #nested dictionary for this tile from bands table,
            #given the (sat, sensor) [or("DERIVED', 'PQA') for PQA],
            #which will be common to expected and output tiles, and the
            #tile_type_id, which may be different for expected and output
            matchobj = re.match(pattern, tile_name)
            tile_name_dict = matchobj.groupdict()
            full_key_expected = \
                self.get_tiletype_sat_sens_level(tile_type_id_expected,
                                                 tile_name_dict)
            full_key_output = \
                self.get_tiletype_sat_sens_level(tile_type_id_output,
                                                 tile_name_dict)
            #Following will raise assertion error if a tile's
            #tile_type_id has changed since benchmark ingestion
            full_key = self.check_equal_or_null(full_key_expected,
                                                full_key_output)
            level_dict_expected = {}
            level_dict_output = {}
            #full_key is (tile_type, sat, sensor, processing_level)
            if full_key in all_levels_info_dict:
                (level_dict_expected, level_dict_output) = \
                    all_levels_info_dict[full_key]
            # NOTE: identity checks ('is not None') rather than '!= None';
            # equality tests against None are elementwise for numpy data.
            if level_dict_expected == {} and full_key_expected is not None:
                level_dict_expected = \
                    self.collect_source_bands(self.bands_expected,
                                              full_key)
            if level_dict_output == {} and full_key_output is not None:
                level_dict_output = \
                    self.collect_source_bands(self.bands_output, full_key)
            if full_key not in all_levels_info_dict:
                all_levels_info_dict[full_key] = [level_dict_expected,
                                                  level_dict_output]
            if all_levels_info_dict[full_key][0] == {} and \
                    level_dict_expected != {}:
                all_levels_info_dict[full_key][0] = level_dict_expected
            if all_levels_info_dict[full_key][1] == {} and \
                    level_dict_output != {}:
                all_levels_info_dict[full_key][1] = level_dict_output

            #Check that the number of bands is as expected, adding
            #singleton dimension if only one band
            ([data_expected, data_output], number_layers) = \
                self.load_and_check(fname_expected, fname_output,
                                    level_dict_expected,
                                    level_dict_output)
            assert bool(fname_expected) == (data_expected is not None) \
                and bool(fname_output) == (data_output is not None), \
                "data array should exist if and only if fname exists"

            for ilayer in range(number_layers):
                #Define expected and output band data
                band_expected, dtype_expected = \
                    self.get_band_data(data_expected, ilayer)
                band_output, dtype_output = \
                    self.get_band_data(data_output, ilayer)
                assert (band_expected is None) == \
                    (dtype_expected is None) and \
                    (band_output is None) == (dtype_output is None), \
                    "band data should exist if and only if dtype exists"
                dtype_this = self.check_equal_or_null(dtype_expected,
                                                      dtype_output)
                #calculate the number of bins required to store the
                #histogram of differences from this datatype
                if tile_name_dict['processing_level'] == 'PQA':
                    #possible difference values are 0 through 16,
                    #(number of tests which differ)
                    bin_count = 16 + 1
                else:
                    #possible difference vals are min through max of dtype
                    bin_count = numpy.iinfo(dtype_this).max - \
                        numpy.iinfo(dtype_this).min + 1
                    assert bin_count < 66000, "datatype is more than 16 " \
                        "bits, need to add code to coarsen the " \
                        "histogram bins or use apriori max and " \
                        "min values of the data"

                #The histograms are per (level, layer).
                #Could have one histogram per (sat, sensor, level, layer)
                #and then, depending on verbosity, aggregate during
                #report. But for now, just key by (level, layer).
                result_key = (full_key[3], ilayer + 1)
                if result_key not in pixel_count_dict:
                    pixel_count_dict[result_key] = \
                        numpy.zeros(shape=(5), dtype=numpy.uint64)
                    difference_count_dict[result_key] = \
                        numpy.zeros(shape=(bin_count),
                                    dtype=numpy.uint64)
                pixel_count = pixel_count_dict[result_key]
                difference_count = difference_count_dict[result_key]

                if tile_name_dict['processing_level'] == 'PQA':
                    if band_expected is None:
                        band_expected = 0
                    if band_output is None:
                        band_output = 0
                    #define index as those pixels with contiguity bit set
                    index_expected = \
                        numpy.bitwise_and(band_expected,
                                          1 << self.PQA_CONTIGUITY_BIT) > 0
                    index_output = \
                        numpy.bitwise_and(band_output,
                                          1 << self.PQA_CONTIGUITY_BIT) > 0
                else:
                    #For NBAR and ORTHO use nodata_value
                    nodata_value = \
                        level_dict_output[ilayer + 1]['nodata_value']
                    if band_expected is None:
                        band_expected = nodata_value
                    if band_output is None:
                        band_output = nodata_value
                    index_expected = band_expected != nodata_value
                    index_output = band_output != nodata_value

                pixel_count[0] += numpy.count_nonzero(index_expected)
                pixel_count[1] += numpy.count_nonzero(index_output)
                pixel_count[2] += \
                    numpy.count_nonzero(numpy.logical_and(index_expected,
                                                          index_output))
                pixel_count[3] += \
                    numpy.count_nonzero(numpy.logical_and(index_expected,
                                                          ~index_output))
                pixel_count[4] += \
                    numpy.count_nonzero(numpy.logical_and(~index_expected,
                                                          index_output))

                #Only want to calculate differences at common pixels
                index_both = numpy.logical_and(index_expected,
                                               index_output)
                if numpy.count_nonzero(index_both) == 0:
                    continue
                valid_data_expected = band_expected[index_both].ravel()
                valid_data_output = band_output[index_both].ravel()

                #Calculate difference histogram and add to running total
                if tile_name_dict['processing_level'] == 'PQA':
                    difference = \
                        self.count_bitwise_diffs(valid_data_expected,
                                                 valid_data_output)
                else:
                    difference = \
                        abs(valid_data_output.astype(numpy.int64) -
                            valid_data_expected.astype(numpy.int64))
                hist, dummy_bin_edges = \
                    numpy.histogram(difference,
                                    numpy.array(range(bin_count + 1),
                                                dtype=numpy.uint64))
                difference_count += hist

                #dereference band data
                band_expected = None
                band_output = None
                difference = None
            #end of layer loop

            #dereference tile data
            data_expected = None
            data_output = None

        #Output
        #for sat_sen, band_dict in all_bands_dict:
        fp = open(os.path.join(self.OUTPUT_DIR,
                               'Histogram_output.txt'), 'w')
        fp.writelines('##### COMPARISON OF TILED DATA IN FOLLOWING '
                      'DIRECTORES\n%s\n%s\n' % (self.EXPECTED_DIR,
                                                self.OUTPUT_DIR))
        result_keys_processed = []
        for full_key in all_levels_info_dict.keys():
            dummy, dummy, dummy, processing_level = full_key
            top_layer_result_key = (processing_level, 1)
            if top_layer_result_key in result_keys_processed:
                continue
            fp.writelines('#### Processing Level: %s\n'
                          % processing_level)
            level_dict_expected, level_dict_output = \
                all_levels_info_dict[full_key]
            assert set(level_dict_expected.keys()) == \
                set(level_dict_output.keys()), "different key sets"
            number_layers = len(level_dict_output.keys())
            for this_layer in range(1, number_layers + 1):
                result_key = (processing_level, this_layer)
                result_keys_processed.append(result_key)
                fp.writelines('### tile_layer = %d\n' % this_layer)
                for key, val in level_dict_expected[this_layer].items():
                    if key == 'tile_layer' or key == 'level_name':
                        continue
                    outline = '# %s = %s' % (key, val)
                    if str(level_dict_output[this_layer][key]) != \
                            str(val):
                        outline = '%s (%s in output database)' \
                            % (outline,
                               level_dict_output[this_layer][key])
                    fp.writelines('%s\n' % outline)

                #get key for pixel_count_dict and difference_count_dict
                #Print counts of pixels with valid data
                fp.writelines('#Valid data counts\n')
                pixel_count = pixel_count_dict[result_key]
                count_desc = ['Expected\t', 'Output\t\t', 'Common\t\t',
                              'Missing\t\t', 'Extra\t\t']
                for desc, num in zip(count_desc, pixel_count):
                    fp.writelines('\t\t%s%d\n' % (desc, num))

                #Print histograms of differences in valid data
                fp.writelines('#Histogram of differences in valid '
                              'data\n')
                difference_count = difference_count_dict[result_key]
                index_nonzero_bins = difference_count > 0
                for bin_no in range(len(difference_count)):
                    if index_nonzero_bins[bin_no]:
                        fp.writelines('\t\tDifference of %d: %d\n'
                                      % (bin_no,
                                         difference_count[bin_no]))
        fp.close()
    else:
        if self.mode > 0:
            self.skipTest("Expected database save file not found.")
def compare_tile_stores(db1, db2, schema1='public', schema2='public',
                        output=sys.stdout):
    """Compare the tile stores of two databases.

    Connection db1 is taken to be the production tile store; db2 holds
    the result of a Fresh Ingest onto a previously-empty database, which
    is to be verified against production.

    The comparison proceeds in three stages:
    1. Gather the Fresh Ingest information from db2 into a table and copy
       it across to db1 (production).
    2. On db1, merge that table against production to locate the
       corresponding production tiles.
    3. For every Fresh Ingest tile with a production counterpart, compare
       the two tiles and report any difference.

    A tile may exist in the Fresh Ingest but not in production for
    several reasons: (a) the old ingest inferred lower-level data from
    PQA whereas Fresh Ingest inspects the tile directly; (b) mosaic
    tiles used to be created on demand by the API's stacker class but
    are now made automatically; (c) the Fresh Ingest coverage method
    occasionally picks up new tiles. Such anomalies are written to the
    output stream with a "WARNING" prefix.

    Preconditions: db1 and db2 are open psycopg2 connections to
        PostgreSQL databases; tables not explicitly ignored are expected
        to have primary keys.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.

    Keyword Arguments:
        schema1: Schema used for db1; defaults to 'public'.
        schema2: Schema used for db2; defaults to 'public'.
        output: File object receiving the report; defaults to sys.stdout.

    Return Value: list of (path1, path2) pairs whose tile contents
        differ.
    """
    comparison = TileComparisonPair(db1, db2, schema1, schema2)

    #TEMPORARY delete some tiles:
    _temp_delete_some_tiles(comparison)

    # Random 9-digit suffix keeps the scratch table name unique.
    suffix = dbutil.random_name("_")
    # Scratch table receiving the fresh-ingest information.
    fresh_ingest_table = 'test_ingest%s' % suffix

    # Stage 1: build the fresh-ingest table and copy it to production.
    _copy_fresh_ingest_info(comparison, fresh_ingest_table)

    # Stage 2: each returned list holds tuples of
    # (level, tile_class_id1, tile_class_id2, path1, path2).
    list_both, list_db1_not_db2, list_db2_not_db1 = \
        _get_comparison_pairs(comparison, fresh_ingest_table)

    # Report the edge cases where a tile exists in only one database.
    _log_missing_tile_info([entry[3] for entry in list_db1_not_db2],
                           comparison.db1_name, comparison.db2_name,
                           output)
    _log_missing_tile_info([entry[4] for entry in list_db2_not_db1],
                           comparison.db2_name, comparison.db1_name,
                           output)
    output.writelines('There might be further mosaic tiles that are missing\n')

    # Stage 3: compare contents of the tiles present in both stores.
    return _compare_tile_contents(list_both, output)
def test_onescene(self):
    """Test database update for a single scene."""
    # Log file capturing the output of the subprocesses below.
    self.logfile = open(os.path.join(self.OUTPUT_DIR,
                                     "test_onescene.log"), "w")

    # Build the initial (empty) database under a randomised name so
    # concurrent runs by other users cannot collide.
    self.test_dbname = dbutil.random_name("test_onescene")
    dbutil.TESTSERVER.create(self.test_dbname,
                             self.INPUT_DIR, "hypercube_empty.sql")

    # Point a fresh datacube config file at the new database.
    config_file_path = dbutil.update_config_file(self.test_dbname,
                                                 self.INPUT_DIR,
                                                 self.OUTPUT_DIR,
                                                 "test_datacube.conf")

    # Ingest the single test scene via dbupdater.
    ingest_dir = os.path.join(self.INPUT_DIR, 'onescene')
    command = ["python",
               "dbupdater.py",
               "--debug",
               "--config=%s" % config_file_path,
               "--source=%s" % ingest_dir,
               "--removedblist",
               "--followsymlinks"]
    subprocess.check_call(command, stdout=self.logfile,
                          stderr=subprocess.STDOUT)

    # Save the updated database so it can serve as a future benchmark.
    dbutil.TESTSERVER.save(self.test_dbname, self.OUTPUT_DIR,
                           "onescene.sql")

    # Without a recorded benchmark there is nothing to compare against.
    # (skipTest raises unittest.SkipTest, ending the test here.)
    if not os.path.isfile(os.path.join(self.EXPECTED_DIR,
                                       "onescene.sql")):
        self.skipTest("Expected database save file not found.")

    # Reload the benchmark under a randomised name...
    self.expected_dbname = dbutil.random_name("expected_onescene")
    dbutil.TESTSERVER.create(self.expected_dbname,
                             self.EXPECTED_DIR, "onescene.sql")

    # ...connect to both databases...
    self.test_conn = dbutil.TESTSERVER.connect(self.test_dbname)
    self.expected_conn = dbutil.TESTSERVER.connect(self.expected_dbname)

    # ...and verify that they match.
    databases_match = dbcompare.compare_databases(self.test_conn,
                                                  self.expected_conn,
                                                  output=self.logfile,
                                                  verbosity=3)
    self.assertTrue(databases_match, "Databases do not match.")
def compare_tile_stores(db1, db2, schema1='public', schema2='public',
                        output=sys.stdout):
    """Compares the tile stores from two databases.

    Database Connection db1 is assumed to represent the production tile
    store, against which we wish to verify the tile store resulting from
    a Fresh Ingest, which has taken place onto the previously-empty
    Database Connection db2.

    This function runs in three stages:
    1. Gather the Fresh Ingest information on Database Connection db2
    into a table and copy this across to Database Connection db1, the
    production database.
    2. On Database Connection db1, merge the table from Step 1 to find
    the corresponding production tiles.
    3. For those Fresh Ingest tiles where a production tile can be
    found, compare the two tiles and report if there is a difference.

    It can happen that the tile exists on Fresh Ingest but not on
    production tile store. This can happen for one of several reasons:
    a) The old ingest used PQA to determine the existence of lower-level
    data. By contrast, the Fresh Ingest process looks at the tile
    directly to evaluate the existence of data.
    b) Mosaic tiles used to be created on user-request by the stacker
    class of the API. By contrast, the Fresh Ingest process does this
    automatically.
    c) The coverage method of the Fresh Ingest process will, very
    occasionally, pick up some new tiles.

    Such anomalies are reported in the output stream with a "WARNING"
    prefix.

    Preconditions: db1 and db2 are open database connections. These are
        assumed to be psycopg2 connections to PostgreSQL databases.
        Tables that are not being explicitly ignored are expected to
        have primary keys.

    Positional Arguments:
        db1, db2: Connections to the databases to be compared.

    Keyword Arguments:
        schema1: The schema to be used for the first database (db1),
            defaults to 'public'.
        schema2: The schema to be used for the second database (db2),
            defaults to 'public'.
        output: Where the output goes. This is assumed to be a file
            object. Defaults to sys.stdout.

    Return Value: Returns a list (path1, path2) of those corresponding
        tile pairs where the contents differ.
    """
    pair = TileComparisonPair(db1, db2, schema1, schema2)

    #TEMPORARY delete some tiles:
    _temp_delete_some_tiles(pair)

    # Create a random 9-digit string to append to tables
    random_suffix = dbutil.random_name("_")

    # Name of table to which information from fresh ingest will be
    # written.
    test_ingest_table = 'test_ingest%s' % random_suffix

    # Create the table pertaining to the fresh ingest and copy it to the
    # production database (Stage 1).
    _copy_fresh_ingest_info(pair, test_ingest_table)

    # Create tuple (list_both, list_db1_not_db2, list_db2_not_db1),
    # where each list is a list of tuples:
    # (level, tile_class_id1, tile_class_id2, path1, path2). (Stage 2)
    (list_both, list_db1_not_db2, list_db2_not_db1) = \
        _get_comparison_pairs(pair, test_ingest_table)

    # Output information for the edge cases of tiles being in only one
    # database. Index 3 is path1 (db1), index 4 is path2 (db2).
    tile_list = [p[3] for p in list_db1_not_db2]
    _log_missing_tile_info(tile_list, pair.db1_name, pair.db2_name,
                           output)
    tile_list = [p[4] for p in list_db2_not_db1]
    _log_missing_tile_info(tile_list, pair.db2_name, pair.db1_name,
                           output)
    output.writelines('There might be further mosaic tiles that are missing\n')

    # Compare the tiles if they both exist (Stage 3).
    difference_pairs = _compare_tile_contents(list_both, output)
    return difference_pairs