def test_get_full_sample_data_on_trait_collection(self, test_data_dir, create_sample):
    ea = ExampleArchive(basepath=test_data_dir)
    if create_sample:
        sample = fidia.Sample.new_from_archive(ea)
    else:
        sample = ea
    # an_object_id = sample.contents[0]
    # an_object_id = 'Gal1'

    mass_data = sample.dmu['StellarMasses'].table['StellarMasses'].stellar_mass
    print(mass_data)
    assert isinstance(mass_data, pd.Series)
    assert len(mass_data) == len(ea.contents)
    print(ea.contents)
    for id, value in zip(ea.contents, mass_data):
        assert sample[id].dmu['StellarMasses'].table['StellarMasses'].stellar_mass == value

    sfr_data = sample.dmu['StellarMasses'].table['StarFormationRates'].sfr
    print(sfr_data)
    assert isinstance(sfr_data, pd.Series)
    for id, value in zip(ea.contents, sfr_data):
        if np.isnan(value):
            assert np.isnan(sample[id].dmu['StellarMasses'].table['StarFormationRates'].sfr)
        else:
            assert sample[id].dmu['StellarMasses'].table['StarFormationRates'].sfr == value
def test_remove_archive(monkeypatch, test_data_dir, clean_persistence_database):
    # NOTE: This works on a completely empty persistence database provided by the
    # `clean_persistence_database` fixture.
    session = fidia.mappingdb_session

    # Add the ExampleArchive to the database:
    from fidia.archive.example_archive import ExampleArchive
    ar = ExampleArchive(basepath=test_data_dir)

    # Confirm that there are entries in the database
    assert session.query(fidia.Archive).count() == 1
    assert session.query(fidia.traits.TraitMapping).count() > 0
    assert session.query(fidia.traits.SubTraitMapping).count() > 0
    assert session.query(fidia.FIDIAColumn).count() > 0

    # Remove our archive
    session.delete(ar)

    # Confirm that all DB entries have been removed.
    assert session.query(fidia.Archive).count() == 0
    assert session.query(fidia.traits.TraitMapping).count() == 0
    assert session.query(fidia.traits.SubTraitMapping).count() == 0
    assert session.query(fidia.FIDIAColumn).count() == 0

    session.close()
def test_ingestion_nonarray_column(test_data_dir, dal_data_dir):
    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive
    # dal_data_dir = "/Users/agreen/scratch/dal/"
    array_column = ar.columns[
        "ExampleArchive:FITSBinaryTableColumn:stellar_masses.fits[1].data[ID->StellarMass]:1"]

    file_store = NumpyFileStore(dal_data_dir)
    file_store.ingest_column(array_column)

    dir_text = subprocess.check_output(["ls", "-R", dal_data_dir])
    print(dir_text.decode())

    data_dir = os.path.join(
        dal_data_dir,
        "ExampleArchive",
        "FITSBinaryTableColumn",
        "stellar_masses.fits[1].data[ID->StellarMass]",
        "1",
    )

    # Check that the files are created. (For this non-array column the store
    # writes a single pandas_series.pkl covering the whole column, so the
    # expected path does not vary with object_id.)
    for object_id in ar.contents:
        assert os.path.exists(os.path.join(data_dir, "pandas_series.pkl"))

    # Check that the dal can retrieve the data again, and it matches:
    for object_id in ar.contents:
        d = file_store.get_value(array_column, object_id)
        assert np.array_equal(
            d,
            ar[object_id].dmu["StellarMasses"].table["StellarMasses"].stellar_mass)
def test_full_ingestion_removes_need_for_original_data(clean_persistence_database):
    """This test checks both the full ingestion and that such an ingestion
    removes the need for the original data."""
    # NOTE: This works on a completely empty persistence database provided by the
    # `clean_persistence_database` fixture.
    with tempfile.TemporaryDirectory() as dal_data_dir:
        with tempfile.TemporaryDirectory() as test_data_dir:
            testdata.generate_simple_dataset(test_data_dir, 5)
            ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive
            file_store = NumpyFileStore(dal_data_dir)
            file_store.ingest_archive(ar)

        # Add this layer to FIDIA's known data access layers
        fidia.dal_host.layers.append(file_store)

        dir_text = subprocess.check_output(["ls", "-R", dal_data_dir])
        print(dir_text.decode())

        # The original data directory has been deleted by the inner context manager:
        assert not os.path.exists(test_data_dir)

        # Now check that data can still be accessed even after original data is removed.
        for object_id in ar.contents:
            ar[object_id].dmu["StellarMasses"].table["StellarMasses"].stellar_mass
        for object_id in ar.contents:
            ar[object_id].image["red"].data

        # Remove layer from FIDIA DAL to avoid problems with other tests:
        idx = fidia.dal_host.layers.index(file_store)
        del fidia.dal_host.layers[idx]
def test_get_data(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    sample = fidia.Sample.new_from_archive(ea)
    # an_object_id = sample.contents[0]
    # an_object_id = 'Gal1'
    image_data = sample['Gal1'].image['red'].data
    print(image_data)
    assert isinstance(image_data, np.ndarray)
    assert image_data.ndim == 2
def test_get_an_astro_object_from_archive(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    # an_object_id = next(iter(sample.keys()))
    an_object_id = 'Gal1'
    astro_object = ea[an_object_id]
    assert isinstance(astro_object, fidia.AstronomicalObject)
    sfr = astro_object.dmu['StellarMasses'].table['StarFormationRates'].sfr
    print(sfr)
    assert isinstance(sfr, (int, float))
def test_get_data_subtrait(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    sample = fidia.Sample.new_from_archive(ea)
    # an_object_id = sample.contents[0]
    # an_object_id = 'Gal1'
    wcs = sample['Gal1'].image['red'].wcs
    print(wcs)
    assert isinstance(wcs, fidia.Trait)
    print(wcs.cdelt1)
    assert isinstance(wcs.cdelt1, (int, float))
def test_known_archives_get_all(self, test_data_dir):
    # Guarantee that ExampleArchive will appear in the persistence database:
    ExampleArchive(basepath=test_data_dir)

    import fidia.archive.archive
    all_archives = fidia.known_archives.all
    assert isinstance(all_archives, list)
    if len(all_archives) > 0:
        for ar in all_archives:
            assert isinstance(ar, fidia.Archive)
    print(all_archives)
    print([ar.archive_id for ar in all_archives])
def test_output_matches_input(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    sample = fidia.Sample.new_from_archive(ea)
    for an_object_id in sample.ids:
        print("an_object_id = %s" % an_object_id)
        image_data = sample[an_object_id].image['red'].data
        file_path = test_data_dir + "/{object_id}/{object_id}_red_image.fits".format(object_id=an_object_id)
        print("file_path = %s" % file_path)
        print("archive_path = %s" % ea.basepath)
        input_data = fits.open(file_path)
        assert (input_data[0].data == image_data).all()
def test_get_data_on_trait_collection(self, test_data_dir, create_sample):
    ea = ExampleArchive(basepath=test_data_dir)
    if create_sample:
        sample = fidia.Sample.new_from_archive(ea)
    else:
        sample = ea
    # an_object_id = sample.contents[0]
    # an_object_id = 'Gal1'

    mass = sample['Gal1'].dmu['StellarMasses'].table['StellarMasses'].stellar_mass
    print(mass)
    assert isinstance(mass, (int, float))

    sfr = sample['Gal1'].dmu['StellarMasses'].table['StarFormationRates'].sfr
    print(sfr)
    assert isinstance(sfr, (int, float))
def test_get_full_sample_data(self, test_data_dir, create_sample):
    ea = ExampleArchive(basepath=test_data_dir)
    if create_sample:
        sample = fidia.Sample.new_from_archive(ea)
    else:
        sample = ea
    # an_object_id = sample.contents[0]
    # an_object_id = 'Gal1'
    image_data = sample.image['red'].data
    print(image_data)
    for d in image_data:
        assert isinstance(d, np.ndarray)
        assert d.ndim == 2
    assert len(list(image_data)) == len(ea.contents)
def test_types_are_preserved(test_data_dir, dal_data_dir):
    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive
    column = ar.columns[
        "ExampleArchive:FITSHeaderColumn:{object_id}/{object_id}_red_image.fits[0].header[NAXIS]:1"]

    input_type = type(column.get_value("Gal1", provenance="definition"))
    assert input_type is int

    file_store = NumpyFileStore(dal_data_dir)
    file_store.ingest_column(column)

    output_type = file_store.get_value(column, "Gal1").dtype
    assert output_type == input_type
def test_data_volume(test_data_dir):
    def get_size(start_path='.'):
        # Total size in bytes of all files under start_path.
        total_size = 0
        for dirpath, dirnames, filenames in os.walk(start_path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                total_size += os.path.getsize(fp)
        return total_size

    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

    with tempfile.TemporaryDirectory() as dal_data_dir:
        file_store = NumpyFileStore(dal_data_dir)
        file_store.ingest_archive(ar)

        # NOTE: no assertion yet; this currently only measures the ingested
        # data volume against the original data volume.
        ingest_size = get_size(dal_data_dir)
        original_size = get_size(test_data_dir)
def test_ingestion_array_column(test_data_dir, dal_data_dir):
    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive
    # dal_data_dir = "/Users/agreen/scratch/dal/"
    array_column = ar.columns[
        "ExampleArchive:FITSDataColumn:{object_id}/{object_id}_red_image.fits[0]:1"]

    file_store = NumpyFileStore(dal_data_dir)
    file_store.ingest_column(array_column)

    dir_text = subprocess.check_output(["ls", "-R", dal_data_dir])
    print(dir_text.decode())

    data_dir = os.path.join(
        dal_data_dir,
        "ExampleArchive",
        "FITSDataColumn",
        "{object_id}/{object_id}_red_image.fits[0]",
        "1",
    )

    # Check that the files are created
    for object_id in ar.contents:
        assert os.path.exists(os.path.join(data_dir, object_id + ".npy"))

    # Check that the dal can retrieve the data again, and it matches:
    for object_id in ar.contents:
        print(object_id)
        d = file_store.get_value(array_column, object_id)
        print(type(d))
        print(d.dtype)
        orig = ar[object_id].image["red"].data
        print(type(orig))
        print(orig.dtype)
        assert np.allclose(d, orig)
        assert np.array_equal(d, orig)
def test_get_full_sample_data_subtrait(self, test_data_dir, create_sample):
    ea = ExampleArchive(basepath=test_data_dir)
    assert isinstance(ea, fidia.Archive)
    if create_sample:
        sample = fidia.Sample.new_from_archive(ea)
    else:
        sample = ea
    # an_object_id = sample.contents[0]
    # an_object_id = 'Gal1'
    wcs = sample.image['red'].wcs
    print(wcs)
    for w in wcs:
        assert isinstance(w, fidia.Trait)
        print(w)
        print(w.cdelt1)
        assert isinstance(w.cdelt1, (int, float))
        assert w.cdelt1 == sample[w.object_id].image['red'].wcs.cdelt1
    assert len(list(wcs)) == len(ea.contents)
def test_ingestion_benchmarks(benchmark, clean_persistence_database, test_data_dir):
    with tempfile.TemporaryDirectory() as test_data_dir:
        testdata.generate_simple_dataset(test_data_dir, 30)
        ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

        def func():
            with tempfile.TemporaryDirectory() as dal_data_dir:
                file_store = NumpyFileStore(dal_data_dir)
                file_store.ingest_archive(ar)

                # Add this layer to FIDIA's known data access layers
                fidia.dal_host.layers.append(file_store)

                # Remove layer from FIDIA DAL to avoid problems with other tests:
                idx = fidia.dal_host.layers.index(file_store)
                del fidia.dal_host.layers[idx]

        benchmark(func)
def test_data_getters_persisted(self, clean_persistence_database, test_data_dir):
    from fidia.archive.example_archive import ExampleArchive
    from fidia.archive import Archive

    ar = ExampleArchive(basepath=test_data_dir)

    # Make SQLAlchemy forget about the object:
    fidia.mappingdb_session.expunge(ar)
    del ar

    ar = fidia.mappingdb_session.query(Archive).filter_by(
        _db_archive_id=ExampleArchive.archive_id).one()

    # Check that we have actually reconstructed the object from the database,
    # and are not just holding a pointer to the original object:
    assert ar._is_reconstructed is True

    # Ensure no DAL layers are interfering with this test.
    assert len(fidia.dal_host.layers) == 0

    # Retrieve data using original getters
    ar["Gal1"].image["red"].data
def example_archive_sample(self):
    ar = ExampleArchive()
    sample = ar.get_full_sample()
    return sample
def example_sample(self):
    return ExampleArchive().get_full_sample()
def test_known_archives_get_by_id(self, test_data_dir):
    # Guarantee that ExampleArchive will appear in the persistence database:
    ExampleArchive(basepath=test_data_dir)

    ar = fidia.known_archives.by_id["ExampleArchive"]
    print(ar)
def example_archive(self, test_data_dir):
    return ExampleArchive(basepath=test_data_dir)
def test_get_an_astro_object(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    sample = fidia.Sample.new_from_archive(ea)
    an_object_id = next(iter(sample.keys()))
    sample[an_object_id]
def test_sample_contents(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    print(ea.contents)
def test_sample_creation_from_archive(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
    sample = fidia.Sample.new_from_archive(ea)
def test_example_archive(self, test_data_dir):
    ea = ExampleArchive(basepath=test_data_dir)
def example_archive_sample(self, test_data_dir):
    ar = ExampleArchive(basepath=test_data_dir)
    sample = Sample.new_from_archive(ar)
    return sample