Пример #1
0
    def test_get_full_sample_data_on_trait_collection(self, test_data_dir, create_sample):
        """Compare sample-wide table columns with the per-object values.

        The ``stellar_mass`` and ``sfr`` columns requested for the whole
        sample must be ``pd.Series`` and agree, entry by entry, with the value
        obtained by looking up each object individually. The test runs either
        on a ``fidia.Sample`` built from the archive or on the archive itself,
        depending on ``create_sample``.
        """
        ea = ExampleArchive(basepath=test_data_dir)
        sample = fidia.Sample.new_from_archive(ea) if create_sample else ea

        mass_data = sample.dmu['StellarMasses'].table['StellarMasses'].stellar_mass
        print(mass_data)
        assert isinstance(mass_data, pd.Series)

        # One entry per object in the archive.
        assert len(mass_data) == len(ea.contents)

        print(ea.contents)

        for object_id, expected_mass in zip(ea.contents, mass_data):
            mass_table = sample[object_id].dmu['StellarMasses'].table['StellarMasses']
            assert mass_table.stellar_mass == expected_mass

        sfr_data = sample.dmu['StellarMasses'].table['StarFormationRates'].sfr
        print(sfr_data)
        assert isinstance(sfr_data, pd.Series)
        for object_id, expected_sfr in zip(ea.contents, sfr_data):
            if np.isnan(expected_sfr):
                # NaN never compares equal to itself, so check for NaN explicitly.
                assert np.isnan(sample[object_id].dmu['StellarMasses'].table['StarFormationRates'].sfr)
                continue
            assert sample[object_id].dmu['StellarMasses'].table['StarFormationRates'].sfr == expected_sfr
Пример #2
0
def test_remove_archive(monkeypatch, test_data_dir,
                        clean_persistence_database):
    """Deleting an archive must also remove its mappings and columns.

    After an ``ExampleArchive`` is created, the database is expected to hold
    one ``Archive`` row plus trait mapping, sub-trait mapping and column rows.
    After ``session.delete``, all four queries must return zero rows.
    """

    # NOTE: This works on a completely empty persistence database provided by the
    # `clean_persistence_database` fixture.

    session = fidia.mappingdb_session

    # Close the session even when an assertion fails, so a failing run does
    # not leave it open for subsequent tests.
    try:
        # Add the ExampleArchive to the database:
        from fidia.archive.example_archive import ExampleArchive
        ar = ExampleArchive(basepath=test_data_dir)

        # Confirm that there are entries in the database
        assert session.query(fidia.Archive).count() == 1
        assert session.query(fidia.traits.TraitMapping).count() > 0
        assert session.query(fidia.traits.SubTraitMapping).count() > 0
        assert session.query(fidia.FIDIAColumn).count() > 0

        # Remove our archive
        session.delete(ar)

        # Confirm that all DB entries have been removed.
        assert session.query(fidia.Archive).count() == 0
        assert session.query(fidia.traits.TraitMapping).count() == 0
        assert session.query(fidia.traits.SubTraitMapping).count() == 0
        assert session.query(fidia.FIDIAColumn).count() == 0
    finally:
        session.close()
Пример #3
0
def test_ingestion_nonarray_column(test_data_dir, dal_data_dir):
    """Ingest the stellar-mass column into a ``NumpyFileStore`` and read it back.

    The test checks that a ``pandas_series.pkl`` file appears in the expected
    directory and that the value returned by the file store for each object
    equals the value obtained through the archive.
    """
    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

    column = ar.columns[
        "ExampleArchive:FITSBinaryTableColumn:stellar_masses.fits[1].data[ID->StellarMass]:1"]

    file_store = NumpyFileStore(dal_data_dir)

    file_store.ingest_column(column)

    # Print the directory listing to help diagnose failures.
    dir_text = subprocess.check_output(["ls", "-R", dal_data_dir])
    print(dir_text.decode())

    data_dir = os.path.join(
        dal_data_dir,
        "ExampleArchive",
        "FITSBinaryTableColumn",
        "stellar_masses.fits[1].data[ID->StellarMass]",
        "1",
    )

    # Check that the file is created. The path does not depend on the object
    # ID, so a single check is sufficient.
    assert os.path.exists(os.path.join(data_dir, "pandas_series.pkl"))

    # Check that the dal can retrieve the data again, and it matches:
    for object_id in ar.contents:
        stored = file_store.get_value(column, object_id)
        expected = ar[object_id].dmu["StellarMasses"].table["StellarMasses"].stellar_mass
        assert np.array_equal(stored, expected)
Пример #4
0
def test_full_ingestion_removes_need_for_original_data(
        clean_persistence_database):
    """This test checks both the full ingestion, and that such an ingestion removes the need for the original data.

    A dataset is generated in a temporary directory, ingested into a
    ``NumpyFileStore`` and the temporary directory is then removed. The data
    is subsequently accessed through the archive with the file store
    registered as a FIDIA data access layer.
    """

    # NOTE: This works on a completely empty persistence database provided by the
    # `clean_persistence_database` fixture.

    with tempfile.TemporaryDirectory() as dal_data_dir:
        with tempfile.TemporaryDirectory() as test_data_dir:
            testdata.generate_simple_dataset(test_data_dir, 5)

            ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

            file_store = NumpyFileStore(dal_data_dir)
            file_store.ingest_archive(ar)

            # Print the directory listing to help diagnose failures.
            dir_text = subprocess.check_output(["ls", "-R", dal_data_dir])
            print(dir_text.decode())

        # Leaving the inner `with` block removes the original data.
        assert not os.path.exists(test_data_dir)

        # Add this layer to FIDIA's known data access layers
        fidia.dal_host.layers.append(file_store)
        try:
            # Now check that data can still be accessed even after original data is removed.
            for object_id in ar.contents:
                ar[object_id].dmu["StellarMasses"].table[
                    "StellarMasses"].stellar_mass

            for object_id in ar.contents:
                ar[object_id].image["red"].data
        finally:
            # Remove layer from FIDIA DAL to avoid problems with other tests.
            # Done in `finally` so that a failure above does not leave the
            # layer registered.
            idx = fidia.dal_host.layers.index(file_store)
            del fidia.dal_host.layers[idx]
Пример #5
0
    def test_get_data(self, test_data_dir):
        """The red image data of 'Gal1' must be a two-dimensional ndarray."""
        archive = ExampleArchive(basepath=test_data_dir)
        sample = fidia.Sample.new_from_archive(archive)

        red_image = sample['Gal1'].image['red']
        pixels = red_image.data
        print(pixels)

        assert isinstance(pixels, np.ndarray)
        assert pixels.ndim == 2
Пример #6
0
    def test_get_an_astro_object_from_archive(self, test_data_dir):
        """Indexing the archive yields an AstronomicalObject with a scalar sfr."""
        archive = ExampleArchive(basepath=test_data_dir)

        galaxy = archive['Gal1']
        assert isinstance(galaxy, fidia.AstronomicalObject)

        sfr_table = galaxy.dmu['StellarMasses'].table['StarFormationRates']
        sfr = sfr_table.sfr
        print(sfr)
        assert isinstance(sfr, (int, float))
Пример #7
0
    def test_get_data_subtrait(self, test_data_dir):
        """The ``wcs`` sub-trait of the red image is a Trait with a numeric cdelt1."""
        archive = ExampleArchive(basepath=test_data_dir)
        sample = fidia.Sample.new_from_archive(archive)

        red_image = sample['Gal1'].image['red']
        wcs = red_image.wcs
        print(wcs)
        assert isinstance(wcs, fidia.Trait)

        print(wcs.cdelt1)
        assert isinstance(wcs.cdelt1, (int, float))
Пример #8
0
    def test_known_archives_get_all(self, test_data_dir):
        """``fidia.known_archives.all`` is a list containing only Archive instances."""
        # Guarantee that ExampleArchive will appear in the persistence database:
        ExampleArchive(basepath=test_data_dir)

        import fidia.archive.archive
        known = fidia.known_archives.all
        assert isinstance(known, list)

        # Iterating an empty list is a no-op, so no length check is needed.
        for archive in known:
            assert isinstance(archive, fidia.Archive)

        print(known)
        print([archive.archive_id for archive in known])
Пример #9
0
    def test_output_matches_input(self, test_data_dir):
        """Red image data from the sample must equal the data in the FITS file.

        For every object ID in the sample, the primary HDU of
        ``<object_id>/<object_id>_red_image.fits`` under ``test_data_dir`` is
        compared element-wise with the data returned through the sample.
        """
        ea = ExampleArchive(basepath=test_data_dir)
        sample = fidia.Sample.new_from_archive(ea)
        for an_object_id in sample.ids:
            print("an_object_id = %s" % an_object_id)
            image_data = sample[an_object_id].image['red'].data

            file_path = test_data_dir + "/{object_id}/{object_id}_red_image.fits".format(object_id=an_object_id)
            print("file_path = %s" % file_path)

            print("archive_path = %s" % ea.basepath)

            # Use a context manager so the FITS file handle is closed after
            # every iteration, even when the comparison fails.
            with fits.open(file_path) as input_data:
                assert (input_data[0].data == image_data).all()
Пример #10
0
    def test_get_data_on_trait_collection(self, test_data_dir, create_sample):
        """Table values for a single object ('Gal1') are plain numbers.

        Runs either on a ``fidia.Sample`` built from the archive or on the
        archive itself, depending on ``create_sample``.
        """
        ea = ExampleArchive(basepath=test_data_dir)
        sample = fidia.Sample.new_from_archive(ea) if create_sample else ea

        mass_table = sample['Gal1'].dmu['StellarMasses'].table['StellarMasses']
        mass = mass_table.stellar_mass
        print(mass)
        assert isinstance(mass, (int, float))

        sfr_table = sample['Gal1'].dmu['StellarMasses'].table['StarFormationRates']
        sfr = sfr_table.sfr
        print(sfr)
        assert isinstance(sfr, (int, float))
Пример #11
0
    def test_get_full_sample_data(self, test_data_dir, create_sample):
        """Sample-wide red image data yields one 2-D ndarray per archive object.

        Runs either on a ``fidia.Sample`` built from the archive or on the
        archive itself, depending on ``create_sample``.
        """
        ea = ExampleArchive(basepath=test_data_dir)
        sample = fidia.Sample.new_from_archive(ea) if create_sample else ea

        red_images = sample.image['red']
        image_data = red_images.data
        print(image_data)

        for pixels in image_data:
            assert isinstance(pixels, np.ndarray)
            assert pixels.ndim == 2

        expected_count = len(ea.contents)
        assert len(list(image_data)) == expected_count
Пример #12
0
def test_types_are_preserved(test_data_dir, dal_data_dir):
    """The NAXIS header value keeps an ``int``-compatible type after ingestion.

    The value read from the column definition must be a Python ``int``, and
    the ``dtype`` of the value returned by the ``NumpyFileStore`` must compare
    equal to that type.
    """
    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

    naxis_column = ar.columns[
        "ExampleArchive:FITSHeaderColumn:{object_id}/{object_id}_red_image.fits[0].header[NAXIS]:1"]

    original_value = naxis_column.get_value("Gal1", provenance="definition")
    input_type = type(original_value)
    assert input_type is int

    file_store = NumpyFileStore(dal_data_dir)
    file_store.ingest_column(naxis_column)

    stored_value = file_store.get_value(naxis_column, "Gal1")
    output_type = stored_value.dtype
    assert output_type == input_type
Пример #13
0
def test_data_volumne(test_data_dir):
    """Compute the on-disk size of the ingested data and of the original data.

    NOTE(review): the two sizes are computed but neither compared nor
    reported in the visible code.
    """
    def get_size(start_path='.'):
        """Return the summed ``os.path.getsize`` of all files below ``start_path``."""
        return sum(
            os.path.getsize(os.path.join(dirpath, filename))
            for dirpath, _dirnames, filenames in os.walk(start_path)
            for filename in filenames
        )

    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

    with tempfile.TemporaryDirectory() as dal_data_dir:
        store = NumpyFileStore(dal_data_dir)
        store.ingest_archive(ar)

        # Measure before the temporary directory is cleaned up.
        ingest_size = get_size(dal_data_dir)

    original_size = get_size(test_data_dir)
Пример #14
0
def test_ingestion_array_column(test_data_dir, dal_data_dir):
    """Ingest the red image column into a ``NumpyFileStore`` and read it back.

    The test checks that one ``<object_id>.npy`` file exists per object in the
    expected directory, and that the array returned by the file store equals
    the red image data obtained through the archive.
    """
    ar = ExampleArchive(basepath=test_data_dir)  # type: fidia.Archive

    image_column = ar.columns[
        "ExampleArchive:FITSDataColumn:{object_id}/{object_id}_red_image.fits[0]:1"]

    store = NumpyFileStore(dal_data_dir)
    store.ingest_column(image_column)

    # Print the directory listing to help diagnose failures.
    listing = subprocess.check_output(["ls", "-R", dal_data_dir])
    print(listing.decode())

    path_parts = (
        "ExampleArchive",
        "FITSDataColumn",
        "{object_id}/{object_id}_red_image.fits[0]",
        "1",
    )
    data_dir = os.path.join(dal_data_dir, *path_parts)

    # Check that the files are created
    for object_id in ar.contents:
        npy_path = os.path.join(data_dir, object_id + ".npy")
        assert os.path.exists(npy_path)

    # Check that the dal can retrieve the data again, and it matches:
    for object_id in ar.contents:
        print(object_id)
        stored = store.get_value(image_column, object_id)
        print(type(stored))
        print(stored.dtype)

        orig = ar[object_id].image["red"].data

        print(type(orig))
        print(orig.dtype)
        assert np.allclose(stored, orig)
        assert np.array_equal(stored, orig)
Пример #15
0
    def test_get_full_sample_data_subtrait(self, test_data_dir, create_sample):
        """Sample-wide ``wcs`` sub-traits match the per-object ones.

        Each item must be a ``fidia.Trait`` whose ``cdelt1`` is numeric and
        equal to the ``cdelt1`` found by looking up the same object ID
        individually. There must be one item per object in the archive.
        """
        ea = ExampleArchive(basepath=test_data_dir)
        assert isinstance(ea, fidia.Archive)
        sample = fidia.Sample.new_from_archive(ea) if create_sample else ea

        red_images = sample.image['red']
        wcs = red_images.wcs
        print(wcs)

        for item in wcs:
            assert isinstance(item, fidia.Trait)
            print(item)
            print(item.cdelt1)
            assert isinstance(item.cdelt1, (int, float))

            # Cross-check against the single-object lookup.
            assert item.cdelt1 == sample[item.object_id].image['red'].wcs.cdelt1

        expected_count = len(ea.contents)
        assert len(list(wcs)) == expected_count
Пример #16
0
def test_ingestion_benchmarks(benchmark, clean_persistence_database,
                              test_data_dir):
    """Benchmark ingesting a freshly generated dataset into a ``NumpyFileStore``.

    The dataset is generated into its own temporary directory; the value of
    the ``test_data_dir`` fixture is not used by this test.
    """

    with tempfile.TemporaryDirectory() as generated_dir:
        testdata.generate_simple_dataset(generated_dir, 30)

        ar = ExampleArchive(basepath=generated_dir)  # type: fidia.Archive

        def func():
            """Ingest the archive into a new temporary store, then deregister it."""
            with tempfile.TemporaryDirectory() as dal_data_dir:

                store = NumpyFileStore(dal_data_dir)
                store.ingest_archive(ar)

                layers = fidia.dal_host.layers

                # Add this layer to FIDIA's known data access layers
                layers.append(store)

                # Remove layer from FIDIA DAL to avoid problems with other tests:
                del layers[layers.index(store)]

        benchmark(func)
Пример #17
0
    def test_data_getters_persisted(self, clean_persistence_database,
                                    test_data_dir):
        """Check that an archive re-loaded from the mapping database can still fetch data.

        An ``ExampleArchive`` is created, expunged from the session and
        dropped, then queried back by its archive ID. The reloaded object must
        report ``_is_reconstructed`` and must be able to serve the red image
        data of 'Gal1' while no DAL layers are registered.
        """

        from fidia.archive.example_archive import ExampleArchive
        from fidia.archive import Archive

        ar = ExampleArchive(basepath=test_data_dir)

        # Make SQLAlchemy forget about the object:
        fidia.mappingdb_session.expunge(ar)
        # Drop the local reference as well, so the name can only be rebound to
        # whatever the query below returns.
        del ar

        # `.one()` raises unless exactly one matching row exists.
        ar = fidia.mappingdb_session.query(Archive).filter_by(
            _db_archive_id=ExampleArchive.archive_id).one()

        # Check that we actually have reconstructed the object from the
        # database, and not just holding a pointer to the original object:
        assert ar._is_reconstructed is True

        # Ensure no DAL layers are interfering with this test.
        assert len(fidia.dal_host.layers) == 0

        # Retrieve data using original getters
        # (the result is discarded; the access only has to succeed).
        ar["Gal1"].image["red"].data
Пример #18
0
 def example_archive_sample(self):
     """Return the full sample of a default-constructed ExampleArchive."""
     return ExampleArchive().get_full_sample()
Пример #19
0
 def example_sample(self):
     """Build a default ExampleArchive and return its full sample."""
     archive = ExampleArchive()
     full_sample = archive.get_full_sample()
     return full_sample
Пример #20
0
    def test_known_archives_get_by_id(self, test_data_dir):
        """Look up "ExampleArchive" in ``fidia.known_archives.by_id`` and print it."""
        # Guarantee that ExampleArchive will appear in the persistence database:
        ExampleArchive(basepath=test_data_dir)

        archive_id = "ExampleArchive"
        found = fidia.known_archives.by_id[archive_id]
        print(found)
Пример #21
0
 def example_archive(self, test_data_dir):
     """Return an ExampleArchive rooted at ``test_data_dir``."""
     archive = ExampleArchive(basepath=test_data_dir)
     return archive
Пример #22
0
 def test_get_an_astro_object(self, test_data_dir):
     """Indexing a sample by its first key must succeed."""
     archive = ExampleArchive(basepath=test_data_dir)
     sample = fidia.Sample.new_from_archive(archive)
     first_id = next(iter(sample.keys()))
     # Only the lookup itself is exercised; the result is discarded.
     _ = sample[first_id]
Пример #23
0
 def test_sample_contents(self, test_data_dir):
     """Accessing and printing the archive's ``contents`` must succeed."""
     archive = ExampleArchive(basepath=test_data_dir)
     contents = archive.contents
     print(contents)
Пример #24
0
 def test_sample_creation_from_archive(self, test_data_dir):
     """Creating a Sample from an ExampleArchive must succeed."""
     archive = ExampleArchive(basepath=test_data_dir)
     # Only construction is exercised; the resulting sample is not inspected.
     fidia.Sample.new_from_archive(archive)
Пример #25
0
 def test_example_archive(self, test_data_dir):
     """Constructing an ExampleArchive on the test data must succeed."""
     # Only construction is exercised; the archive is not inspected.
     ExampleArchive(basepath=test_data_dir)
Пример #26
0
 def example_archive_sample(self, test_data_dir):
     """Return a Sample built from an ExampleArchive rooted at ``test_data_dir``."""
     archive = ExampleArchive(basepath=test_data_dir)
     return Sample.new_from_archive(archive)