示例#1
0
        class InsectSurvey(Model):
            """
            Model whose `ants` and `invertebrates` engine_urls are built by
            :class:`EngineFromManifest` from the `manifest` dataset.
            """

            # doubled braces keep `{build_id}` as a literal placeholder in the finished url
            manifest = Connect(
                engine_url=f"json://{TEST_DATA}/manifest_{{build_id}}.json")
            ants = Connect(engine_url=EngineFromManifest(
                manifest, "source_files", "csv"))
            invertebrates = Connect(engine_url=EngineFromManifest(
                manifest, "single_file", "json"))

            def build(self):
                # nothing to build
                pass
示例#2
0
 def test_connect_spare_kwargs(self):
     """
     Subclasses of :class:`ayeaye.connectors.base.DataConnector` may accept their own custom
     kwargs. Any kwarg left unclaimed should end in an exception, so that typos and arguments
     that never come into play are noticed rather than silently ignored.
     """
     connection = Connect(engine_url="fake://foo", doesntexist="oh dear")
     # the kwargs are not used until an engine_url is needed, so the error is expected
     # when the connection is prepared
     with self.assertRaises(ValueError):
         connection._prepare_connection()
示例#3
0
    def test_construction_args(self):
        """
        `ref` with `engine_url`, and either of those with `models`, are invalid argument
        combinations; each one must raise a ValueError.
        """
        tsv_engine_url = "tsv://" + EXAMPLE_TSV_PATH

        with self.assertRaises(
                ValueError, msg="Ref and engine_url are mutually exclusive"):
            Connect(ref="x", engine_url=tsv_engine_url)

        model_data_msg = "Ref + engine_url are for DataConnectors and models is for ayeaye.models"
        # each of these data arguments is paired with `models`
        data_arguments = (
            {"ref": "x"},
            {"engine_url": tsv_engine_url},
        )
        for data_kwargs in data_arguments:
            with self.assertRaises(ValueError, msg=model_data_msg):
                Connect(**data_kwargs, models=AbstractFakeModel)
示例#4
0
        class SeabedSurvey(Model):
            """
            Model whose `manifest` connection is declared without arguments and is given
            its engine_url when the model is instantiated.
            """

            manifest = Connect()
            mapper = SeabedMapper(
                manifest_dataset=manifest,
                field_name="more_files",
            )
            x_files = Connect(engine_url=mapper.x)

            def __init__(self, manifest_file, **kwargs):
                super().__init__(**kwargs)
                # point the manifest connection at the file supplied by the caller
                manifest_url = f"json://{manifest_file};encoding=utf-8-sig"
                self.manifest.update(engine_url=manifest_url)

            def build(self):
                # nothing to build
                pass
示例#5
0
    def test_compile_time_multiple_engine_urls(self):
        """
        engine_url may be a list of engine_urls; iterating the Connect then gives one
        data connector per url. A dictionary version might be added in the future.
        """
        c = Connect(engine_url=[
            "tsv://" + EXAMPLE_TSV_PATH,
            "csv://" + EXAMPLE_CSV_PATH,
        ])

        # connector class expected at each position of the iteration
        expected_types = (TsvConnector, CsvConnector)

        common_names = []
        for position, data_connector in enumerate(c):
            if position >= len(expected_types):
                raise ValueError(
                    "Connect has more than expected data connectors")

            self.assertIsInstance(data_connector, expected_types[position])
            common_names.extend(
                animal.common_name for animal in data_connector)

        self.assertEqual(
            [
                "Goeldi's marmoset",
                "Common squirrel monkey",
                "Crab-eating macaque",
                "Crown of thorns starfish",
                "Golden dart frog",
            ],
            common_names,
        )
示例#6
0
 def test_standalone_as_proxy(self):
     """
     An attribute that belongs to the connector subclass, not to the DataConnector abstract
     class, can be read through Connect.
     """
     engine_url = "csv://" + EXAMPLE_CSV_PATH + ";encoding=magic_encoding"
     animals = Connect(engine_url=engine_url)
     self.assertEqual("magic_encoding", animals.encoding)
示例#7
0
    def test_connect_callable_kwargs(self):
        """
        :class:`ayeaye.connectors.fake.FakeDataConnector` has an optional kwarg,
        'quantum_accelerator_module', which can be set with a literal or with a callable.
        """
        # literal value, standalone Connect
        literal_connect = Connect(engine_url="fake://MyDataset",
                                  quantum_accelerator_module="entanglement_v1")
        self.assertEqual({"fake": "data"}, literal_connect.data[0])
        self.assertEqual("entanglement_v1",
                         literal_connect.quantum_accelerator_module)

        def simple_callable():
            "simple means it doesn't take arguments"
            return "entanglement_v2"

        # TODO - standalone is only calling the callable after _prepare_connection
        # this isn't right
        # c = Connect(engine_url="fake://MyDataset", quantum_accelerator_module=simple_callable)
        # self.assertEqual({"fake": "data"}, c.data[0])

        # callable value, Connect declared on a model
        class QuatumSort(AbstractFakeModel):
            source = Connect(engine_url="fake://MyDataset",
                             quantum_accelerator_module=simple_callable)

        model = QuatumSort()
        accelerator = model.source.quantum_accelerator_module
        self.assertEqual("entanglement_v2", accelerator)
示例#8
0
 def test_connect_standalone(self):
     """
     :class:`ayeaye.Connect` is usable outside of the ETL, so data discovery can work the
     same way as a full :class:`ayeaye.Model`.
     """
     # happy path - Connect is not part of any ayeaye.Model here
     dataset = Connect(engine_url="fake://MyDataset")
     first_record = dataset.data[0]
     self.assertEqual({"fake": "data"}, first_record)
示例#9
0
        class LandAnimalsSurvey(Model):
            """
            Model that takes `bad_weather` from a :class:`ManifestProperty` over its
            `manifest` connection; the manifest's engine_url is set at instantiation.
            """

            manifest = Connect()
            build_attributes = ManifestProperty(manifest_dataset=manifest)
            bad_weather = build_attributes.bad_weather

            def __init__(self, manifest_file, **kwargs):
                super().__init__(**kwargs)
                # point the manifest connection at the file supplied by the caller
                manifest_url = f"json://{manifest_file};encoding=utf-8-sig"
                self.manifest.update(engine_url=manifest_url)
示例#10
0
 def test_custom_kwargs_are_passed(self):
     """
     ayeaye.Connect should relay kwargs to subclasses of DataConnector
     """
     # bigquery is used because it has a custom 'credentials' kwarg
     bigquery_url = "bigquery://projectId=my_project;datasetId=nice_food;tableId=cakes;"
     connection = Connect(engine_url=bigquery_url, credentials="hello_world")

     # on demand connection
     self.assertIsNotNone(connection.data)

     standalone = connection._standalone_connection
     self.assertEqual("hello_world", standalone.credentials)
示例#11
0
        class FishStocksCollator(FakeModel):
            """
            Model declared with two pond datasets; `build` attaches a third one.
            """

            fish = Connect(engine_url=[
                "csv://{file_location}/pond_1.csv",
                "csv://{file_location}/pond_2.csv",
            ])

            def build(self):
                # add a new dataset at runtime
                pond_3 = self.fish.add_engine_url("csv://{file_location}/pond_3.csv")
                assert isinstance(pond_3, CsvConnector)
                assert pond_3.engine_url == "csv:///data/pond_3.csv"
示例#12
0
    def test_manifest_mapper_find_mapper_methods(self):
        """
        `methods_mapper` should have one key per `map_*` method, named without the prefix.
        """

        class SuperMapper(AbstractManifestMapper):
            def map_xyz(self):
                pass

            def map_abc(self):
                pass

        manifest = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        mapper = SuperMapper(manifest_dataset=manifest, field_name="more_files")

        discovered = set(mapper.methods_mapper.keys())
        self.assertEqual({"xyz", "abc"}, discovered)
示例#13
0
    def test_manifest_iterate(self):
        """
        Iterate the mapper and check the fanout engine_urls of the "z.ndjson" item.
        """
        manifest = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        m = MagicMapper(manifest_dataset=manifest, field_name="more_files")

        # just test results for one mapping (fanout) for one manifest listed file;
        # iteration stops at the first matching item
        z_engine_set = next(
            (engine_set for engine_set in m
             if engine_set.manifest_item == "z.ndjson"),
            None,
        )
        if z_engine_set is None:
            raise ValueError("test item not found")

        expected = ["csv://z.ndjson.csv", "ndjson://z.ndjson.ndjson"]
        self.assertEqual(z_engine_set.fanout, expected)
示例#14
0
    def test_replace_existing_connect(self):
        """
        Only a Connect instance may be assigned over a model's Connect attribute, and
        assigning one replaces the engine_url seen through that attribute.
        """

        class FakeModel(AbstractFakeModel):
            insects = Connect(engine_url="fake://bugsDB")

        model = FakeModel()

        # anything that isn't a Connect is refused
        with self.assertRaises(ValueError) as raised:
            model.insects = "this is a string, not an instance of Connect"
        self.assertEqual("Only Connect instances can be set",
                         str(raised.exception))

        # checked before `insects` is read for the first time
        self.assertEqual({}, model._connections,
                         "Connections not initialised prior to access")
        self.assertEqual("fake://bugsDB", model.insects.engine_url,
                         "Original connection")

        replacement = Connect(engine_url="fake://creepyCrawliesDB")
        model.insects = replacement
        self.assertEqual("fake://creepyCrawliesDB", model.insects.engine_url,
                         "New connection")
示例#15
0
    def test_manifest_callable(self):
        """
        A map_xxx() method becomes an .xxx attribute that can be called later.
        """
        manifest = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        m = MagicMapper(manifest_dataset=manifest, field_name="more_files")

        # take the attribute now, without calling it
        deferred_bijection = m.bijection

        # ... it's later now. Call it.
        # note - self.map_bijection() returns [(manifest_file, engine_url)..] and
        # .bijection just returns the engine_urls
        self.assertEqual(
            ["json://x.ndjson", "json://y.ndjson", "json://z.ndjson"],
            deferred_bijection(),
        )
示例#16
0
    def test_multi_connector_resolve(self):
        """
        MultiConnector + ConnectorResolver.
        Other tests for this in :class:`TestConnectors`.
        """

        def simple_resolver(unresolved_engine_url):
            return unresolved_engine_url.format(data_version="1234")

        unresolved_urls = [
            "csv://my_path_x/data_{data_version}.csv",
            "csv://my_path_y/data_{data_version}.csv",
        ]
        # A MultiConnector
        c = Connect(engine_url=unresolved_urls)

        # engine_urls are read while the resolver is active
        resolved_engine_urls = []
        with connector_resolver.context(simple_resolver):
            for data_conn in c:
                resolved_engine_urls.append(data_conn.engine_url)

        self.assertEqual(
            ["csv://my_path_x/data_1234.csv", "csv://my_path_y/data_1234.csv"],
            resolved_engine_urls,
        )
示例#17
0
    def test_manifest_full_map(self):
        """
        `full_map` should hold, for each of x/y/z.ndjson, the engine_urls produced by the
        bijection, collapse_in and fanout mappings.
        """
        manifest = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        m = MagicMapper(manifest_dataset=manifest, field_name="more_files")

        # the three entries differ only in the file stem
        expected = {
            f"{stem}.ndjson": {
                "bijection": [f"json://{stem}.ndjson"],
                "collapse_in": ["csv://results_summary.csv"],
                "fanout": [
                    f"csv://{stem}.ndjson.csv",
                    f"ndjson://{stem}.ndjson.ndjson",
                ],
            }
            for stem in ("x", "y", "z")
        }
        self.assertEqual(expected, m.full_map)
示例#18
0
 class CheeseSales(Model):
     """Model declaring a single `products` csv dataset."""
     # the engine_url is declared with the `{data_version}` placeholder still in it;
     # presumably it is filled in by a connector resolver at run time - TODO confirm
     products = Connect(engine_url="csv://my_path_x/data_{data_version}.csv")
示例#19
0
 class AnimalsSurvey(Model):
     """Model declaring a single `rodents` dataset."""
     # engine_url is whatever `sample_data(rodent_type="mice")` returns when this class
     # body is executed
     rodents = Connect(
         engine_url=connector_resolver.my_survey.sample_data(rodent_type="mice"))
示例#20
0
    def test_named_variables(self):
        """
        Connect standalone inside a resolver context that supplies `env_secret_password`,
        then check the connection's engine_url.
        """
        # NOTE(review): the password and the credentials in both urls are asterisks, which
        # looks like redaction of the original values - confirm against the upstream test.

        with connector_resolver.context(env_secret_password="******"):
            x = Connect(engine_url="mysql://*****:*****@localhost/my_database")
            x.connect_standalone()
            self.assertEqual('mysql://*****:*****@localhost/my_database', x.engine_url)
示例#21
0
        class LizardLocator(FakeModel):
            """Model with one `habitats` dataset and an accessor for its engine_url."""

            habitats = Connect(engine_url="csv://{file_location}/habitat.csv")

            def get_the_important_engine_url(self):
                """Return the engine_url of the `habitats` connection."""
                habitats_connection = self.habitats
                return habitats_connection.engine_url
示例#22
0
        class InsectSurvey(Model):
            """Model declaring a single `ants` dataset."""
            # engine_url is the value returned by `all_the_files(ant_types="red")`, which is
            # called when this class body is executed
            ants = Connect(engine_url=connector_resolver.my_ants.all_the_files(ant_types="red"))

            def build(self):
                # when build runs the connection is expected to expose this concrete csv url
                assert self.ants.engine_url == "csv://red_ants.csv"
示例#23
0
 class AnimalsModel(AbstractFakeModel):
     """Model declaring a single `animals` csv dataset located at EXAMPLE_CSV_PATH."""
     animals = Connect(engine_url="csv://" + EXAMPLE_CSV_PATH)
示例#24
0
    def test_callable_engine_url(self):
        """
        engine_url can be given as a callable that returns the url.
        """

        def fixed_engine_url():
            # always the same url
            return "fake://MyDataset"

        connection = Connect(engine_url=fixed_engine_url)
        first_record = connection.data[0]
        self.assertEqual({"fake": "data"}, first_record, "Example data not found")
示例#25
0
        class QuatumSort(AbstractFakeModel):
            """Model whose `source` connection is given a `quantum_factory` kwarg."""
            # `q_fact` is defined outside this class and passed through as a Connect kwarg
            source = Connect(engine_url="fake://MyDataset",
                             quantum_factory=q_fact)

            def calculate_result(self):
                """Return what the connection's `quantum_factory` gives for "quantum dynamics"."""
                return self.source.quantum_factory("quantum dynamics")
示例#26
0
 class QuatumSort(AbstractFakeModel):
     """Model whose `source` connection is given a `quantum_accelerator_module` kwarg."""
     # `simple_callable` is defined outside this class; the function object itself is passed,
     # it is not called here
     source = Connect(engine_url="fake://MyDataset",
                      quantum_accelerator_module=simple_callable)
示例#27
0
 class AnimalsModel(AbstractFakeModel):
     """Model whose `animals` connection is declared with an empty list of engine_urls."""
     animals = Connect(engine_url=[])
示例#28
0
 class FakeModel(AbstractFakeModel):
     """Model declaring a single `insects` dataset on the fake engine."""
     insects = Connect(engine_url="fake://bugsDB")
示例#29
0
class FakeModel:
    """Plain class (no Model base) that declares a Connect as a class attribute."""

    insects = Connect(engine_url="fake://bugsDB")

    def __init__(self):
        # starts with an empty mapping of connections
        self._connections = dict()
示例#30
0
 def __init__(self):
     # This Connect is assigned on the instance rather than declared on the class, so it is
     # just a variable and its descriptor methods aren't called.
     engine_url = "csv://" + EXAMPLE_CSV_PATH
     self.animals = Connect(engine_url=engine_url)