示例#1
0
    def test_deferred_results_not_held(self):
        """
        Regression test: a deferred engine_url must be evaluated afresh for each context.

        The value returned by the engine_url callable used to be persisted on relayed_kwargs in
        :method:`Connect._prepare_connection`. DeferredResolution is the mechanism in play here
        and it's a common pattern, so two contexts with different resolvers must yield two
        different engine_urls from the same model class.
        """
        # The model class is declared before any resolver has been attached to the context.
        class AnimalsSurvey(Model):
            rodents = Connect(
                engine_url=connector_resolver.my_survey.sample_data(rodent_type="mice")
            )

        class ResolverA:
            def sample_data(self, rodent_type):
                if rodent_type != "mice":
                    raise ValueError("This line should be unreachable in this test")
                return "csv://mice_sample_a.csv"

        with connector_resolver.context(my_survey=ResolverA()):
            survey_a = AnimalsSurvey()
            first_call_engine_url = survey_a.rodents.engine_url

        class ResolverB:
            def sample_data(self, rodent_type):
                if rodent_type != "mice":
                    raise ValueError("This line should be unreachable in this test")
                return "csv://mice_sample_b.csv"

        with connector_resolver.context(my_survey=ResolverB()):
            survey_b = AnimalsSurvey()
            second_call_engine_url = survey_b.rodents.engine_url

        # Had the first result been held on to, both values would be identical.
        self.assertNotEqual(first_call_engine_url, second_call_engine_url)
示例#2
0
    def test_resolve_engine_url(self):
        """
        A template parameter in the engine_url is only substituted when engine_url is read.
        """
        self.assertEqual(
            0,
            len(connector_resolver.unnamed_callables),
            "There are existing resolver callables before the test has started",
        )

        class MockFakeEngineResolver:
            """Remember whether it was invoked; swaps {data_version} for '1234'."""

            def __init__(self):
                self.has_been_called = False

            def __call__(self, unresolved_engine_url):
                self.has_been_called = True
                return unresolved_engine_url.format(data_version='1234')

        c = CsvConnector(engine_url="csv://my_path/data_{data_version}.csv")
        m_resolver = MockFakeEngineResolver()

        with connector_resolver.context(m_resolver):
            # Entering the context alone must not trigger resolution.
            self.assertFalse(m_resolver.has_been_called, "Should only be called on demand")
            self.assertEqual(
                1,
                len(connector_resolver.unnamed_callables),
                "One resolver exists during the .context",
            )

            # Reading engine_url is what invokes the resolver.
            self.assertEqual('csv://my_path/data_1234.csv', c.engine_url)
            self.assertTrue(
                m_resolver.has_been_called,
                "Should have been called after engine_url is available",
            )

        self.assertEqual(
            0,
            len(connector_resolver.unnamed_callables),
            "At end of with .context the MockFakeEngineResolver should have been removed",
        )
示例#3
0
    def test_multi_connector_add(self):
        """
        Add an engine_url at run time through MultiConnector's convenience method and check
        that the connector resolver is still applied to the new and the existing connectors.
        """
        class FishStocksCollator(FakeModel):
            fish = Connect(
                engine_url=[
                    'csv://{file_location}/pond_1.csv',
                    'csv://{file_location}/pond_2.csv',
                ]
            )

            def build(self):
                # a third dataset joins the two declared above while the model is running
                c = self.fish.add_engine_url('csv://{file_location}/pond_3.csv')
                assert isinstance(c, CsvConnector)
                assert c.engine_url == 'csv:///data/pond_3.csv'

        def file_location_resolver(unresolved_engine_url):
            return unresolved_engine_url.format(file_location='/data')

        expected_urls = [
            'csv:///data/pond_1.csv',
            'csv:///data/pond_2.csv',
            'csv:///data/pond_3.csv',
        ]

        with connector_resolver.context(file_location_resolver):
            collator = FishStocksCollator()
            collator.build()
            # engine_urls are read inside the context so the resolver can fill the template
            all_urls = [pond.engine_url for pond in collator.fish]

        self.assertEqual(expected_urls, all_urls)
示例#4
0
    def test_deferred_attribute_access(self):
        """
        Catch 22: when a Connect takes its engine_url from a callable that relies on
        connector_resolver's named attributes, the resolver would need that attribute to be set
        before the model class is imported. The way out is a deferred call which is only
        evaluated by Connect._prepare_connection.
        """
        class InsectSurvey(Model):
            ants = Connect(engine_url=connector_resolver.my_ants.all_the_files(ant_types="red"))

            def build(self):
                assert self.ants.engine_url == "csv://red_ants.csv"

        # ------- checkpoint ------------
        # Reaching this line shows the call was deferred: without deferral, `importing`
        # InsectSurvey would already have evaluated 'ants = Connect(...)' and failed.

        class MyFileResolver:
            def all_the_files(self, ant_types):
                if ant_types != "red":
                    raise ValueError("This line should be unreachable in this test")
                return "csv://red_ants.csv"

        with connector_resolver.context(my_ants=MyFileResolver()):
            survey = InsectSurvey()
            survey.build()
示例#5
0
    def test_engine_from_manifest(self):
        """
        Load other datasets using the file names listed in a manifest.

        @see notes in EngineFromManifest

        ./data/manifest_abcd.json holds what could be a list of files but is in fact just the
        one file - 'blue_ants.csv'.

        'abcd' is the build serial number.
        """
        class InsectSurvey(Model):
            # {build_id} stays a template variable; only TEST_DATA is interpolated here
            manifest = Connect(engine_url=f"json://{TEST_DATA}/manifest_{{build_id}}.json")
            ants = Connect(
                engine_url=EngineFromManifest(manifest, "source_files", "csv")
            )
            invertebrates = Connect(
                engine_url=EngineFromManifest(manifest, "single_file", "json")
            )

            def build(self):
                return

        with connector_resolver.context(build_id="abcd"):
            survey = InsectSurvey()
            survey.go()  # uses pre_build(), build() etc.
            ants_engine_url = survey.ants.engine_url
            invertebrates_engine_url = survey.invertebrates.engine_url

        self.assertEqual(ants_engine_url, ["csv://blue_ants.csv"])
        self.assertEqual(invertebrates_engine_url, "json://worms.json")
示例#6
0
    def test_attribute_access_to_instances(self):
        """
        An object passed to the context by keyword is reachable as an attribute of
        connector_resolver inside the context and is gone from connector_resolver._attr after.
        """
        class SaladResolver:
            def available_today(self):
                return ["csv://cucumbers.csv", "csv://cress.csv"]

        expected_engine_urls = ["csv://cucumbers.csv", "csv://cress.csv"]

        with connector_resolver.context(salad=SaladResolver()):
            todays_engine_urls = connector_resolver.salad.available_today()

        self.assertEqual(expected_engine_urls, todays_engine_urls)
        self.assertNotIn('salad', connector_resolver._attr, "Post context clean up failed")
示例#7
0
    def test_callable_mapper_value(self):
        """
        A callable given as a named context value supplies the text substituted for the
        matching template variable in the engine_url.
        """
        def simple_resolver(*args):
            return "deep_fried_brie"

        class CheeseSales(Model):
            products = Connect(engine_url="csv://my_path_x/data_{data_version}.csv")

        with connector_resolver.context(data_version=simple_resolver):
            sales = CheeseSales()
            resolved_engine_url = sales.products.engine_url

        self.assertEqual('csv://my_path_x/data_deep_fried_brie.csv', resolved_engine_url)
示例#8
0
    def test_multi_connector_resolve(self):
        """
        ConnectorResolver applied to a MultiConnector.
        Further tests for this live in :class:`TestConnectors`.
        """
        def simple_resolver(unresolved_engine_url):
            return unresolved_engine_url.format(data_version='1234')

        # Passing a list of engine_urls makes this a MultiConnector
        unresolved_urls = [
            "csv://my_path_x/data_{data_version}.csv",
            "csv://my_path_y/data_{data_version}.csv",
        ]
        c = Connect(engine_url=unresolved_urls)

        expected_urls = ['csv://my_path_x/data_1234.csv', 'csv://my_path_y/data_1234.csv']

        with connector_resolver.context(simple_resolver):
            resolved_engine_urls = [child.engine_url for child in c]

        self.assertEqual(expected_urls, resolved_engine_urls)
示例#9
0
    def test_without_with_statement(self):
        """
        In unit tests it's helpful to use the same resolver context across a few methods. For
        example in unittest's setUp, tearDown and the test itself.

        Checks three things:
        - without any resolver the engine_url can't be built and a ValueError names the
          missing template variable;
        - between .start() and .finish() the resolver is registered and used;
        - after .finish() no unnamed resolver callables remain.
        """
        class LizardLocator(FakeModel):
            habitats = Connect(engine_url='csv://{file_location}/habitat.csv')

            def get_the_important_engine_url(self):
                return self.habitats.engine_url

        def file_location_resolver(unresolved_engine_url):
            return unresolved_engine_url.format(**{'file_location': '/data'})

        m = LizardLocator()
        with self.assertRaises(ValueError) as exception_context:
            m.get_the_important_engine_url()

        exception_message = str(exception_context.exception)
        msg = "Without a connector_resolver it shouldn't be possible to get the engine_url"
        self.assertIn("Couldn't fully resolve engine URL", exception_message, msg)
        self.assertIn("Missing template variables are: {file_location}", exception_message)

        # using .start() and .finish() instead of a with statement
        local_context = connector_resolver.context(file_location_resolver)
        local_context.start()
        try:
            m = LizardLocator()
            self.assertEqual('csv:///data/habitat.csv', m.get_the_important_engine_url())

            msg = "One resolver exists between .start() and .finish()"
            self.assertEqual(1, len(connector_resolver.unnamed_callables), msg)
        finally:
            # Drop the local context even when an assertion above fails; a resolver left
            # behind would still be registered when later tests run.
            local_context.finish()

        msg = "No resolvers should remain after .finish()"
        self.assertEqual(0, len(connector_resolver.unnamed_callables), msg)
示例#10
0
    def run_model(
        worker_id,
        total_workers,
        ayeaye_model_cls,
        subtask_kwargs_queue,
        return_values_queue,
        initialise,
        context_kwargs,
    ):
        """
        Build one model instance and run subtasks read from a queue until a stop item arrives.

        @param worker_id: (int)
            unique number assigned in ascending order to workers as they start

        @param total_workers: (int)
            Number of workers in pool or None for dynamic workers

        @param ayeaye_model_cls: subclass of :class:`ayeaye.PartitionedModel`
            Class, not object/instance.
            It is instantiated without arguments and each subtask is a method executed on that
            instance.

        @param subtask_kwargs_queue: :class:`multiprocessing.Queue` object
            Each item is a (method_name, kwargs) (str, dict) pair describing one subtask. An
            item whose method_name is None ends the loop; kwargs of None means no arguments.

        @param return_values_queue: :class:`multiprocessing.Queue` object
            After each subtask a (method_name, method_kwargs, subtask_return_value) tuple is
            put on this queue.

        @param initialise: None, dict or list
            args or kwargs for Aye-aye model's :method:`partition_initialise`.
            When not None it is iterated: a list item becomes the positional arguments, a dict
            item becomes the keyword arguments and anything else raises ValueError.

        @param context_kwargs: (dict)
            see constructor. The "mapper" entry is passed as keyword arguments to
            connector_resolver.context.
        """
        mapper_kwargs = context_kwargs["mapper"]
        with connector_resolver.context(**mapper_kwargs):
            model = ayeaye_model_cls()

            model.runtime.worker_id = worker_id
            model.runtime.total_workers = total_workers

            # Later items of the same type replace earlier ones.
            init_args, init_kwargs = [], {}
            for init_as in (initialise if initialise is not None else ()):
                if isinstance(init_as, dict):
                    init_kwargs = init_as
                elif isinstance(init_as, list):
                    init_args = init_as
                else:
                    raise ValueError("Unknown initialise variable")

            model.partition_initialise(*init_args, **init_kwargs)

            while True:
                next_subtask = subtask_kwargs_queue.get()
                method_name, method_kwargs = next_subtask
                if method_name is None:
                    # stop item - no more subtasks for this worker
                    break

                method_kwargs = {} if method_kwargs is None else method_kwargs

                # TODO - :method:`log` for the worker processes should be connected back to the parent
                # with a queue or pipe and it shouldn't be using stdout

                # TODO - supply the connector_resolver context

                # TODO - handle exceptions

                sub_task_method = getattr(model, method_name)
                subtask_return_value = sub_task_method(**method_kwargs)
                completed = (method_name, method_kwargs, subtask_return_value)
                return_values_queue.put(completed)

            model.close_datasets()
示例#11
0
    def test_named_variables(self):
        """
        A standalone connection made while a named context variable is set reports the
        expected engine_url.

        NOTE(review): the password value and the credentials in both URLs look masked, so the
        engine_url as written has no template variable to substitute - confirm against the
        intended values.
        """
        with connector_resolver.context(env_secret_password="******"):
            connection = Connect(engine_url="mysql://*****:*****@localhost/my_database")
            connection.connect_standalone()
            resolved_engine_url = connection.engine_url
            self.assertEqual('mysql://*****:*****@localhost/my_database', resolved_engine_url)