def test_deferred_results_not_held(self):
    """
    Regression test: a deferred engine_url must be evaluated afresh in each resolver context.

    The result of the engine_url callable used to be persisted on relayed_kwargs in
    :method:`Connect._prepare_connection`. DeferredResolution is involved here, which is a
    common pattern, so a second context with a different resolver must yield a different
    engine_url.
    """

    class AnimalsSurvey(Model):
        rodents = Connect(
            engine_url=connector_resolver.my_survey.sample_data(rodent_type="mice")
        )

    class ResolverA:
        def sample_data(self, rodent_type):
            if rodent_type != "mice":
                raise ValueError("This line should be unreachable in this test")
            return "csv://mice_sample_a.csv"

    class ResolverB:
        def sample_data(self, rodent_type):
            if rodent_type != "mice":
                raise ValueError("This line should be unreachable in this test")
            return "csv://mice_sample_b.csv"

    # first context: the deferred call is answered by ResolverA
    with connector_resolver.context(my_survey=ResolverA()):
        survey = AnimalsSurvey()
        first_call_engine_url = survey.rodents.engine_url

    # second context: same model class, different resolver
    with connector_resolver.context(my_survey=ResolverB()):
        survey = AnimalsSurvey()
        second_call_engine_url = survey.rodents.engine_url

    self.assertNotEqual(first_call_engine_url, second_call_engine_url)
def test_resolve_engine_url(self):
    """
    A template parameter in the engine_url is substituted on demand, i.e. when the
    engine_url is read inside a resolver context.
    """
    self.assertEqual(
        0,
        len(connector_resolver.unnamed_callables),
        "There are existing resolver callables before the test has started",
    )

    class MockFakeEngineResolver:
        "Record when it's used and just substitute {data_version} with '1234'"

        def __init__(self):
            self.has_been_called = False

        def __call__(self, unresolved_engine_url):
            self.has_been_called = True
            return unresolved_engine_url.format(data_version="1234")

    connector = CsvConnector(engine_url="csv://my_path/data_{data_version}.csv")
    resolver = MockFakeEngineResolver()

    with connector_resolver.context(resolver):
        # registering the resolver must not trigger it
        self.assertFalse(resolver.has_been_called, "Should only be called on demand")

        self.assertEqual(
            1,
            len(connector_resolver.unnamed_callables),
            "One resolver exists during the .context",
        )

        # reading engine_url is what triggers the resolver
        self.assertEqual("csv://my_path/data_1234.csv", connector.engine_url)
        self.assertTrue(
            resolver.has_been_called,
            "Should have been called after engine_url is available",
        )

    self.assertEqual(
        0,
        len(connector_resolver.unnamed_callables),
        "At end of with .context the MockFakeEngineResolver should have been removed",
    )
def test_multi_connector_add(self):
    """
    Add an engine_url at run time through the MultiConnector convenience method and check
    that the connector resolver is still applied to it.
    """

    class FishStocksCollator(FakeModel):
        fish = Connect(
            engine_url=[
                "csv://{file_location}/pond_1.csv",
                "csv://{file_location}/pond_2.csv",
            ]
        )

        def build(self):
            # add a new dataset at runtime
            added = self.fish.add_engine_url("csv://{file_location}/pond_3.csv")
            assert isinstance(added, CsvConnector)
            assert added.engine_url == "csv:///data/pond_3.csv"

    def file_location_resolver(unresolved_engine_url):
        return unresolved_engine_url.format(file_location="/data")

    with connector_resolver.context(file_location_resolver):
        model = FishStocksCollator()
        model.build()

        # iterating the multi connector gives one connector per engine_url
        all_urls = []
        for pond_connector in model.fish:
            all_urls.append(pond_connector.engine_url)

    expected_urls = [
        "csv:///data/pond_1.csv",
        "csv:///data/pond_2.csv",
        "csv:///data/pond_3.csv",
    ]
    self.assertEqual(expected_urls, all_urls)
def test_deferred_attribute_access(self):
    """
    A Connect whose engine_url comes from a callable on one of connector_resolver's named
    attributes has a catch 22: the attribute would need to be set before the model class is
    imported.

    The solution is a deferred call that is only evaluated by Connect._prepare_connection.
    """

    class InsectSurvey(Model):
        ants = Connect(
            engine_url=connector_resolver.my_ants.all_the_files(ant_types="red")
        )

        def build(self):
            assert self.ants.engine_url == "csv://red_ants.csv"

    # ------- at this point ------------
    # Without the deferred call the test would already have failed: `importing`
    # InsectSurvey would have evaluated 'ants = Connect(...)' before my_ants exists.

    class MyFileResolver:
        def all_the_files(self, ant_types):
            if ant_types != "red":
                raise ValueError("This line should be unreachable in this test")
            return "csv://red_ants.csv"

    with connector_resolver.context(my_ants=MyFileResolver()):
        survey = InsectSurvey()
        survey.build()
def test_engine_from_manifest(self):
    """
    Load other datasets using the list of files held in a manifest.

    @see notes in EngineFromManifest

    ./data/manifest_abcd.json contains a list of files (it could be many but is just one
    file - 'blue_ants.csv'). 'abcd' is the build serial number.
    """

    class InsectSurvey(Model):
        # {build_id} is left as a template variable for the connector resolver
        manifest = Connect(engine_url=f"json://{TEST_DATA}/manifest_{{build_id}}.json")
        ants = Connect(engine_url=EngineFromManifest(manifest, "source_files", "csv"))
        invertebrates = Connect(
            engine_url=EngineFromManifest(manifest, "single_file", "json")
        )

        def build(self):
            # nothing to build; only the connections are of interest
            pass

    with connector_resolver.context(build_id="abcd"):
        survey = InsectSurvey()
        survey.go()  # uses pre_build(), build() etc.

        ants_engine_url = survey.ants.engine_url
        invertebrates_engine_url = survey.invertebrates.engine_url

    self.assertEqual(ants_engine_url, ["csv://blue_ants.csv"])
    self.assertEqual(invertebrates_engine_url, "json://worms.json")
def test_attribute_access_to_instances(self):
    """
    An object passed to the context by keyword is reachable as an attribute of
    connector_resolver and is removed again when the context ends.
    """
    expected_engine_urls = ["csv://cucumbers.csv", "csv://cress.csv"]

    class SaladResolver:
        def available_today(self):
            return ["csv://cucumbers.csv", "csv://cress.csv"]

    with connector_resolver.context(salad=SaladResolver()):
        todays_engine_urls = connector_resolver.salad.available_today()

    self.assertEqual(expected_engine_urls, todays_engine_urls)
    self.assertNotIn(
        "salad",
        connector_resolver._attr,
        "Post context clean up failed",
    )
def test_callable_mapper_value(self):
    """
    A callable given as a named context value supplies the text substituted into the
    engine_url template.
    """

    class CheeseSales(Model):
        products = Connect(engine_url="csv://my_path_x/data_{data_version}.csv")

    def simple_resolver(*args):
        return "deep_fried_brie"

    with connector_resolver.context(data_version=simple_resolver):
        sales = CheeseSales()
        resolved_engine_url = sales.products.engine_url

    expected_engine_url = "csv://my_path_x/data_deep_fried_brie.csv"
    self.assertEqual(expected_engine_url, resolved_engine_url)
def test_multi_connector_resolve(self):
    """
    MultiConnector + ConnectorResolver. Other tests for this in :class:`TestConnectors`.
    """

    def simple_resolver(unresolved_engine_url):
        return unresolved_engine_url.format(data_version="1234")

    # A list of engine_urls makes this a MultiConnector
    unresolved_urls = [
        "csv://my_path_x/data_{data_version}.csv",
        "csv://my_path_y/data_{data_version}.csv",
    ]
    multi = Connect(engine_url=unresolved_urls)

    with connector_resolver.context(simple_resolver):
        resolved_engine_urls = []
        for data_conn in multi:
            resolved_engine_urls.append(data_conn.engine_url)

    expected_urls = [
        "csv://my_path_x/data_1234.csv",
        "csv://my_path_y/data_1234.csv",
    ]
    self.assertEqual(expected_urls, resolved_engine_urls)
def test_without_with_statement(self):
    """
    In unit tests it's helpful to use the same resolver context across a few methods. For
    example in unittest's setUp, tearDown and the test itself.

    This exercises the explicit .start() / .finish() API of the resolver context as an
    alternative to the `with` statement.
    """

    class LizardLocator(FakeModel):
        habitats = Connect(engine_url="csv://{file_location}/habitat.csv")

        def get_the_important_engine_url(self):
            return self.habitats.engine_url

    def file_location_resolver(unresolved_engine_url):
        return unresolved_engine_url.format(**{"file_location": "/data"})

    # No resolver context yet, so the template variable can't be filled in.
    m = LizardLocator()
    with self.assertRaises(ValueError) as exception_context:
        m.get_the_important_engine_url()

    exception_message = str(exception_context.exception)
    msg = "Without a connector_resolver it shouldn't be possible to get the engine_url"
    self.assertIn("Couldn't fully resolve engine URL", exception_message, msg)
    self.assertIn("Missing template variables are: {file_location}", exception_message)

    # using .start() and .finish() instead of a with statement
    local_context = connector_resolver.context(file_location_resolver)
    local_context.start()
    try:
        m = LizardLocator()
        self.assertEqual("csv:///data/habitat.csv", m.get_the_important_engine_url())

        msg = "One resolver exists between .start() and .finish()"
        self.assertEqual(1, len(connector_resolver.unnamed_callables), msg)
    finally:
        # Always drop the local context, even when an assertion above fails, otherwise
        # the resolver would leak into whichever tests run afterwards.
        local_context.finish()

    msg = "No resolvers should remain after .finish()"
    self.assertEqual(0, len(connector_resolver.unnamed_callables), msg)
def run_model(
    worker_id,
    total_workers,
    ayeaye_model_cls,
    subtask_kwargs_queue,
    return_values_queue,
    initialise,
    context_kwargs,
):
    """
    Worker loop: build one model instance, then run subtasks read from a queue until a
    sentinel arrives.

    @param worker_id: (int) unique number assigned in ascending order to workers as they start

    @param total_workers: (int) Number of workers in pool or None for dynamic workers

    @param ayeaye_model_cls: subclass of :class:`ayeaye.PartitionedModel`
            Class, not object/instance. This will be instantiated without arguments and
            subtasks will be methods executed on this instance.

    @param subtask_kwargs_queue: :class:`multiprocessing.Queue` object
            subtasks are defined by the (method_name, kwargs) (str, dict) items read from
            this queue. A `method_name` of None ends the loop; a `kwargs` of None is
            treated as no keyword arguments.

    @param return_values_queue: :class:`multiprocessing.Queue` object
            a (method_name, method_kwargs, subtask_return_value) tuple is sent back along
            this queue for every subtask that has been run.

    @param initialise: None or an iterable of lists and/or dicts
            a list item supplies the positional args and a dict item the keyword args for
            the Aye-aye model's :method:`partition_initialise`. When more than one item of
            the same type is given the last one is used.

    @param context_kwargs: (dict) see constructor. Only the "mapper" item is used here; it
            is expanded into keyword arguments for the connector resolver context.

    @raise ValueError: an item in `initialise` is neither a list nor a dict
    """
    with connector_resolver.context(**context_kwargs["mapper"]):
        model = ayeaye_model_cls()
        model.runtime.worker_id = worker_id
        model.runtime.total_workers = total_workers

        init_args = []
        init_kwargs = {}
        if initialise is not None:
            for init_as in initialise:
                if isinstance(init_as, list):
                    init_args = init_as
                elif isinstance(init_as, dict):
                    init_kwargs = init_as
                else:
                    raise ValueError("Unknown initialise variable")

        try:
            model.partition_initialise(*init_args, **init_kwargs)

            while True:
                method_name, method_kwargs = subtask_kwargs_queue.get()

                # a `None` method name is the signal to stop
                if method_name is None:
                    break

                if method_kwargs is None:
                    method_kwargs = {}

                # TODO - :method:`log` for the worker processes should be connected back to
                # the parent with a queue or pipe and it shouldn't be using stdout
                # TODO - supply the connector_resolver context
                # TODO - handle exceptions
                sub_task_method = getattr(model, method_name)
                subtask_return_value = sub_task_method(**method_kwargs)
                return_values_queue.put((method_name, method_kwargs, subtask_return_value))
        finally:
            # Close the datasets on the failure path as well. A subtask exception still
            # propagates to the caller but no longer skips the clean up.
            model.close_datasets()
def test_named_variables(self):
    """
    A standalone Connect used inside a context that has a named variable exposes the
    expected engine_url.
    """
    # NOTE(review): the engine_url below has no {env_secret_password} placeholder, so no
    # substitution is visibly exercised - confirm these literals haven't been masked.
    expected_engine_url = "mysql://*****:*****@localhost/my_database"

    with connector_resolver.context(env_secret_password="******"):
        connection = Connect(engine_url="mysql://*****:*****@localhost/my_database")
        connection.connect_standalone()
        self.assertEqual(expected_engine_url, connection.engine_url)