def test_invalidate_cascade(flowmachine_connect):
    """
    Invalidating with cascade=False removes only the invalidated query's
    cache entry, leaving queries that depend on it stored.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    # Precondition: everything in the chain is cached.
    for query in (dl, modal_a, flows):
        assert query.is_stored
    dl.invalidate_db_cache(cascade=False)
    # Only the daily location is dropped; dependents survive.
    assert not dl.is_stored
    assert modal_a.is_stored
    assert flows.is_stored
    assert not cache_table_exists(get_db(), dl.query_id)
    assert cache_table_exists(get_db(), modal_a.query_id)
    assert bool(get_db().fetch("SELECT * FROM cache.dependencies"))
def test_invalidate_cache_multi(flowmachine_connect):
    """
    Invalidating a simple query that is part of a bigger one drops both
    tables, cleans up dependencies and removes both from cache.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal.store().result()
    dl.invalidate_db_cache()
    assert not dl.is_stored
    assert not modal.is_stored

    def in_cache_table(query_hash):
        # True if the cache.cached metadata table still has a row for this query.
        return bool(
            flowmachine_connect.fetch(
                f"SELECT * FROM cache.cached WHERE query_id='{query_hash}'"
            )
        )

    assert not in_cache_table(dl.md5)
    assert not in_cache_table(modal.md5)
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
def test_invalidate_cascade(flowmachine_connect):
    """
    Invalidation with cascade=False drops only the invalidated query,
    leaving its dependents cached.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    dl.invalidate_db_cache(cascade=False)
    assert not dl.is_stored
    assert modal_a.is_stored
    assert flows.is_stored

    def in_cache_table(query_hash):
        # True if the cache.cached metadata table still has a row for this query.
        return bool(
            flowmachine_connect.fetch(
                f"SELECT * FROM cache.cached WHERE query_id='{query_hash}'"
            )
        )

    assert not in_cache_table(dl.md5)
    assert in_cache_table(modal_a.md5)
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
def test_do_cache_multi(flowmachine_connect):
    """
    A query containing subqueries can have its cache metadata written.
    """
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal._db_store_cache_metadata()
    assert cache_table_exists(flowmachine_connect, modal.md5)
def test_can_be_aggregated_lon_lat(get_dataframe):
    """
    A ModalLocation built on lon-lat spatial units can be spatially aggregated.
    """
    locations = [
        daily_location(day, spatial_unit=make_spatial_unit("lon-lat"), method="last")
        for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    modal = ModalLocation(*locations)
    result = get_dataframe(modal.aggregate())
    assert list(result.columns) == ["lon", "lat", "value"]
def test_deps_cache_multi():
    """
    The stored dependencies of a query include its cached subqueries.
    """
    stored_dl = daily_location("2016-01-01")
    stored_dl.store().result()
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    expected_dep = stored_dl.query_id
    assert len(modal._get_stored_dependencies()) == 4
    assert expected_dep in [
        dep.query_id for dep in modal._get_stored_dependencies()
    ]
def test_store_cache_multi(flowmachine_connect):
    """
    Storing a query containing subqueries also caches it.
    """
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal.store().result()
    # Storing should have cached the query as well.
    assert modal.is_stored
    assert cache_table_exists(get_db(), modal.query_id)
def test_do_cache_nested(flowmachine_connect):
    """
    A query containing nested subqueries can have its cache metadata written.
    """
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    write_cache_metadata(get_db(), flows)
    assert cache_table_exists(get_db(), flows.query_id)
def test_can_be_aggregated_latlong(get_dataframe):
    """
    Query can be aggregated to a spatial level with lat-lon data.
    """
    # NOTE(review): this uses the legacy `level=` keyword and expects a
    # "total" column, whereas the lon-lat variant of this test uses
    # spatial_unit=make_spatial_unit(...) and a "value" column — confirm
    # which flowmachine API version this test targets before modernizing.
    hl = ModalLocation(
        *[
            daily_location(d, level="lat-lon", method="last")
            for d in list_of_dates("2016-01-01", "2016-01-03")
        ]
    )
    agg = hl.aggregate()
    df = get_dataframe(agg)
    assert ["lat", "lon", "total"] == list(df.columns)
def test_do_cache_multi(flowmachine_connect):
    """
    A query containing subqueries can have its cache metadata written.
    """
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal._db_store_cache_metadata()
    # The metadata table should now record this query as cached.
    assert bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{modal.md5}'"
        )
    )
def test_store_cache_nested(flowmachine_connect):
    """
    Storing a query with nested subqueries also caches it.
    """
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    # Storing the outer query should cache it too.
    assert flows.is_stored
    assert cache_table_exists(get_db(), flows.query_id)
def test_store_cache_multi(flowmachine_connect):
    """
    Storing a query containing subqueries also caches it.
    """
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal.store().result()
    # Storing should have cached the query as well.
    assert modal.is_stored
    assert bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{modal.md5}'"
        )
    )
def test_contact_reference_location_no_spatial_unit_raises():
    """
    ValueError is raised when the contact_location passed in lacks a
    spatial_unit attribute.
    """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    daily_locs = [
        daily_location(
            day,
            spatial_unit=make_spatial_unit("versioned-cell"),
            subscriber_subset=balance.counterparts_subset(include_subscribers=True),
        )
        for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    modal = ModalLocation(*daily_locs)
    # Wrapping the ModalLocation in a CustomQuery strips its spatial_unit
    # attribute, which should cause ContactReferenceLocationStats to raise.
    stripped = CustomQuery(modal.get_query(), modal.column_names)
    with pytest.raises(ValueError):
        ContactReferenceLocationStats(balance, stripped)
def test_do_cache_nested(flowmachine_connect):
    """
    A query containing nested subqueries can have its cache metadata written.
    """
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows._db_store_cache_metadata()
    # The metadata table should now record the nested query as cached.
    assert bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{flows.md5}'"
        )
    )
def test_contact_reference_location_stats(get_dataframe, statistic, msisdn, spatial_unit_type, want):
    """
    Hand-picked ContactReferenceLocationStats values match expectations
    (parametrized over statistic, subscriber and spatial unit type).
    """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    reference = ModalLocation(
        *[
            daily_location(
                day,
                spatial_unit=make_spatial_unit(spatial_unit_type),
                subscriber_subset=balance.counterparts_subset(include_subscribers=True),
            )
            for day in list_of_dates("2016-01-01", "2016-01-03")
        ]
    )
    balance.store()
    reference.store()
    stats = ContactReferenceLocationStats(balance, reference, statistic=statistic)
    result = get_dataframe(stats).set_index("subscriber")
    assert result.value[msisdn] == pytest.approx(want)
def test_deps_cache_broken_chain():
    """
    A Query -> not_cached -> cached chain reports a dependency on the
    cached query at the bottom of the chain.
    """
    stored_dl = daily_location("2016-01-01")
    stored_dl.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    expected_dep = stored_dl.query_id
    assert len(flows._get_stored_dependencies()) == 8
    assert expected_dep in [
        dep.query_id for dep in flows._get_stored_dependencies()
    ]
def test_store_cache_nested(flowmachine_connect):
    """
    Storing a query with nested subqueries also caches it.
    """
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    # Storing the outer query should cache it too.
    assert flows.is_stored
    assert bool(
        flowmachine_connect.fetch(
            f"SELECT * FROM cache.cached WHERE query_id='{flows.md5}'"
        )
    )
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Invalidating a query in the middle of a chain drops the top of the
    chain and this link, but not the bottom.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    modal_a.invalidate_db_cache()
    # The bottom of the chain survives; the mid link and its dependent do not.
    assert dl.is_stored
    assert not modal_a.is_stored
    assert not flows.is_stored

    def in_cache_table(query_hash):
        # True if the cache.cached metadata table still has a row for this query.
        return bool(
            flowmachine_connect.fetch(
                f"SELECT * FROM cache.cached WHERE query_id='{query_hash}'"
            )
        )

    assert in_cache_table(dl.md5)
    assert not in_cache_table(modal_a.md5)
    assert not in_cache_table(flows.md5)
    # Daily location deps should remain
    assert bool(flowmachine_connect.fetch("SELECT * FROM cache.dependencies"))
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Invalidating a query in the middle of a chain drops the top of the
    chain and this link, but not the bottom.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    # Precondition: the whole chain is cached.
    for query in (dl, modal_a, flows):
        assert query.is_stored
    modal_a.invalidate_db_cache()
    assert dl.is_stored
    assert not modal_a.is_stored
    assert not flows.is_stored
    assert cache_table_exists(get_db(), dl.query_id)
    assert not cache_table_exists(get_db(), modal_a.query_id)
    assert not cache_table_exists(get_db(), flows.query_id)
    # Daily location deps should remain
    assert bool(get_db().fetch("SELECT * FROM cache.dependencies"))
def test_deps_cache_chain():
    """
    A Query -> cached1 -> cached2 chain reports only a dependency on
    cached1 (the nearest cached ancestor).
    """
    uncached_dl = daily_location("2016-01-01")
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    unwanted_dep = uncached_dl.query_id
    wanted_dep = modal_a.query_id
    assert len(flows._get_stored_dependencies()) == 6
    stored_ids = [dep.query_id for dep in flows._get_stored_dependencies()]
    assert wanted_dep in stored_ids
    assert unwanted_dep not in stored_ids
def test_run_modal_location_query(send_zmq_message_and_receive_reply):
    """
    Running a modal location query returns a successful reply containing
    the expected query_id.
    """

    def dl_message_params(date):
        # Message-side parameters for one daily location sub-query.
        return {
            "query_kind": "daily_location",
            "date": date,
            "method": "most-common",
            "aggregation_unit": "admin3",
            "subscriber_subset": None,
        }

    message = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "modal_location",
                "locations": [
                    dl_message_params("2016-01-01"),
                    dl_message_params("2016-01-02"),
                ],
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    response = send_zmq_message_and_receive_reply(message)

    def dl_query(date):
        # Equivalent flowmachine-side daily location query.
        return daily_location(
            date=date,
            method="most-common",
            spatial_unit=make_spatial_unit("admin", level=3),
            subscriber_subset=None,
        )

    expected = SpatialAggregate(
        locations=ModalLocation(dl_query("2016-01-01"), dl_query("2016-01-02"))
    )
    assert response["status"] == "success"
    assert response["payload"]["query_id"] == expected.md5
    assert list(response["payload"].keys()) == ["query_id"]
def test_retrieve_all():
    """
    Query.get_stored returns every stored query.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()
    stored_ids = [obj.query_id for obj in Query.get_stored()]
    for query in (dl, modal_a, flows):
        assert query.query_id in stored_ids
def test_subset_correct(subscriber_list, get_dataframe):
    """Test that pushed in subsetting matches .subset result"""
    # Pushed-in subset vs. post-hoc .subset on an EventTableSubset.
    su = EventTableSubset(
        start="2016-01-01", stop="2016-01-03", subscriber_subset=subscriber_list
    )
    subsu = EventTableSubset(start="2016-01-01", stop="2016-01-03").subset(
        "subscriber", subscriber_list
    )
    assert all(get_dataframe(su) == get_dataframe(subsu))
    # Same comparison for a ModalLocation.
    # NOTE(review): the pushed-in variant spans 2016-01-01..2016-01-07 while
    # the post-hoc variant spans 2016-01-01..2016-01-03 — confirm whether the
    # mismatched date ranges are intentional; this looks like a typo.
    su = ModalLocation(
        *[
            daily_location(d, subscriber_subset=subscriber_list)
            for d in list_of_dates("2016-01-01", "2016-01-07")
        ]
    )
    subsu = ModalLocation(
        *[daily_location(d) for d in list_of_dates("2016-01-01", "2016-01-03")]
    ).subset("subscriber", subscriber_list)
    assert all(get_dataframe(su) == get_dataframe(subsu))
def test_do_cache_multi(flowmachine_connect):
    """
    A query containing subqueries can have its cache metadata written.
    """
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    write_cache_metadata(get_db(), modal)
    assert cache_table_exists(get_db(), modal.query_id)
def test_error_when_modal_location_not_lon_lat():
    """
    Displacement raises ValueError when the modal_locations query passed
    to it does not use a lon-lat spatial unit.
    """
    modal = ModalLocation(
        *(daily_location(d) for d in list_of_dates("2016-01-01", "2016-01-02"))
    )
    with pytest.raises(ValueError):
        Displacement("2016-01-01", "2016-01-02", modal_locations=modal, statistic="avg")
def _flowmachine_query_obj(self):
    """
    Return the underlying flowmachine ModalLocation object.

    Returns
    -------
    ModalLocation
    """
    from flowmachine.features import ModalLocation

    return ModalLocation(
        *(location._flowmachine_query_obj for location in self.locations)
    )
def test_invalidate_cache_multi(flowmachine_connect):
    """
    Invalidating a simple query that is part of a bigger one drops both
    tables, cleans up dependencies and removes both from cache.
    """
    dl = daily_location("2016-01-01")
    dl.store().result()
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal.store().result()
    # Precondition: both queries are cached.
    assert dl.is_stored
    assert modal.is_stored
    dl.invalidate_db_cache()
    assert not dl.is_stored
    assert not modal.is_stored
    assert not cache_table_exists(get_db(), dl.query_id)
    assert not cache_table_exists(get_db(), modal.query_id)
    # the remaining dependencies are due to underlying Table objects
    assert bool(get_db().fetch("SELECT * FROM cache.dependencies"))
def test_contact_reference_location_stats_false_statistic_raises():
    """
    ValueError is raised for a non-compliant statistic parameter.
    """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    reference = ModalLocation(
        *[
            daily_location(
                day,
                spatial_unit=make_spatial_unit("versioned-cell"),
                subscriber_subset=balance.counterparts_subset(include_subscribers=True),
            )
            for day in list_of_dates("2016-01-01", "2016-01-03")
        ]
    )
    with pytest.raises(ValueError):
        ContactReferenceLocationStats(balance, reference, statistic="error")
def test_contact_reference_location_stats_custom_geometry(get_dataframe):
    """
    ContactReferenceLocationStats works with a custom geometry column.
    """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    modal = ModalLocation(
        *[
            daily_location(
                day,
                spatial_unit=make_spatial_unit("versioned-cell"),
                subscriber_subset=balance.counterparts_subset(include_subscribers=True),
            )
            for day in list_of_dates("2016-01-01", "2016-01-03")
        ]
    )
    balance.store()
    modal.store()
    # Re-expose the modal location with an explicit point geometry column.
    reference = CustomQuery(
        f"SELECT subscriber, ST_POINT(lon, lat) AS loc FROM ({modal.get_query()}) _",
        ["subscriber", "loc"],
    )
    stats = ContactReferenceLocationStats(
        balance, reference, statistic="avg", geom_column="loc"
    )
    result = get_dataframe(stats).set_index("subscriber")
    assert result.value["gwAynWXp4eWvxGP7"] == pytest.approx(298.7215)
def test_selected_values(get_dataframe):
    """
    ModalLocation() values are correct for a few hand-picked subscribers.

    Uses label-based ``.loc`` plus positional ``.iloc`` indexing:
    ``DataFrame.ix`` was deprecated in pandas 0.20 and removed in 1.0,
    so the original ``hdf.ix[...]`` calls fail on any modern pandas.
    """
    hdf = get_dataframe(
        ModalLocation(
            *[daily_location(d) for d in list_of_dates("2016-01-01", "2016-01-03")]
        )
    ).set_index("subscriber")
    # Each lookup selects a subscriber's row by label, then the first
    # (location) column by position.
    assert "Dolpa" == hdf.loc["038OVABN11Ak4W5P"].iloc[0]
    assert "Baglung" == hdf.loc["E1n7JoqxPBjvR5Ve"].iloc[0]
    assert "Myagdi" == hdf.loc["gkBLe0mN5j3qmRpX"].iloc[0]
    assert "Kapilbastu" == hdf.loc["5Kgwy8Gp6DlN3Eq9"].iloc[0]