def aggregate(self):
    """
    Aggregate to the spatial level, returning a query object that
    represents the locations and the total counts of subscribers.
    """
    return SpatialAggregate(locations=self)
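# Hedged usage sketch (added for illustration, not part of the original source):
# aggregate() is intended to be called on a locations query such as the
# daily_location objects constructed in the tests below; the constructor
# arguments shown here are assumptions borrowed from those tests.
#
# locations = daily_location(
#     date="2016-01-01",
#     method="last",
#     spatial_unit=make_spatial_unit("admin", level=3),
# )
# per_location_counts = locations.aggregate()  # -> SpatialAggregate(locations=locations)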
def __init__(self, *, joined_spatial_aggregate: JoinedSpatialAggregate):
    self.joined_spatial_aggregate = joined_spatial_aggregate
    self.redacted_spatial_agg = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=self.joined_spatial_aggregate.locations
        )
    )
    self.spatial_unit = self.joined_spatial_aggregate.locations.spatial_unit
    super().__init__()
def test_get_sql(zmq_port, zmq_host):
    """
    Running 'get_sql' on a finished query returns the expected result.
    """
    #
    # Run daily_location query.
    #
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
                hours=None,
            )
        )
    )
    expected_query_id = q.query_id

    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    # assert reply["status"] in ("executing", "queued", "completed")
    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, expected_query_id)

    #
    # Get query result.
    #
    msg = {
        "action": "get_sql_for_query_result",
        "params": {"query_id": expected_query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)

    assert "success" == reply["status"]
    assert f"SELECT * FROM cache.x{expected_query_id}" == reply["payload"]["sql"]
def _flowmachine_query_obj(self):
    """
    Return the underlying flowmachine object.

    Returns
    -------
    Query
    """
    locations = self.locations._flowmachine_query_obj
    return RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(locations=locations)
    )
def test_send_zmq_message_and_receive_reply(zmq_host, zmq_port):
    """
    Reply from the flowmachine server to the example message stored in
    `FM_EXAMPLE_MESSAGE` is as expected.
    """
    # Check that FM_EXAMPLE_MESSAGE contains the expected message
    msg_expected = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    assert msg_expected == FM_EXAMPLE_MESSAGE

    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )
    expected_query_id = q.query_id

    # Check that the flowmachine server sends the expected reply
    reply = send_zmq_message_and_receive_reply(
        FM_EXAMPLE_MESSAGE, host=zmq_host, port=zmq_port
    )
    assert expected_query_id == reply["payload"]["query_id"]
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"

    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)
def test_run_daily_location_query(zmq_host, zmq_port):
    """
    Can run a daily location query and receive a successful response
    including the query_id.
    """
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "most-common",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)

    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="most-common",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
                hours=None,
            )
        )
    )
    expected_query_id = q.query_id

    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]
    assert ["query_id", "progress"] == list(reply["payload"].keys())

    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)
def test_run_query(zmq_port, zmq_host, fm_conn, redis):
    """
    Run daily_location query and check that the resulting table contains
    the expected rows.
    """
    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )
    expected_query_id = q.query_id

    #
    # Check that we are starting with a clean slate (no cache tables, empty redis).
    #
    reset_cache(get_db(), redis, protect_table_objects=False)
    assert cache_schema_is_empty(get_db())
    assert not redis.exists(expected_query_id)

    #
    # Send message to run the daily_location query, check it was accepted
    # and a redis lookup was created for the query id.
    #
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host=zmq_host
    )
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"
    assert expected_query_id == reply["payload"]["query_id"]
    # assert redis.exists(expected_query_id)

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, expected_query_id)

    #
    # Check that a cache table for the query result was created
    # and that it contains the expected number of rows.
    #
    output_cache_table = f"x{expected_query_id}"
    assert output_cache_table in get_cache_tables(get_db())
    num_rows = (
        get_db()
        .engine.execute(f"SELECT COUNT(*) FROM cache.{output_cache_table}")
        .fetchone()[0]
    )
    assert num_rows == 14

    #
    # In addition, check that the first few rows of the result are as expected.
    #
    first_few_rows_expected = [
        ("524 1 02 09", 26),
        ("524 1 03 13", 20),
        ("524 3 08 43", 35),
    ]
    first_few_rows = (
        get_db()
        .engine.execute(
            f"SELECT * FROM cache.{output_cache_table} ORDER BY pcod LIMIT 3"
        )
        .fetchall()
    )
    assert first_few_rows_expected == first_few_rows
def test_cache_content(
    start_flowmachine_server_with_or_without_dependency_caching, fm_conn, redis
):
    """
    Run a query with dependency caching turned on, and check that its
    dependencies are cached. Run a query with dependency caching turned off,
    and check that only the query itself is cached.
    """
    # Can't use the zmq_port fixture here as we're running against a different
    # FlowMachine server
    zmq_port = os.getenv("FLOWMACHINE_PORT")
    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )

    # Get list of tables that should be cached
    expected_cache_tables = [q.table_name]
    if "false" == os.getenv("FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING"):
        dependencies = unstored_dependencies_graph(q)
        for node, query_obj in dependencies.nodes(data="query_object"):
            try:
                schema, table_name = query_obj.fully_qualified_table_name.split(".")
                if schema == "cache":
                    expected_cache_tables.append(table_name)
            except NotImplementedError:
                # Some queries cannot be cached, and don't have table names
                pass

    # Check that we are starting with an empty cache.
    assert cache_schema_is_empty(get_db(), check_internal_tables_are_empty=False)

    # Send message to run the daily_location query, and check it was accepted
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host="localhost"
    )
    assert reply["status"] == "success"
    query_id = reply["payload"]["query_id"]

    # Wait until the query has finished.
    poll_until_done(zmq_port, query_id)

    # Check that the cache contains the correct tables.
    assert sorted(expected_cache_tables) == get_cache_tables(get_db())