def test_unstored_dependencies_graph():
    """
    Check that unstored_dependencies_graph produces the expected graph for a
    small hand-built dependency tree.
    """
    # Dependency structure under test:
    #
    #            5:unstored
    #            /        \
    #      3:stored     4:unstored
    #      /      \      /
    # 1:unstored  2:unstored
    #
    # Each query gets a distinct string in its parameter list so that every
    # query ends up with a different query ID.
    leaf_one = DummyQuery(dummy_param=["dummy1"])
    leaf_two = DummyQuery(dummy_param=["dummy2"])
    stored_branch = DummyQuery(dummy_param=["dummy3", leaf_one, leaf_two])
    unstored_branch = DummyQuery(dummy_param=["dummy4", leaf_two])
    root = DummyQuery(dummy_param=["dummy5", stored_branch, unstored_branch])
    stored_branch.store()

    # Only these two queries are expected to appear as nodes of the graph.
    expected_query_nodes = [leaf_two, unstored_branch]

    graph = unstored_dependencies_graph(root)

    # No node may carry a truthy "stored" attribute.
    assert not any(stored for _, stored in graph.nodes(data="stored"))
    assert len(graph) == len(expected_query_nodes)
    for expected in expected_query_nodes:
        node_label = f"x{expected.query_id}"
        assert node_label in graph.nodes()
        node_query = graph.nodes[node_label]["query_object"]
        assert node_query.query_id == expected.query_id
def test_unstored_dependencies_graph_for_stored_query():
    """
    A query that has itself been stored should yield an empty unstored
    dependencies graph.
    """
    first_child = DummyQuery(dummy_param=["dummy1"])
    second_child = DummyQuery(dummy_param=["dummy2"])
    stored_parent = DummyQuery(dummy_param=["dummy3", first_child, second_child])
    stored_parent.store()

    dependency_graph = unstored_dependencies_graph(stored_parent)

    # The graph built from an already-stored query must contain no nodes.
    assert len(dependency_graph) == 0
def to_sql(
    self,
    name: str,
    schema: Union[str, None] = None,
    store_dependencies: bool = False,
) -> Future:
    """
    Store the result of the calculation back into the database.

    Parameters
    ----------
    name : str
        name of the table
    schema : str, default None
        Name of an existing schema. If None, the postgres default is used;
        see the postgres docs for more info.
    store_dependencies : bool, default False
        If True, store the dependencies of this query.

    Returns
    -------
    Future
        Future object, containing this query and any result information.

    Raises
    ------
    NameTooLongError
        If `name` has more than MAX_POSTGRES_NAME_LENGTH characters.

    Notes
    -----
    This method will return a Future immediately.
    """
    if len(name) > MAX_POSTGRES_NAME_LENGTH:
        err_msg = (
            "The table name {} is too long ({}) chars. Postgres allows only table names"
            " of length {}"
        ).format(name, len(name), MAX_POSTGRES_NAME_LENGTH)
        raise NameTooLongError(err_msg)

    def write_query(query_ddl_ops: List[str], connection: Engine) -> float:
        """
        Execute every DDL operation in order and return the sum of the
        "Execution Time" entries found in results that look like query plans.

        Any error raised while executing an operation is logged and re-raised.
        """
        plan_time = 0
        for ddl_op in query_ddl_ops:
            try:
                ddl_op_result = connection.execute(ddl_op)
            except Exception as e:
                logger.error(f"Error executing SQL: '{ddl_op}'. Error was {e}")
                # Bare raise re-raises the active exception as-is.
                raise
            try:
                rows = ddl_op_result.fetchall()
            except ResourceClosedError:
                continue  # Nothing to fetch for this operation
            # Each result is tallied exactly once, right after it is fetched,
            # so a plan can never be counted more than once.
            try:
                execution_time = rows[0][0][0]["Execution Time"]  # Should be a query plan
            except (IndexError, KeyError, TypeError):
                # Not an explain result. TypeError covers results whose first
                # cell is a scalar or a string rather than a list of plans.
                continue
            plan_time += execution_time
        logger.debug("Executed queries.")
        return plan_time

    if store_dependencies:
        # Need to ensure we're behind our deps in the queue
        store_queries_in_order(unstored_dependencies_graph(self))

    ddl_ops_func = self._make_sql

    current_state, changed_to_queue = QueryStateMachine(
        get_redis(), self.query_id, get_db().conn_id
    ).enqueue()
    logger.debug(
        f"Attempted to enqueue query '{self.query_id}', query state is now {current_state} and change happened {'here and now' if changed_to_queue else 'elsewhere'}."
    )
    # Original author's note on the arguments involved:
    # name, redis, query, connection, ddl_ops_func, write_func, schema = None, sleep_duration = 1
    store_future = submit_to_executor(
        write_query_to_cache,
        name=name,
        schema=schema,
        query=self,
        connection=get_db(),
        redis=get_redis(),
        ddl_ops_func=ddl_ops_func,
        write_func=write_query,
    )
    return store_future
def test_cache_content(
    start_flowmachine_server_with_or_without_dependency_caching, fm_conn, redis
):
    """
    With dependency caching turned on, running a query should cache the query
    and its dependencies. With dependency caching turned off, only the query
    itself should be cached.
    """
    # Can't use the zmq_port fixture here as we're running against a different FlowMachine server
    zmq_port = os.getenv("FLOWMACHINE_PORT")

    daily_location_spec = {
        "query_kind": "daily_location",
        "date": "2016-01-01",
        "method": "last",
        "aggregation_unit": "admin3",
        "subscriber_subset": None,
    }
    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": daily_location_spec,
        },
        "request_id": "DUMMY_ID",
    }

    # Build the query object corresponding to the message above.
    locations = daily_location(
        date="2016-01-01",
        method="last",
        spatial_unit=make_spatial_unit("admin", level=3),
        table=None,
        subscriber_subset=None,
    )
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(locations=locations)
    )

    # Work out which tables should end up in the cache.
    expected_cache_tables = [q.table_name]
    caching_flag = os.getenv("FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING")
    if caching_flag == "false":
        dependency_nodes = unstored_dependencies_graph(q).nodes(data="query_object")
        for _, dependency in dependency_nodes:
            try:
                qualified_name = dependency.fully_qualified_table_name
            except NotImplementedError:
                # Some queries cannot be cached, and don't have table names
                continue
            schema, table_name = qualified_name.split(".")
            if schema == "cache":
                expected_cache_tables.append(table_name)

    # The cache must be empty before the query is run.
    assert cache_schema_is_empty(get_db(), check_internal_tables_are_empty=False)

    # Ask the server to run the query and confirm the request was accepted.
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host="localhost"
    )
    assert reply["status"] == "success"
    query_id = reply["payload"]["query_id"]

    # Block until the query has finished running.
    poll_until_done(zmq_port, query_id)

    # The cache should now hold exactly the expected tables.
    assert sorted(expected_cache_tables) == get_cache_tables(get_db())