async def action_handler__get_sql(
    config: "FlowmachineServerConfig", query_id: str
) -> ZMQReply:
    """
    Handler for the 'get_sql' action.

    Replies with a SQL string which can be run against flowdb to obtain
    the result of the query identified by `query_id`.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration; not read by this handler.
    query_id : str
        Identifier of the query whose SQL is requested.

    Returns
    -------
    ZMQReply
        A "success" reply whose payload carries the query id, state and
        SQL when the query is in the COMPLETED state. Otherwise an
        "error" reply: with state "awol" if the id is unknown, or with
        the current query state if the query has not completed.
    """
    # TODO: QueryStateMachine cannot currently tell us whether query_id
    # belongs to a valid query object, hence the manual lookup below.
    # Adding a QueryState.UNKNOWN would remove this special case.
    if not QueryInfoLookup(get_redis()).query_is_known(query_id):
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state = QueryStateMachine(
        get_redis(), query_id, get_db().conn_id
    ).current_query_state

    if state == QueryState.COMPLETED:
        query_obj = get_query_object_by_id(get_db(), query_id)
        sql = query_obj.get_query()
        return ZMQReply(
            status="success",
            payload={"query_id": query_id, "query_state": state, "sql": sql},
        )

    # Any other state: report it back along with its description.
    return ZMQReply(
        status="error",
        msg=f"Query with id '{query_id}' {state.description}.",
        payload={"query_id": query_id, "query_state": state},
    )
async def action_handler__poll_query(
    config: "FlowmachineServerConfig", query_id: str
) -> ZMQReply:
    """
    Handler for the 'poll_query' action.

    Reports the status of the query identified by `query_id`.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration; not read by this handler.
    query_id : str
        Identifier of the query to poll.

    Returns
    -------
    ZMQReply
        An "error" reply with state "awol" when no query kind is
        registered for the id; otherwise a "success" reply whose payload
        holds the query id, kind, current state and progress.
    """
    kind = _get_query_kind_for_query_id(query_id)

    # TODO: we should probably be able to use the QueryStateMachine to
    # determine whether the query already exists.
    if kind is None:
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state_machine = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    # Read the state before rebuilding the query object for the progress
    # report, matching the order the payload has always been assembled in.
    current_state = state_machine.current_query_state
    schema = FlowmachineQuerySchema()
    params = QueryInfoLookup(get_redis()).get_query_params(query_id)
    progress = query_progress(schema.load(params)._flowmachine_query_obj)

    return ZMQReply(
        status="success",
        payload={
            "query_id": query_id,
            "query_kind": kind,
            "query_state": current_state,
            "progress": progress,
        },
    )
def real_connections(flowmachine_connect):
    """
    Yield inside a `connections()` context and clean up afterwards.

    On exit (whether or not the consumer raised) the cache is reset
    without protecting table objects, the database engine is disposed
    and the redis database is flushed.
    """
    with connections():
        try:
            yield
        finally:
            db = get_db()
            redis_conn = get_redis()
            reset_cache(db, redis_conn, protect_table_objects=False)
            db.engine.dispose()  # Close the connection
            redis_conn.flushdb()  # Empty the redis
def reset_flowdb_and_redis(fm_conn):
    """
    Put flowdb and redis back into a pristine state.

    The cache schema is reset and every existing redis key is deleted.
    This fixture is automatically run before every test so that each
    test starts from a clean database.
    """
    print("[DDD] Resetting flowdb and redis into a pristine state")
    db = get_db()
    reset_cache_schema(db, redis_instance=get_redis())
    delete_all_redis_keys(redis_instance=get_redis())
def test_cache_reset(flowmachine_connect):
    """
    Test that cache and redis are both reset.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def current_state():
        # Build a fresh state machine on each call so the state is always
        # read from redis rather than from a stale object.
        return QueryStateMachine(
            get_redis(), stored_query.query_id, get_db().conn_id
        ).current_query_state

    # Before the reset: query is completed and its table exists.
    assert current_state() == QueryState.COMPLETED
    assert stored_query.is_stored

    reset_cache(get_db(), get_redis())

    # After the reset: state is back to KNOWN and the table is gone.
    assert current_state() == QueryState.KNOWN
    assert not stored_query.is_stored
async def test_query_run_logged(json_log, server_config):
    """
    Test that a 'run_query' request is recorded by the
    'flowmachine.query_run_log' logger, even when the
    'flowmachine.debug' logger is set to ERROR.
    """
    run_logger = getLogger("flowmachine.query_run_log")
    run_logger.handlers[0].stream = sys.stdout  # Reset log stream for capsys

    request = {
        "action": "run_query",
        "request_id": "DUMMY_API_REQUEST_ID",
        "params": {"query_kind": "dummy_query", "dummy_param": "DUMMY"},
    }

    # Logging of query runs should be independent of other logs
    set_log_level("flowmachine.debug", "ERROR")

    # Mock enough redis to get to the log messages
    get_redis().get.return_value = b"known"

    reply = await get_reply_for_message(
        msg_str=json.dumps(request), config=server_config
    )

    # Collect the log output before printing the reply.
    captured = json_log()
    print(reply)
    first_entry = captured.out[0]

    assert first_entry["action_request"]["request_id"] == "DUMMY_API_REQUEST_ID"
    assert first_entry["action_request"]["action"] == "run_query"
    assert first_entry["logger"] == "flowmachine.query_run_log"
def get_query(self):
    """
    Return a string representing an SQL query.

    When this query has a cache table in the database, the returned SQL
    selects from that table; otherwise the SQL produced by
    `_make_query` is returned.

    Returns
    -------
    str
        SQL query string.
    """
    try:
        qualified_name = self.fully_qualified_table_name
        schema_name, relation_name = qualified_name.split(".")

        qsm = QueryStateMachine(get_redis(), self.query_id, get_db().conn_id)
        # Block until any in-progress store/reset of this query has ended.
        qsm.wait_until_complete()

        is_cached = qsm.is_completed and get_db().has_table(
            schema=schema_name, name=relation_name
        )
        if is_cached:
            try:
                touch_cache(get_db(), self.query_id)
            except ValueError:
                # Cache record not written yet, which can happen for Models
                # which call through to this method from their `_make_query`
                # method while writing metadata. In that scenario the table
                # _is_ written, but is not visible from the connection
                # touch_cache uses because the cache metadata transaction
                # isn't complete.
                pass
            return "SELECT * FROM {}".format(qualified_name)
    except NotImplementedError:
        # Raised somewhere in the cache lookup above; fall back to
        # generating the query SQL directly.
        pass
    return self._make_query()
def start_flowmachine_server_with_or_without_dependency_caching(
    request, logging_config, monkeypatch
):
    """
    Starts a FlowMachine server in a separate process, with function scope
    (i.e. a server will be started and stopped for each test that uses this
    fixture).

    Tests using this fixture will run twice: once with dependency caching
    disabled, and again with dependency caching enabled. The value of
    `request.param` is written verbatim to the
    FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING environment variable.

    The server process is always terminated on teardown, including when
    creating the new connection or entering the flowmachine context fails,
    so that a failed setup cannot leave a stray server process behind.
    """
    # Ensure this server runs on a different port from the session-scoped server
    main_zmq_port = os.getenv("FLOWMACHINE_PORT", "5555")
    monkeypatch.setenv("FLOWMACHINE_PORT", str(int(main_zmq_port) + 1))

    # Turn dependency caching on or off
    monkeypatch.setenv(
        "FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING", request.param
    )

    # Start the server
    fm_thread = Process(target=flowmachine.core.server.server.main)
    fm_thread.start()
    try:
        # Create a new flowmachine connection, because we can't use the old
        # one after starting a new process.
        new_conn = make_flowmachine_connection_object()
        with flowmachine.core.context.context(
            new_conn, get_executor(), get_redis()
        ):
            try:
                yield
            finally:
                # Close the connection while the context is still bound.
                new_conn.close()
    finally:
        fm_thread.terminate()
        # Wait a moment to make sure coverage of subprocess finishes being written
        sleep(2)
def test_blocks_on_store_cascades():
    """
    If a store is running on a query that is used in another query,
    that query should wait.
    """
    dl = daily_location("2016-01-01", spatial_unit=make_spatial_unit("cell"))
    dl2 = daily_location("2016-01-02", spatial_unit=make_spatial_unit("cell"))
    dl.store().result()
    hl = ModalLocation(dl, dl2)

    ticks = []

    def unlock(counter, redis_conn, conn_id):
        # Enqueue the dependency, then fill the counter before executing
        # and finishing it; get_query() on the parent should not return
        # before finish() has been reached.
        state_machine = QueryStateMachine(redis_conn, dl.query_id, conn_id)
        state_machine.enqueue()
        for tick in range(101):
            counter.append(tick)
        state_machine.execute()
        state_machine.finish()

    worker = Thread(target=unlock, args=(ticks, get_redis(), get_db().conn_id))
    worker.start()
    hl.get_query()
    assert len(ticks) == 101
    worker.join()
async def test_rerun_query_after_cancelled(server_config, real_connections):
    """
    Test that a query can be rerun after it has been cancelled.
    """

    def query_spec():
        # Return a fresh dict on every call so each consumer gets its own copy.
        return dict(
            query_kind="spatial_aggregate",
            locations=dict(
                query_kind="daily_location",
                date="2016-01-01",
                method="last",
                aggregation_unit="admin3",
            ),
        )

    query_obj = FlowmachineQuerySchema().load(query_spec())._flowmachine_query_obj
    query_id = query_obj.query_id

    # Enqueue and immediately cancel the query.
    qsm = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    qsm.enqueue()
    qsm.cancel()
    assert not query_obj.is_stored
    assert qsm.is_cancelled

    QueryInfoLookup(get_redis()).register_query(query_id, query_spec())

    # Running the same query again should succeed and store the result.
    msg = await action_handler__run_query(config=server_config, **query_spec())
    assert msg["status"] == ZMQReplyStatus.SUCCESS
    qsm.wait_until_complete()
    assert query_obj.is_stored
async def test_get_query_bad_id(server_config):
    """
    The get_query_params handler should send back an error status for a
    nonexistent id.
    """
    # Make the redis lookup come back empty.
    get_redis().get.return_value = None
    reply = await action_handler__get_query_params(
        config=server_config, query_id="DUMMY_ID"
    )
    assert reply.status == ZMQReplyStatus.ERROR
def redis():
    """
    Return the redis instance to use when running the tests.

    This simply forwards to `get_redis()`; having a fixture for it
    avoids hard-coding that call in every test.
    """
    redis_instance = get_redis()
    return redis_instance
def write_model_result(query_ddl_ops: List[str], connection: Engine) -> float:
    """
    Write the model's dataframe to the database and mark the query finished.

    Relies on `self`, `store_dependencies`, `name` and `schema` from the
    enclosing scope (not visible in this block).

    Parameters
    ----------
    query_ddl_ops : list of str
        Not used by this function.
    connection : Engine
        Connection passed to `DataFrame.to_sql` for writing the table.

    Returns
    -------
    float
        The value of `self._runtime`.
    """
    if store_dependencies:
        store_all_unstored_dependencies(self)

    self._df.to_sql(name, connection, schema=schema, index=False)

    state_machine = QueryStateMachine(get_redis(), self.query_id, get_db().conn_id)
    state_machine.finish()
    return self._runtime
def test_drop_query_errors():
    """Test that resetting a query's cache will error if in a state where that isn't possible."""
    query = DummyQuery(dummy_id=1, sleep_time=5)
    state_machine = QueryStateMachine(
        get_redis(), query.query_id, get_db().conn_id
    )
    # Enqueue and start executing the query, but never finish it.
    state_machine.enqueue()
    state_machine.execute()
    with pytest.raises(QueryResetFailedException):
        query.invalidate_db_cache()
def test_cache_reset_protects_tables(flowmachine_connect):
    """
    Resetting the cache should preserve Table entries.
    """
    # Regression test for https://github.com/Flowminder/FlowKit/issues/832
    dl_query = daily_location(date="2016-01-03", method="last")
    reset_cache(get_db(), get_redis())

    for dependency in dl_query._get_stored_dependencies():
        stored_ids = [stored.query_id for stored in Query.get_stored()]
        assert dependency.query_id in stored_ids

    # Original bug caused this to error
    dl_query.store().result()
def test_store_exceptions(fail_event, expected_exception):
    """Test that exceptions are raised when watching a store op triggered elsewhere."""
    query = DummyQuery(dummy_id=1, sleep_time=5)
    state_machine = QueryStateMachine(
        get_redis(), query.query_id, get_db().conn_id
    )
    # Mark the query as having begun executing elsewhere
    state_machine.enqueue()
    state_machine.execute()

    store_future = query.store()
    # Fire the failure event while the store call is watching the query.
    state_machine.trigger_event(fail_event)

    with pytest.raises(expected_exception):
        raise store_future.exception()
def test_redis_resync_runtimeerror(flowmachine_connect, dummy_redis):
    """
    Test that a runtime error is raised if redis is being updated from
    multiple places when trying to resync.
    """
    stored_query = daily_location("2016-01-01").store().result()
    state = QueryStateMachine(
        get_redis(), stored_query.query_id, get_db().conn_id
    ).current_query_state
    assert state == QueryState.COMPLETED

    # Disallow flushing on the dummy redis, then expect the resync to fail.
    dummy_redis.allow_flush = False
    with pytest.raises(RuntimeError):
        resync_redis_with_cache(get_db(), dummy_redis)
def query_state(self) -> "QueryState":
    """
    Return the current query state.

    The state is read through a `QueryStateMachine` constructed for this
    query's id on every call.

    Returns
    -------
    flowmachine.core.query_state.QueryState
        The current query state
    """
    return QueryStateMachine(
        get_redis(), self.query_id, get_db().conn_id
    ).current_query_state
async def action_handler__get_geo_sql(
    config: "FlowmachineServerConfig", query_id: str
) -> ZMQReply:
    """
    Handler for the geographic counterpart of the 'get_sql' action
    (presumably registered as 'get_geo_sql' - confirm against the action
    table).

    Replies with the SQL returned by the query's `geojson_query()`,
    together with the canonical name of its spatial unit.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration; not read by this handler.
    query_id : str
        Identifier of the query whose geojson SQL is requested.

    Returns
    -------
    ZMQReply
        A "success" reply with the query id, state, SQL and aggregation
        unit when the query is COMPLETED. Otherwise an "error" reply:
        state "awol" for an unknown id, state "errored" when building
        the geojson reply raises AttributeError, or the current state
        when the query has not completed.
    """
    # TODO: QueryStateMachine cannot currently tell us whether query_id
    # belongs to a valid query object, hence the manual lookup below.
    # Adding a QueryState.UNKNOWN would remove this special case.
    if not QueryInfoLookup(get_redis()).query_is_known(query_id):
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state = QueryStateMachine(
        get_redis(), query_id, get_db().conn_id
    ).current_query_state

    if state == QueryState.COMPLETED:
        query_obj = get_query_object_by_id(get_db(), query_id)
        try:
            geo_sql = query_obj.geojson_query()
            return ZMQReply(
                status="success",
                payload={
                    "query_id": query_id,
                    "query_state": state,
                    "sql": geo_sql,
                    "aggregation_unit": query_obj.spatial_unit.canonical_name,
                },
            )
        except AttributeError:
            # TODO: This codepath is untested because all queries right now have geography
            return ZMQReply(
                status="error",
                msg=f"Query with id '{query_id}' has no geojson compatible representation.",
                payload={"query_id": query_id, "query_state": "errored"},
            )

    # Any other state: report it back along with its description.
    return ZMQReply(
        status="error",
        msg=f"Query with id '{query_id}' {state.description}.",
        payload={"query_id": query_id, "query_state": state},
    )
def test_redis_resync(flowmachine_connect):
    """
    Test that redis states can be resynced to the flowdb cache.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def current_state():
        # Build a fresh state machine on each call so the state is always
        # read from redis rather than from a stale object.
        return QueryStateMachine(
            get_redis(), stored_query.query_id, get_db().conn_id
        ).current_query_state

    assert current_state() == QueryState.COMPLETED
    assert stored_query.is_stored

    # Flushing redis drops the state but leaves the cached table alone.
    get_redis().flushdb()
    assert stored_query.is_stored
    assert current_state() == QueryState.KNOWN

    # Resyncing restores the completed state from the flowdb cache.
    resync_redis_with_cache(get_db(), get_redis())
    assert current_state() == QueryState.COMPLETED
def unstored_dependencies_graph(query_obj: "Query") -> nx.DiGraph:
    """
    Produce a dependency graph of the unstored queries on which this query depends.

    Parameters
    ----------
    query_obj : Query
        Query object to produce a dependency graph for.

    Returns
    -------
    networkx.DiGraph

    Notes
    -----
    If store() or invalidate_db_cache() is called on any query while this
    function is executing, the resulting graph may not be correct.
    The queries listed as dependencies are not _guaranteed_ to be used in the
    actual running of a query, only to be referenced by it.
    """
    edges = []
    if not query_obj.is_stored:
        # Worklist of (parent, dependency) pairs still to be examined.
        pending = [(query_obj, dep) for dep in query_obj.dependencies]
        while pending:
            parent, dependency = pending.pop()
            if parent is query_obj:
                # We don't want to include this query in the graph, only
                # its dependencies.
                parent = None
            # Wait for query to complete before checking whether it's stored.
            QueryStateMachine(
                get_redis(), dependency.query_id, get_db().conn_id
            ).wait_until_complete()
            if not dependency.is_stored:
                edges.append((parent, dependency))
                pending.extend(
                    [(dependency, sub_dep) for sub_dep in dependency.dependencies]
                )

    def get_node_attrs(q):
        class_name = q.__class__.__name__
        return {
            "query_object": q,
            "name": class_name,
            "stored": False,
            "shape": "rect",
            "label": f"{class_name}.",
        }

    return _assemble_dependency_graph(
        dependencies=edges, attrs_func=get_node_attrs
    )
def test_drop_query_blocks(monkeypatch):
    """Test that resetting a query's cache will block if that's already happening."""
    # Make the module's `_sleep` raise instead, so that waiting becomes
    # observable as a BlockingIOError.
    blocking_sleep = Mock(side_effect=BlockingIOError)
    monkeypatch.setattr(flowmachine.core.query, "_sleep", blocking_sleep)

    query = DummyQuery(dummy_id=1, sleep_time=5)
    state_machine = QueryStateMachine(
        get_redis(), query.query_id, get_db().conn_id
    )
    # Mark the query as in the process of resetting
    state_machine.enqueue()
    state_machine.execute()
    state_machine.finish()
    state_machine.reset()

    with pytest.raises(BlockingIOError):
        query.invalidate_db_cache()
def __call__(self, value) -> Union[None, str]:
    """
    Validate that `value` is None, missing, or a resolvable query id.

    Parameters
    ----------
    value
        The value to validate.

    Returns
    -------
    The value that was passed in, unchanged.

    Raises
    ------
    ValidationError
        If the id has no registered query parameters and no cache table
        exists for it either.
    """
    from flowmachine.core.server.query_schemas import FlowmachineQuerySchema

    if value is None or value is missing:
        return value

    try:
        schema = FlowmachineQuerySchema()
        params = QueryInfoLookup(get_redis()).get_query_params(value)
        # Rebuild the query object purely as a validity check; the result
        # itself is discarded.
        schema.load(params)._flowmachine_query_obj
    except UnkownQueryIdError:
        # No registered parameters: the id is still acceptable when a
        # cache table exists for it.
        if not cache_table_exists(get_db(), value):
            raise ValidationError("Must be None or a valid query id.")
    return value
async def action_handler__get_query_params(
    config: "FlowmachineServerConfig", query_id: str
) -> ZMQReply:
    """
    Handler for the 'get_query_params' action.

    Replies with the parameters registered for the query identified by
    `query_id`.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration; not read by this handler.
    query_id : str
        Identifier of the query whose parameters are requested.

    Returns
    -------
    ZMQReply
        A "success" reply carrying the query id and parameters, or an
        "error" reply with state "awol" if the id is unknown.
    """
    lookup = QueryInfoLookup(get_redis())
    try:
        params = lookup.get_query_params(query_id)
    except UnkownQueryIdError:
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    return ZMQReply(
        status="success",
        payload={"query_id": query_id, "query_params": params},
    )
def deserialize(
    self,
    value: typing.Any,
    attr: str = None,
    data: typing.Mapping[str, typing.Any] = None,
    **kwargs,
) -> Union[None, Table]:
    """
    Deserialize a query id into the query object it refers to.

    The value is first passed through the parent class's `deserialize`.
    If that yields `missing` or None, it is returned as is. Otherwise the
    query object is rebuilt from its registered parameters, falling back
    to a lookup by id when no parameters are registered.

    Parameters
    ----------
    value : Any
        The raw value to deserialize.
    attr : str, optional
        Forwarded to the parent `deserialize`.
    data : Mapping, optional
        Forwarded to the parent `deserialize`.
    **kwargs
        Forwarded to the parent `deserialize`.

    Returns
    -------
    The result of the parent `deserialize` when it is `missing` or None,
    otherwise the query object for the given id.
    """
    from flowmachine.core.server.query_schemas import FlowmachineQuerySchema

    table_name = super().deserialize(value, attr, data, **kwargs)
    if table_name is missing or table_name is None:
        return table_name

    # NOTE(review): the lookups below use the raw `value`, not the
    # deserialized `table_name` - confirm this is intentional.
    try:
        schema = FlowmachineQuerySchema()
        params = QueryInfoLookup(get_redis()).get_query_params(value)
        return schema.load(params)._flowmachine_query_obj
    except UnkownQueryIdError:
        return get_query_object_by_id(get_db(), value)
def test_cache_metadata_write_error(flowmachine_connect, dummy_redis, monkeypatch):
    """
    Test that errors during cache metadata writing leave the query state
    machine in error state.
    """
    # Regression test for https://github.com/Flowminder/FlowKit/issues/833
    failing_writer = Mock(side_effect=TestException)
    dl_query = daily_location(date="2016-01-03", method="last")
    assert not dl_query.is_stored

    # Make the metadata write blow up when the store runs.
    monkeypatch.setattr("flowmachine.core.cache.write_cache_metadata", failing_writer)

    store_future = dl_query.store()
    with pytest.raises(TestException):
        store_future.result()

    assert not dl_query.is_stored
    final_state = QueryStateMachine(
        get_redis(), dl_query.query_id, get_db().conn_id
    ).current_query_state
    assert final_state == QueryState.ERRORED
def _get_query_kind_for_query_id(query_id: str) -> Union[None, str]:
    """
    Look up the query kind registered for the given query id.

    Parameters
    ----------
    query_id : str
        Identifier of the query.

    Returns
    -------
    str or None
        The query kind associated with this query_id, or None if the
        lookup raises `UnkownQueryIdError` (i.e. no query with this
        query_id exists).
    """
    lookup = QueryInfoLookup(get_redis())
    try:
        kind = lookup.get_query_kind(query_id)
    except UnkownQueryIdError:
        return None
    else:
        return kind
def test_get_query_blocks_on_store():
    """
    If a store is running get_query should block.
    """
    dl = daily_location("2016-01-01", spatial_unit=make_spatial_unit("cell"))
    dl.store().result()

    ticks = []

    def unlock(counter, redis_conn, conn_id):
        # Enqueue the query, then fill the counter before executing and
        # finishing it; get_query() should not return before finish().
        state_machine = QueryStateMachine(redis_conn, dl.query_id, conn_id)
        state_machine.enqueue()
        for tick in range(101):
            counter.append(tick)
        state_machine.execute()
        state_machine.finish()

    worker = Thread(target=unlock, args=(ticks, get_redis(), get_db().conn_id))
    worker.start()
    dl.get_query()
    assert len(ticks) == 101
    worker.join()
def mocked_connections(monkeypatch):
    """
    Fixture which mocks out the setup methods for logger, connection, redis
    and threadpool and yields the mocks.

    The context that was bound before the test is captured and re-bound
    once the consumer has finished.

    Parameters
    ----------
    monkeypatch

    Yields
    ------
    tuple of mocks
        Mocks for init_logging, Connection, StrictRedis and _start_threadpool
    """
    logging_mock = Mock()

    connection_mock = Mock()
    # Give `engine.begin()` working context-manager hooks.
    begin_result = connection_mock.return_value.engine.begin.return_value
    begin_result.__enter__ = Mock()
    begin_result.__exit__ = Mock()
    connection_mock.return_value.fetch.return_value = MagicMock(return_value=[])

    redis_mock = Mock(name="mocked_connections_redis")
    tp_mock = Mock(return_value=None)

    monkeypatch.setattr(flowmachine.core.init, "set_log_level", logging_mock)
    monkeypatch.setattr(flowmachine.core.init, "Connection", connection_mock)
    monkeypatch.setattr("redis.StrictRedis", redis_mock)
    monkeypatch.setattr(
        concurrent.futures.thread.ThreadPoolExecutor, "__init__", tp_mock
    )

    # get any existing context
    previous_db = get_db()
    previous_redis = get_redis()
    previous_executor = get_executor()

    yield logging_mock, connection_mock, redis_mock, tp_mock

    # Reset context
    bind_context(previous_db, previous_executor, previous_redis)
async def test_rerun_query_after_removed_from_cache(
    dummy_redis, server_config, real_connections
):
    """
    Test that a query can be rerun after it has been removed from the cache.
    """

    def query_spec():
        # Return a fresh dict on every call so each run gets its own copy.
        return dict(
            query_kind="spatial_aggregate",
            locations=dict(
                query_kind="daily_location",
                date="2016-01-01",
                method="last",
                aggregation_unit="admin3",
            ),
        )

    # First run: wait for completion and confirm the result is stored.
    first_reply = await action_handler__run_query(
        config=server_config, **query_spec()
    )
    query_id = first_reply["payload"]["query_id"]
    qsm = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    qsm.wait_until_complete()
    query_obj = get_query_object_by_id(get_db(), query_id)
    assert query_obj.is_stored

    # Drop it from the cache; the state machine should report it as known.
    query_obj.invalidate_db_cache()
    assert not query_obj.is_stored
    assert qsm.is_known

    # Second run: should succeed and store the result again.
    msg = await action_handler__run_query(config=server_config, **query_spec())
    assert msg["status"] == ZMQReplyStatus.SUCCESS
    qsm.wait_until_complete()
    assert query_obj.is_stored