def test_max_date(flowmachine_connect):
    """
    Check that connection.max_date reports 2016-01-07, both with default
    arguments and with table="all".
    """
    date_format = "%Y-%m-%d"
    expected = "2016-01-07"

    default_max = get_db().max_date()
    assert default_max.strftime(date_format) == expected

    all_tables_max = get_db().max_date(table="all")
    assert all_tables_max.strftime(date_format) == expected
def test_touch_cache_record_for_table(flowmachine_connect):
    """
    Touching a cache record for a table should update access count and last
    accessed, but not touch score or counter.
    """
    table = Table("events.calls_20160101")

    def cached_value(column):
        # Read one column of this table's row in cache.cached.
        return get_db().fetch(
            f"SELECT {column} FROM cache.cached WHERE query_id='{table.query_id}'"
        )[0][0]

    # Compute time for tables is zero, so set to 1 to avoid zeroing out
    get_db().engine.execute(
        "UPDATE cache.cached SET compute_time = 1 WHERE query_id=%s", table.query_id
    )
    assert 0 == get_score(get_db(), table.query_id)
    assert 1 == cached_value("access_count")
    accessed_at = cached_value("last_accessed")

    touch_cache(get_db(), table.query_id)

    assert 0 == get_score(get_db(), table.query_id)
    assert 2 == cached_value("access_count")
    # No cache touch should be recorded
    assert 2 == get_db().fetch("SELECT nextval('cache.cache_touches');")[0][0]
    assert accessed_at < cached_value("last_accessed")
def test_tables(flowmachine_connect):
    """
    Create test_table_a and test_table_b (one row each, inserted only if not
    already present), yield the connection fixture, then drop both tables.
    """
    create_sql = """
    BEGIN;
        CREATE TABLE IF NOT EXISTS test_table_a (
            id NUMERIC PRIMARY KEY,
            field TEXT
        );
        INSERT INTO test_table_a VALUES ('1', 'foo') ON CONFLICT (id) DO NOTHING;

        CREATE TABLE IF NOT EXISTS test_table_b (
            id NUMERIC PRIMARY KEY,
            field TEXT,
            numeric_field NUMERIC
        );
        INSERT INTO test_table_b VALUES ('1', 'foo', '300') ON CONFLICT (id) DO NOTHING;
    END;
    """
    drop_sql = """
    DROP TABLE IF EXISTS test_table_a;
    DROP TABLE IF EXISTS test_table_b;
    """

    get_db().engine.execute(create_sql)
    yield flowmachine_connect
    get_db().engine.execute(drop_sql)
def test_invalidate_cascade(flowmachine_connect):
    """
    With cascade=False, invalidating a query removes only that query from
    cache; queries built on it and the dependency records stay in place.
    """
    first_day = daily_location("2016-01-01")
    first_day.store().result()
    modal_a = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_a.store().result()
    modal_b = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_a, modal_b)
    flows.store().result()

    # Everything stored explicitly above should be in cache to begin with.
    for stored_query in (first_day, modal_a, flows):
        assert stored_query.is_stored

    first_day.invalidate_db_cache(cascade=False)

    assert not first_day.is_stored
    assert modal_a.is_stored
    assert flows.is_stored
    assert not cache_table_exists(get_db(), first_day.query_id)
    assert cache_table_exists(get_db(), modal_a.query_id)

    dependency_rows = get_db().fetch("SELECT * FROM cache.dependencies")
    assert bool(dependency_rows)
def test_query_progress(dummy_redis):
    """
    Check the dependency progress counts for a query with one queued, one
    stored and one executing dependency, and again after storing the query.
    """

    def state_machine_for(query):
        # State machine on the dummy redis for the given query.
        return QueryStateMachine(dummy_redis, query.query_id, get_db().conn_id)

    queued = DummyQuery(dummy_param="DUMMY")
    state_machine_for(queued).enqueue()

    stored = DummyQuery(dummy_param="STORED_DUMMY")
    stored.store()

    executing = DummyQuery(dummy_param="EXECUTING_DUMMY")
    executing_machine = state_machine_for(executing)
    executing_machine.enqueue()
    executing_machine.execute()

    nested = DummyQuery(dummy_param=[queued, stored, executing])
    assert query_progress(nested) == {"eligible": 3, "running": 1, "queued": 1}

    nested.store()
    assert query_progress(nested) == {"eligible": 0, "running": 0, "queued": 0}
def test_redis_resync(flowmachine_connect):
    """
    After redis is flushed, a stored query reads as KNOWN; resyncing redis
    with the flowdb cache should bring it back to COMPLETED.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def redis_state():
        # Current state of the stored query according to redis.
        machine = QueryStateMachine(get_redis(), stored_query.query_id, get_db().conn_id)
        return machine.current_query_state

    assert redis_state() == QueryState.COMPLETED
    assert stored_query.is_stored

    get_redis().flushdb()
    assert stored_query.is_stored
    assert redis_state() == QueryState.KNOWN

    resync_redis_with_cache(get_db(), get_redis())
    assert redis_state() == QueryState.COMPLETED
async def action_handler__get_sql(config: "FlowmachineServerConfig", query_id: str) -> ZMQReply:
    """
    Handler for the 'get_sql' action.

    Replies with a SQL string which can be run against flowdb to obtain the
    result of the query with the given `query_id`. An error reply is sent if
    the query id is unknown or the query is not in the COMPLETED state.
    """
    # TODO: currently we can't use QueryStateMachine to determine whether
    # the query_id belongs to a valid query object, so we need to check it
    # manually. Would be good to add a QueryState.UNKNOWN so that we can
    # avoid this separate treatment.
    if not QueryInfoLookup(get_redis()).query_is_known(query_id):
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state_machine = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    query_state = state_machine.current_query_state

    if query_state == QueryState.COMPLETED:
        query_obj = get_query_object_by_id(get_db(), query_id)
        return ZMQReply(
            status="success",
            payload={
                "query_id": query_id,
                "query_state": query_state,
                "sql": query_obj.get_query(),
            },
        )

    # Any other state is reported back as an error carrying that state.
    return ZMQReply(
        status="error",
        msg=f"Query with id '{query_id}' {query_state.description}.",
        payload={"query_id": query_id, "query_state": query_state},
    )
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Invalidating a query in the middle of a chain should drop that query and
    the query built on top of it, but leave the query beneath it cached.
    """
    bottom = daily_location("2016-01-01")
    bottom.store().result()
    middle = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    middle.store().result()
    other_modal = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    top = Flows(middle, other_modal)
    top.store().result()

    # Everything stored explicitly above should be in cache to begin with.
    for stored_query in (bottom, middle, top):
        assert stored_query.is_stored

    middle.invalidate_db_cache()

    assert bottom.is_stored
    assert not middle.is_stored
    assert not top.is_stored
    assert cache_table_exists(get_db(), bottom.query_id)
    assert not cache_table_exists(get_db(), middle.query_id)
    assert not cache_table_exists(get_db(), top.query_id)

    dependency_rows = get_db().fetch("SELECT * FROM cache.dependencies")
    assert bool(dependency_rows)  # Daily location deps should remain
def get_query(self):
    """
    Return an SQL string for this query.

    When the query's state machine reports completion and the cache table
    exists in the database, the string selects from that table. Otherwise
    (including when a NotImplementedError is raised while checking) the
    result of `_make_query()` is returned.

    Returns
    -------
    str
        SQL query string.
    """
    try:
        qualified_name = self.fully_qualified_table_name
        schema, name = qualified_name.split(".")
        qsm = QueryStateMachine(get_redis(), self.query_id, get_db().conn_id)
        qsm.wait_until_complete()
        cached_table_available = qsm.is_completed and get_db().has_table(
            schema=schema, name=name
        )
        if cached_table_available:
            try:
                touch_cache(get_db(), self.query_id)
            except ValueError:
                # Cache record not written yet, which can happen for Models
                # which will call through to this method from their
                # `_make_query` method while writing metadata.
                # In that scenario, the table _is_ written, but won't be
                # visible from the connection touch_cache uses as the cache
                # metadata transaction isn't complete!
                pass
            return f"SELECT * FROM {qualified_name}"
    except NotImplementedError:
        pass
    return self._make_query()
def test_cache_table_exists(flowmachine_connect):
    """
    cache_table_exists should be false for an id that was never cached and
    true for the id of a stored query.
    """
    assert not cache_table_exists(get_db(), "NONEXISTENT_CACHE_ID")

    stored = daily_location("2016-01-01").store().result()
    assert cache_table_exists(get_db(), stored.query_id)
def test_get_set_cache_half_life(flowmachine_connect_with_cache_settings_reset):
    """
    The cache half life should read 1000 initially and 10 after being set to 10.
    """
    initial_half_life = get_cache_half_life(get_db())
    assert initial_half_life == 1000

    # Now set it to something
    set_cache_half_life(get_db(), 10)
    updated_half_life = get_cache_half_life(get_db())
    assert updated_half_life == 10
def test_table_schema(flowmachine_connect):
    """
    Create the 'tests' schema if it does not exist, yield, then drop the
    schema and everything in it.
    """
    create_schema = "CREATE SCHEMA IF NOT EXISTS tests"
    drop_schema = "DROP SCHEMA tests CASCADE"

    get_db().engine.execute(create_schema)
    yield
    get_db().engine.execute(drop_schema)
def real_connections(flowmachine_connect):
    """
    Yield inside a `connections()` context; afterwards reset the cache
    (without protecting table objects), dispose of the engine and flush redis.
    """
    with connections():
        try:
            yield
        finally:
            # Runs whether or not the code using this generator raised.
            reset_cache(get_db(), get_redis(), protect_table_objects=False)
            # Close the connection
            engine = get_db().engine
            engine.dispose()
            # Empty the redis
            redis = get_redis()
            redis.flushdb()
def test_do_cache_simple(flowmachine_connect):
    """
    Writing cache metadata for a single daily location should make its
    cache table exist.
    """
    query = daily_location("2016-01-01")
    write_cache_metadata(get_db(), query)
    table_present = cache_table_exists(get_db(), query.query_id)
    assert table_present
def test_get_set_cache_size_limit(flowmachine_connect_with_cache_settings_reset):
    """
    The maximum cache size should start out positive and read back as 10
    after being set to 10.
    """
    # Initial setting depends on the disk space of the FlowDB container so
    # just check it is nonzero
    initial_limit = get_max_size_of_cache(get_db())
    assert initial_limit > 0

    # Now set it to something
    set_max_size_of_cache(get_db(), 10)
    updated_limit = get_max_size_of_cache(get_db())
    assert updated_limit == 10
def test_size_of_table(flowmachine_connect):
    """
    With a single stored daily location, the size of its cache table should
    equal the total size of the cache.
    """
    stored = daily_location("2016-01-01").store().result()
    cache_size = get_size_of_cache(get_db())
    stored_table_size = get_size_of_table(get_db(), stored.table_name, "cache")
    assert cache_size == stored_table_size
def test_table_records_removed(flowmachine_connect):
    """
    Invalidating a stored query should also remove the cache entry of the
    Table obtained from it via get_table().
    """
    query = daily_location("2016-01-01")
    query.store().result()
    assert query.is_stored

    table = query.get_table()
    assert cache_table_exists(get_db(), table.query_id)

    query.invalidate_db_cache()
    assert not cache_table_exists(get_db(), table.query_id)
def test_shrink_to_size_does_nothing_when_cache_ok(flowmachine_connect):
    """
    shrink_below_size called with the current cache size as the limit should
    remove nothing and leave the stored query in place.
    """
    stored = daily_location("2016-01-01").store().result()
    current_size = get_size_of_cache(get_db())
    removed = shrink_below_size(get_db(), current_size, protected_period=-1)
    assert len(removed) == 0
    assert stored.is_stored
def test_redis_resync_runtimeerror(flowmachine_connect, dummy_redis):
    """
    Resyncing against a dummy redis with allow_flush set to False should
    raise a RuntimeError.
    """
    stored_query = daily_location("2016-01-01").store().result()
    machine = QueryStateMachine(get_redis(), stored_query.query_id, get_db().conn_id)
    assert machine.current_query_state == QueryState.COMPLETED

    dummy_redis.allow_flush = False
    with pytest.raises(RuntimeError):
        resync_redis_with_cache(get_db(), dummy_redis)
def test_size_of_cache(flowmachine_connect):
    """
    With two stored queries, the total cache size should equal the combined
    size of the two tables removed by shrink_one, after which the cache is empty.
    """
    stored = daily_location("2016-01-01").store().result()
    # Stored only so that the cache holds a second table.
    stored.aggregate().store().result()

    size_before = get_size_of_cache(get_db())
    _, first_removed_size = shrink_one(get_db(), protected_period=-1)
    _, second_removed_size = shrink_one(get_db(), protected_period=-1)

    assert size_before == first_removed_size + second_removed_size
    assert get_size_of_cache(get_db()) == 0
def test_do_cache_multi(flowmachine_connect):
    """
    Writing cache metadata for a ModalLocation built from two daily
    locations should make its cache table exist.
    """
    modal = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    write_cache_metadata(get_db(), modal)
    table_present = cache_table_exists(get_db(), modal.query_id)
    assert table_present
def test_shrink_to_size_removes_queries(flowmachine_connect):
    """
    shrink_below_size with a limit one below the current cache size should
    remove exactly one query, leaving the stored query no longer stored.
    """
    stored = daily_location("2016-01-01").store().result()
    size_limit = get_size_of_cache(get_db()) - 1
    removed = shrink_below_size(get_db(), size_limit, protected_period=-1)
    assert len(removed) == 1
    assert not stored.is_stored
def test_geojson_caching_off():
    """
    After turn_off_caching, the geojson cache should hold no entry for the
    requested projection, and a later to_geojson call should not add one.
    """
    crs = 2770  # OSGB36
    lon_lat = make_spatial_unit("lon-lat")
    aggregated = daily_location("2016-01-01", "2016-01-02", spatial_unit=lon_lat).aggregate()

    aggregated.to_geojson(crs=crs)
    aggregated.turn_off_caching()  # Check caching for geojson switches off
    with pytest.raises(KeyError):
        aggregated._geojson[proj4string(get_db(), crs)]

    aggregated.to_geojson(crs=crs)
    with pytest.raises(KeyError):
        aggregated._geojson[proj4string(get_db(), crs)]
def test_do_cache_nested(flowmachine_connect):
    """
    Writing cache metadata for a Flows query built from two ModalLocations
    should make its cache table exist.
    """
    modal_from = ModalLocation(daily_location("2016-01-01"), daily_location("2016-01-02"))
    modal_to = ModalLocation(daily_location("2016-01-03"), daily_location("2016-01-04"))
    flows = Flows(modal_from, modal_to)
    write_cache_metadata(get_db(), flows)
    table_present = cache_table_exists(get_db(), flows.query_id)
    assert table_present
def test_cache_reset(flowmachine_connect):
    """
    reset_cache should leave a previously stored query unstored and move its
    redis state from COMPLETED to KNOWN.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def redis_state():
        # Current state of the stored query according to redis.
        machine = QueryStateMachine(get_redis(), stored_query.query_id, get_db().conn_id)
        return machine.current_query_state

    assert redis_state() == QueryState.COMPLETED
    assert stored_query.is_stored

    reset_cache(get_db(), get_redis())

    assert redis_state() == QueryState.KNOWN
    assert not stored_query.is_stored
def test_can_force_rewrite(flowmachine_connect, get_length):
    """
    After emptying a table written with to_sql, invalidating and writing the
    query again should repopulate the table.
    """
    target = {"name": "test_rewrite", "schema": "tests"}
    query = EventTableSubset(start="2016-01-01", stop="2016-01-01 01:00:00")
    query.to_sql(**target).result()

    # We're going to delete everything from the table, then
    # force a rewrite, and check that the table now has data.
    get_db().engine.execute("DELETE FROM tests.test_rewrite")
    emptied_length = get_length(Table("tests.test_rewrite"))
    assert emptied_length == 0

    query.invalidate_db_cache(**target)
    query.to_sql(**target).result()
    rewritten_length = get_length(Table("tests.test_rewrite"))
    assert rewritten_length > 1
def do_get():
    # Return the query result as a dataframe. When caching is on, a copy of
    # the dataframe held on self._df is returned, reading it from the
    # database first if it has not been set yet.
    use_cache = self._cache
    if use_cache:
        try:
            return self._df.copy()
        except AttributeError:
            # Nothing cached on the object yet; fall through and read it.
            pass

    sql = f"SELECT {self.column_names_as_string_list} FROM ({self.get_query()}) _"
    with get_db().engine.begin():
        frame = pd.read_sql_query(sql, con=get_db().engine)

    if not use_cache:
        return frame
    self._df = frame
    return self._df.copy()
async def action_handler__poll_query(config: "FlowmachineServerConfig", query_id: str) -> ZMQReply:
    """
    Handler for the 'poll_query' action.

    Replies with the kind, current state and dependency progress of the query
    with the given `query_id`, or with an error if the id is unknown.
    """
    query_kind = _get_query_kind_for_query_id(query_id)
    # TODO: we should probably be able to use the QueryStateMachine to determine
    # whether the query already exists.
    if query_kind is None:
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state_machine = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    current_state = state_machine.current_query_state

    # Rebuild the query object from its stored parameters to measure progress.
    schema = FlowmachineQuerySchema()
    query_params = QueryInfoLookup(get_redis()).get_query_params(query_id)
    query_obj = schema.load(query_params)._flowmachine_query_obj

    return ZMQReply(
        status="success",
        payload={
            "query_id": query_id,
            "query_kind": query_kind,
            "query_state": current_state,
            "progress": query_progress(query_obj),
        },
    )
def __iter__(self):
    """
    Execute this query's SQL on the database engine, keep the result on
    `_query_object`, and return self as the iterator.
    """
    engine = get_db().engine
    sql = self.get_query()
    with engine.begin():
        result = engine.execute(sql)
        self._query_object = result
    return self
def test_blocks_on_store_cascades():
    """
    If a store is running on a query that is used in another query, that
    query should wait: get_query on the ModalLocation is expected to return
    only once the worker thread has filled the timer and finished the daily
    location's state machine.
    """
    cell_unit_a = make_spatial_unit("cell")
    dl = daily_location("2016-01-01", spatial_unit=cell_unit_a)
    cell_unit_b = make_spatial_unit("cell")
    dl2 = daily_location("2016-01-02", spatial_unit=cell_unit_b)
    dl.store().result()
    modal = ModalLocation(dl, dl2)
    ticks = []

    def unlock(timer, redis, db_id):
        # Walk dl's state machine through enqueue -> execute -> finish,
        # recording 101 ticks in between.
        machine = QueryStateMachine(redis, dl.query_id, db_id)
        machine.enqueue()
        for tick in range(101):
            timer.append(tick)
        machine.execute()
        machine.finish()

    worker = Thread(target=unlock, args=(ticks, get_redis(), get_db().conn_id))
    worker.start()
    modal.get_query()
    assert len(ticks) == 101
    worker.join()