Пример #1
0
def test_max_date(flowmachine_connect):
    """
    Check that the connection's max_date gives the expected date, both with
    the default arguments and with table="all".
    """
    date_format = "%Y-%m-%d"
    expected = "2016-01-07"

    default_max = get_db().max_date()
    assert default_max.strftime(date_format) == expected

    all_tables_max = get_db().max_date(table="all")
    assert all_tables_max.strftime(date_format) == expected
Пример #2
0
def test_touch_cache_record_for_table(flowmachine_connect):
    """
    Touching the cache record of a table should bump its access count and
    last-accessed time, but leave the score and the touch counter alone.
    """
    table = Table("events.calls_20160101")

    def cached_column(column):
        # Read a single column from this table's row in cache.cached.
        rows = get_db().fetch(
            f"SELECT {column} FROM cache.cached WHERE query_id='{table.query_id}'"
        )
        return rows[0][0]

    # Compute time for tables is zero, so set to 1 to avoid zeroing out
    get_db().engine.execute(
        "UPDATE cache.cached SET compute_time = 1 WHERE query_id=%s",
        table.query_id,
    )
    assert 0 == get_score(get_db(), table.query_id)
    assert 1 == cached_column("access_count")
    accessed_before = cached_column("last_accessed")

    touch_cache(get_db(), table.query_id)

    assert 0 == get_score(get_db(), table.query_id)
    assert 2 == cached_column("access_count")
    # No cache touch should be recorded
    touch_counter = get_db().fetch("SELECT nextval('cache.cache_touches');")[0][0]
    assert 2 == touch_counter
    assert accessed_before < cached_column("last_accessed")
Пример #3
0
def test_tables(flowmachine_connect):
    """
    Create two small tables (test_table_a and test_table_b), each holding
    one row, yield the connection, and drop both tables afterwards.
    """
    create_sql = """
    BEGIN;
        CREATE TABLE IF NOT EXISTS test_table_a (
            id NUMERIC PRIMARY KEY,
            field TEXT
        );
        INSERT INTO test_table_a VALUES ('1', 'foo') ON CONFLICT (id) DO NOTHING;

        CREATE TABLE IF NOT EXISTS test_table_b (
            id NUMERIC PRIMARY KEY,
            field TEXT,
            numeric_field NUMERIC
        );
        INSERT INTO test_table_b VALUES ('1', 'foo', '300') ON CONFLICT (id) DO NOTHING;
    END;
    """
    drop_sql = """
            DROP TABLE IF EXISTS test_table_a;
            DROP TABLE IF EXISTS test_table_b;
        """

    # Set up: make sure both tables exist and are populated.
    get_db().engine.execute(create_sql)
    yield flowmachine_connect
    # Tear down: remove the tables again.
    get_db().engine.execute(drop_sql)
Пример #4
0
def test_invalidate_cascade(flowmachine_connect):
    """
    With cascade=False, invalidating a query should remove only that query
    and leave the queries built on top of it stored.
    """
    base_location = daily_location("2016-01-01")
    base_location.store().result()

    first_modal = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    first_modal.store().result()
    second_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )

    flows = Flows(first_modal, second_modal)
    flows.store().result()

    # Everything that was explicitly stored should be in the cache.
    for stored_query in (base_location, first_modal, flows):
        assert stored_query.is_stored

    base_location.invalidate_db_cache(cascade=False)

    # Only the invalidated query itself should have gone.
    assert not base_location.is_stored
    for surviving_query in (first_modal, flows):
        assert surviving_query.is_stored
    assert not cache_table_exists(get_db(), base_location.query_id)
    assert cache_table_exists(get_db(), first_modal.query_id)

    dependency_rows = get_db().fetch("SELECT * FROM cache.dependencies")
    assert dependency_rows
0
def test_query_progress(dummy_redis):
    """
    Check that query_progress gives the expected eligible/running/queued
    counts for a nested query, before and after that query is stored.
    """
    # One dependency that is only queued.
    queued_query = DummyQuery(dummy_param="DUMMY")
    QueryStateMachine(
        dummy_redis, queued_query.query_id, get_db().conn_id
    ).enqueue()

    # One dependency on which store() has been called.
    stored_query = DummyQuery(dummy_param="STORED_DUMMY")
    stored_query.store()

    # One dependency that has been moved on to executing.
    executing_query = DummyQuery(dummy_param="EXECUTING_DUMMY")
    executing_machine = QueryStateMachine(
        dummy_redis, executing_query.query_id, get_db().conn_id
    )
    executing_machine.enqueue()
    executing_machine.execute()

    nested = DummyQuery(dummy_param=[queued_query, stored_query, executing_query])
    assert query_progress(nested) == {"eligible": 3, "running": 1, "queued": 1}

    nested.store()
    assert query_progress(nested) == {"eligible": 0, "running": 0, "queued": 0}
Пример #6
0
def test_redis_resync(flowmachine_connect):
    """
    Check that query states lost from redis can be restored from the flowdb
    cache.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def state_in_redis():
        # Look up the state that redis currently reports for the stored query.
        machine = QueryStateMachine(
            get_redis(), stored_query.query_id, get_db().conn_id
        )
        return machine.current_query_state

    assert state_in_redis() == QueryState.COMPLETED
    assert stored_query.is_stored

    # Wipe redis: the query is still stored, but redis reports it as KNOWN.
    get_redis().flushdb()
    assert stored_query.is_stored
    assert state_in_redis() == QueryState.KNOWN

    # After a resync redis should report the query as COMPLETED again.
    resync_redis_with_cache(get_db(), get_redis())
    assert state_in_redis() == QueryState.COMPLETED
Пример #7
0
async def action_handler__get_sql(config: "FlowmachineServerConfig",
                                  query_id: str) -> ZMQReply:
    """
    Handler for the 'get_sql' action.

    Replies with a SQL string which can be run against flowdb to obtain the
    result of the query with the given `query_id`. An error reply is sent if
    the query id is unknown, or if the query is not in the COMPLETED state.
    """
    # TODO: currently we can't use QueryStateMachine to determine whether
    # the query_id belongs to a valid query object, so we need to check it
    # manually. Would be good to add a QueryState.UNKNOWN so that we can
    # avoid this separate treatment.
    if not QueryInfoLookup(get_redis()).query_is_known(query_id):
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state_machine = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    query_state = state_machine.current_query_state

    if query_state != QueryState.COMPLETED:
        # Known query, but no result is available to select from.
        return ZMQReply(
            status="error",
            msg=f"Query with id '{query_id}' {query_state.description}.",
            payload={"query_id": query_id, "query_state": query_state},
        )

    query_obj = get_query_object_by_id(get_db(), query_id)
    return ZMQReply(
        status="success",
        payload={
            "query_id": query_id,
            "query_state": query_state,
            "sql": query_obj.get_query(),
        },
    )
Пример #8
0
def test_invalidate_cache_midchain(flowmachine_connect):
    """
    Invalidating a query in the middle of a chain should remove that query
    and the query built on top of it, but keep the query at the bottom.
    """
    bottom = daily_location("2016-01-01")
    bottom.store().result()

    middle = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    middle.store().result()
    other_modal = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )

    top = Flows(middle, other_modal)
    top.store().result()

    for stored_query in (bottom, middle, top):
        assert stored_query.is_stored

    middle.invalidate_db_cache()

    # The bottom of the chain survives; the invalidated link and the top go.
    assert bottom.is_stored
    for dropped_query in (middle, top):
        assert not dropped_query.is_stored
    assert cache_table_exists(get_db(), bottom.query_id)
    for dropped_query in (middle, top):
        assert not cache_table_exists(get_db(), dropped_query.query_id)

    # Daily location deps should remain
    dependency_rows = get_db().fetch("SELECT * FROM cache.dependencies")
    assert dependency_rows
Пример #9
0
    def get_query(self):
        """
        Return a string containing an SQL query for this object.

        When the query's state machine reports it as completed and its cache
        table exists in the database, the string selects from that table.
        Otherwise the string produced by `_make_query` is returned.

        Returns
        -------
        str
            SQL query string.

        """
        try:
            qualified_name = self.fully_qualified_table_name
            schema, name = qualified_name.split(".")
            qsm = QueryStateMachine(get_redis(), self.query_id, get_db().conn_id)
            qsm.wait_until_complete()
            if qsm.is_completed and get_db().has_table(schema=schema, name=name):
                try:
                    touch_cache(get_db(), self.query_id)
                except ValueError:
                    # Cache record not written yet, which can happen for Models
                    # which will call through to this method from their
                    # `_make_query` method while writing metadata. In that
                    # scenario, the table _is_ written, but won't be visible
                    # from the connection touch_cache uses as the cache
                    # metadata transaction isn't complete!
                    pass
                return f"SELECT * FROM {qualified_name}"
        except NotImplementedError:
            # Fall through to building the query from scratch.
            pass
        return self._make_query()
Пример #10
0
def test_cache_table_exists(flowmachine_connect):
    """
    Check cache_table_exists for an id that was never cached and for a
    query that has been stored.
    """
    assert not cache_table_exists(get_db(), "NONEXISTENT_CACHE_ID")

    stored_query = daily_location("2016-01-01").store().result()
    assert cache_table_exists(get_db(), stored_query.query_id)
Пример #11
0
def test_get_set_cache_half_life(flowmachine_connect_with_cache_settings_reset):
    """
    Check that the cache half life can be read, and that a new value is
    reported after setting it.
    """
    initial_half_life = get_cache_half_life(get_db())
    assert 1000 == initial_half_life

    # Change the setting and read it back.
    set_cache_half_life(get_db(), 10)
    updated_half_life = get_cache_half_life(get_db())
    assert 10 == updated_half_life
Пример #12
0
def test_table_schema(flowmachine_connect):
    """
    Create a schema called 'tests' (if it does not already exist), yield,
    and then drop the schema together with everything in it.
    """
    create_schema_sql = "CREATE SCHEMA IF NOT EXISTS tests"
    drop_schema_sql = "DROP SCHEMA tests CASCADE"

    get_db().engine.execute(create_schema_sql)
    yield
    get_db().engine.execute(drop_schema_sql)
Пример #13
0
def real_connections(flowmachine_connect):
    """
    Generator that yields once inside the `connections()` context and
    cleans up afterwards.

    When the generator is resumed or closed after the yield, including when an
    exception is raised at the yield, the cache is reset with
    `protect_table_objects=False`, the database engine is disposed and
    redis is flushed, in that order.
    """
    with connections():
        try:
            yield
        finally:
            # Clean up in a `finally` so it also runs if an exception is raised at the yield.
            reset_cache(get_db(), get_redis(), protect_table_objects=False)
            get_db().engine.dispose()  # Close the connection
            get_redis().flushdb()  # Empty the redis
Пример #14
0
def test_do_cache_simple(flowmachine_connect):
    """
    Check that writing cache metadata for a single daily_location makes it
    show up as a cache table.
    """
    simple_query = daily_location("2016-01-01")
    write_cache_metadata(get_db(), simple_query)

    is_cached = cache_table_exists(get_db(), simple_query.query_id)
    assert is_cached
Пример #15
0
def test_get_set_cache_size_limit(flowmachine_connect_with_cache_settings_reset):
    """
    Check that the maximum cache size can be read, and that a new value is
    reported after setting it.
    """
    # Initial setting depends on the disk space of the FlowDB container so
    # just check it is nonzero
    initial_limit = get_max_size_of_cache(get_db())
    assert initial_limit > 0

    # Change the setting and read it back.
    set_max_size_of_cache(get_db(), 10)
    updated_limit = get_max_size_of_cache(get_db())
    assert 10 == updated_limit
Пример #16
0
def test_size_of_table(flowmachine_connect):
    """
    With a single stored query, the size reported for its table should
    equal the size reported for the whole cache.
    """
    stored_query = daily_location("2016-01-01").store().result()

    cache_size = get_size_of_cache(get_db())
    size_of_stored_table = get_size_of_table(
        get_db(), stored_query.table_name, "cache"
    )
    assert cache_size == size_of_stored_table
Пример #17
0
def test_table_records_removed(flowmachine_connect):
    """Test that removing a query from cache removes any Tables in cache that pointed to it."""
    stored_query = daily_location("2016-01-01")
    stored_query.store().result()
    assert stored_query.is_stored

    # The Table object obtained from the stored query is cached as well.
    backing_table = stored_query.get_table()
    assert cache_table_exists(get_db(), backing_table.query_id)

    stored_query.invalidate_db_cache()
    assert not cache_table_exists(get_db(), backing_table.query_id)
Пример #18
0
def test_shrink_to_size_does_nothing_when_cache_ok(flowmachine_connect):
    """
    Check that shrink_below_size removes nothing when the size limit given
    is the current size of the cache.
    """
    stored_query = daily_location("2016-01-01").store().result()

    current_size = get_size_of_cache(get_db())
    removed = shrink_below_size(get_db(), current_size, protected_period=-1)

    assert 0 == len(removed)
    assert stored_query.is_stored
Пример #19
0
def test_redis_resync_runtimeerror(flowmachine_connect, dummy_redis):
    """
    Test that a runtime error is raised if redis is being updated from multiple places when trying to resync.
    """
    stored_query = daily_location("2016-01-01").store().result()
    machine = QueryStateMachine(
        get_redis(), stored_query.query_id, get_db().conn_id
    )
    assert machine.current_query_state == QueryState.COMPLETED

    # Resync against a dummy redis that has flushing switched off.
    dummy_redis.allow_flush = False
    with pytest.raises(RuntimeError):
        resync_redis_with_cache(get_db(), dummy_redis)
Пример #20
0
def test_size_of_cache(flowmachine_connect):
    """
    The reported cache size should equal the summed sizes of the two tables
    removed by shrink_one, and be zero once both have been removed.
    """
    stored_location = daily_location("2016-01-01").store().result()
    # Store a second query so that the cache holds two tables.
    stored_aggregate = stored_location.aggregate().store().result()

    size_before = get_size_of_cache(get_db())
    _, first_removed_size = shrink_one(get_db(), protected_period=-1)
    _, second_removed_size = shrink_one(get_db(), protected_period=-1)

    assert size_before == first_removed_size + second_removed_size
    assert 0 == get_size_of_cache(get_db())
Пример #21
0
def test_do_cache_multi(flowmachine_connect):
    """
    Check that writing cache metadata for a ModalLocation built from two
    daily locations makes it show up as a cache table.
    """
    modal = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    write_cache_metadata(get_db(), modal)

    is_cached = cache_table_exists(get_db(), modal.query_id)
    assert is_cached
Пример #22
0
def test_shrink_to_size_removes_queries(flowmachine_connect):
    """
    Check that shrink_below_size removes the stored query when the size
    limit given is one less than the current size of the cache.
    """
    stored_query = daily_location("2016-01-01").store().result()

    size_limit = get_size_of_cache(get_db()) - 1
    removed = shrink_below_size(get_db(), size_limit, protected_period=-1)

    assert 1 == len(removed)
    assert not stored_query.is_stored
Пример #23
0
def test_geojson_caching_off():
    """Test that switching off caching clears the cache, and doesn't add to it."""
    crs = 2770
    aggregated = daily_location(
        "2016-01-01",
        "2016-01-02",
        spatial_unit=make_spatial_unit("lon-lat"),
    ).aggregate()

    # Produce geojson once while caching is still on, then switch it off.
    aggregated.to_geojson(crs=crs)
    aggregated.turn_off_caching()
    with pytest.raises(KeyError):
        aggregated._geojson[proj4string(get_db(), crs)]

    # Producing geojson again should not add an entry for this projection.
    aggregated.to_geojson(crs=crs)
    with pytest.raises(KeyError):
        aggregated._geojson[proj4string(get_db(), crs)]
Пример #24
0
def test_do_cache_nested(flowmachine_connect):
    """
    Check that writing cache metadata for a Flows query built from two
    ModalLocations makes it show up as a cache table.
    """
    origin = ModalLocation(
        daily_location("2016-01-01"), daily_location("2016-01-02")
    )
    destination = ModalLocation(
        daily_location("2016-01-03"), daily_location("2016-01-04")
    )
    nested_flows = Flows(origin, destination)
    write_cache_metadata(get_db(), nested_flows)

    is_cached = cache_table_exists(get_db(), nested_flows.query_id)
    assert is_cached
Пример #25
0
def test_cache_reset(flowmachine_connect):
    """
    Check that reset_cache removes the stored query and returns its redis
    state to KNOWN.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def state_in_redis():
        # Look up the state that redis currently reports for the stored query.
        machine = QueryStateMachine(
            get_redis(), stored_query.query_id, get_db().conn_id
        )
        return machine.current_query_state

    assert state_in_redis() == QueryState.COMPLETED
    assert stored_query.is_stored

    reset_cache(get_db(), get_redis())

    assert state_in_redis() == QueryState.KNOWN
    assert not stored_query.is_stored
Пример #26
0
def test_can_force_rewrite(flowmachine_connect, get_length):
    """
    Check that a table written with to_sql can be written again after its
    cache record has been invalidated.
    """
    target = {"name": "test_rewrite", "schema": "tests"}
    subset = EventTableSubset(start="2016-01-01", stop="2016-01-01 01:00:00")
    subset.to_sql(**target).result()

    # Empty the table, then invalidate and write again; the table should
    # then have data in it.
    get_db().engine.execute("""DELETE FROM tests.test_rewrite""")
    assert 0 == get_length(Table("tests.test_rewrite"))

    subset.invalidate_db_cache(**target)
    subset.to_sql(**target).result()
    assert 1 < get_length(Table("tests.test_rewrite"))
Пример #27
0
        def do_get():
            # Read the query result into a dataframe. When self._cache is set,
            # the dataframe is kept on self._df and copies of it are returned.
            if not self._cache:
                uncached_sql = f"SELECT {self.column_names_as_string_list} FROM ({self.get_query()}) _"
                with get_db().engine.begin():
                    return pd.read_sql_query(uncached_sql, con=get_db().engine)

            try:
                return self._df.copy()
            except AttributeError:
                # Nothing held yet: run the query once and keep the result.
                fill_sql = f"SELECT {self.column_names_as_string_list} FROM ({self.get_query()}) _"
                with get_db().engine.begin():
                    self._df = pd.read_sql_query(fill_sql, con=get_db().engine)

                return self._df.copy()
Пример #28
0
async def action_handler__poll_query(config: "FlowmachineServerConfig",
                                     query_id: str) -> ZMQReply:
    """
    Handler for the 'poll_query' action.

    Replies with the kind, state and progress of the query with the given
    `query_id`, or with an error reply if the query id is unknown.
    """
    query_kind = _get_query_kind_for_query_id(query_id)
    # TODO: we should probably be able to use the QueryStateMachine to determine
    # whether the query already exists.
    if query_kind is None:
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state_machine = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    query_state = state_machine.current_query_state

    # Rebuild the query object from its stored parameters to work out the
    # progress.
    schema = FlowmachineQuerySchema()
    query_params = QueryInfoLookup(get_redis()).get_query_params(query_id)
    query_obj = schema.load(query_params)._flowmachine_query_obj
    progress = query_progress(query_obj)

    return ZMQReply(
        status="success",
        payload={
            "query_id": query_id,
            "query_kind": query_kind,
            "query_state": query_state,
            "progress": progress,
        },
    )
Пример #29
0
    def __iter__(self):
        """
        Execute this object's query, keep the result on `self._query_object`,
        and return self.
        """
        engine = get_db().engine
        sql = self.get_query()
        with engine.begin():
            result = engine.execute(sql)
            self._query_object = result

        return self
Пример #30
0
def test_blocks_on_store_cascades():
    """
    If a store is running on a query that is used
    in another query, that query should wait.
    """
    cell_unit_first = make_spatial_unit("cell")
    first_day = daily_location("2016-01-01", spatial_unit=cell_unit_first)
    cell_unit_second = make_spatial_unit("cell")
    second_day = daily_location("2016-01-02", spatial_unit=cell_unit_second)
    first_day.store().result()

    modal = ModalLocation(first_day, second_day)
    ticks = []

    def unlock(recorded, redis, db_id):
        # Take the first day's state machine through enqueue -> execute ->
        # finish, recording 101 ticks before it starts executing.
        machine = QueryStateMachine(redis, first_day.query_id, db_id)
        machine.enqueue()
        for tick in range(101):
            recorded.append(tick)
        machine.execute()
        machine.finish()

    worker = Thread(target=unlock, args=(ticks, get_redis(), get_db().conn_id))
    worker.start()
    modal.get_query()
    assert len(ticks) == 101
    worker.join()