Exemplo n.º 1
0
 def aggregate(self):
     """
     Aggregate this query to the spatial level.

     Returns
     -------
     SpatialAggregate
         Query object built with this object as its ``locations``,
         representing the location and the total counts of subscribers.
     """
     spatial_aggregate = SpatialAggregate(locations=self)
     return spatial_aggregate
Exemplo n.º 2
0
 def __init__(self, *, joined_spatial_aggregate: JoinedSpatialAggregate):
     """
     Store the joined aggregate and derive the redacted aggregate from it.

     Parameters
     ----------
     joined_spatial_aggregate : JoinedSpatialAggregate
         Its ``locations`` attribute is used to build the redacted spatial
         aggregate, and supplies ``spatial_unit``.
     """
     self.joined_spatial_aggregate = joined_spatial_aggregate
     locations = joined_spatial_aggregate.locations
     spatial_aggregate = SpatialAggregate(locations=locations)
     self.redacted_spatial_agg = RedactedSpatialAggregate(
         spatial_aggregate=spatial_aggregate
     )
     self.spatial_unit = locations.spatial_unit
     # Parent initialisation runs last, after all attributes are set.
     super().__init__()
Exemplo n.º 3
0
def test_get_sql(zmq_port, zmq_host):
    """
    Requesting the SQL for a finished query returns the expected statement.

    A spatial aggregate of a daily_location query is submitted, polled until
    done, and the SQL reported for its result is compared with a SELECT on the
    cache table named after the query id.
    """
    # Message asking the server to run the daily_location spatial aggregate.
    location_params = {
        "query_kind": "daily_location",
        "date": "2016-01-01",
        "method": "last",
        "aggregation_unit": "admin3",
        "subscriber_subset": None,
    }
    run_msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": location_params,
        },
        "request_id": "DUMMY_ID",
    }

    # Build the equivalent query locally to obtain the id the reply must carry.
    locations = daily_location(
        date="2016-01-01",
        method="last",
        spatial_unit=make_spatial_unit("admin", level=3),
        table=None,
        subscriber_subset=None,
        hours=None,
    )
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(locations=locations)
    )
    expected_query_id = q.query_id

    run_reply = send_zmq_message_and_receive_reply(
        run_msg, port=zmq_port, host=zmq_host
    )
    assert run_reply["status"] == "success"
    assert run_reply["payload"]["query_id"] == expected_query_id

    # The SQL is only requested once the query has finished.
    poll_until_done(zmq_port, expected_query_id)

    sql_msg = {
        "action": "get_sql_for_query_result",
        "params": {"query_id": expected_query_id},
        "request_id": "DUMMY_ID",
    }
    sql_reply = send_zmq_message_and_receive_reply(
        sql_msg, port=zmq_port, host=zmq_host
    )
    expected_sql = f"SELECT * FROM cache.x{expected_query_id}"
    assert sql_reply["status"] == "success"
    assert sql_reply["payload"]["sql"] == expected_sql
Exemplo n.º 4
0
    def _flowmachine_query_obj(self):
        """
        Build the underlying flowmachine object.

        Returns
        -------
        Query
            A RedactedSpatialAggregate wrapping a SpatialAggregate whose
            locations are the flowmachine object of ``self.locations``.
        """
        spatial_aggregate = SpatialAggregate(
            locations=self.locations._flowmachine_query_obj
        )
        return RedactedSpatialAggregate(spatial_aggregate=spatial_aggregate)
Exemplo n.º 5
0
def test_send_zmq_message_and_receive_reply(zmq_host, zmq_port):
    """
    Reply from the flowmachine server to the example message stored in `FM_EXAMPLE_MESSAGE` is as expected.

    The test first pins the content of `FM_EXAMPLE_MESSAGE`, then sends it and
    checks that the reply has status "success" and carries the query id of the
    equivalent locally constructed query.
    """

    # Check that FM_EXAMPLE_MESSAGE contains the expected message
    msg_expected = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    assert msg_expected == FM_EXAMPLE_MESSAGE

    # Build the same query locally to obtain the id the reply must carry.
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )
    expected_query_id = q.query_id

    # Check that the flowmachine server sends the expected reply
    reply = send_zmq_message_and_receive_reply(
        FM_EXAMPLE_MESSAGE, host=zmq_host, port=zmq_port
    )
    assert expected_query_id == reply["payload"]["query_id"]
    # Exact comparison on purpose: `in ("success")` is a substring test against
    # the plain string "success" (no tuple is created without a trailing comma),
    # so it would also accept values such as "" or "succ".
    assert reply["status"] == "success"

    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)
Exemplo n.º 6
0
def test_run_daily_location_query(zmq_host, zmq_port):
    """
    Running a daily location query yields a successful reply that includes the query_id.
    """
    location_params = {
        "query_kind": "daily_location",
        "date": "2016-01-01",
        "method": "most-common",
        "aggregation_unit": "admin3",
        "subscriber_subset": None,
    }
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": location_params,
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)

    # Build the equivalent query locally to obtain the id the reply must carry.
    locations = daily_location(
        date="2016-01-01",
        method="most-common",
        spatial_unit=make_spatial_unit("admin", level=3),
        table=None,
        subscriber_subset=None,
        hours=None,
    )
    spatial_aggregate = SpatialAggregate(locations=locations)
    expected_query_id = RedactedSpatialAggregate(
        spatial_aggregate=spatial_aggregate
    ).query_id

    payload = reply["payload"]
    assert reply["status"] == "success"
    assert payload["query_id"] == expected_query_id
    # The payload must hold exactly these keys, in this order.
    assert list(payload) == ["query_id", "progress"]

    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)
Exemplo n.º 7
0
def test_run_query(zmq_port, zmq_host, fm_conn, redis):
    """
    Run daily_location query and check the resulting table contains the expected rows.

    The cache is reset first, the query is submitted and polled until done,
    and the resulting cache table is checked for its row count and its first
    three rows ordered by pcod.
    """
    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    # Build the same query locally to obtain the id the reply must carry.
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )
    expected_query_id = q.query_id

    #
    # Check that we are starting with a clean slate (no cache tables, empty redis).
    #
    reset_cache(get_db(), redis, protect_table_objects=False)
    assert cache_schema_is_empty(get_db())
    assert not redis.exists(expected_query_id)

    #
    # Send message to run the daily_location query and check it was accepted.
    #
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host=zmq_host
    )
    # Exact comparison on purpose: `in ("success")` is a substring test against
    # the plain string "success" (no tuple is created without a trailing comma),
    # so it would also accept values such as "" or "succ".
    assert reply["status"] == "success"
    assert expected_query_id == reply["payload"]["query_id"]
    # NOTE: the check that a redis lookup exists for the query id is disabled:
    # assert redis.exists(expected_query_id)

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, expected_query_id)

    #
    # Check that a cache table for the query result was created
    # and that it contains the expected number of rows.
    #
    output_cache_table = f"x{expected_query_id}"
    assert output_cache_table in get_cache_tables(get_db())
    num_rows = (
        get_db()
        .engine.execute(f"SELECT COUNT(*) FROM cache.{output_cache_table}")
        .fetchone()[0]
    )
    assert num_rows == 14

    #
    # In addition, check first few rows of the result are as expected.
    #
    first_few_rows_expected = [
        ("524 1 02 09", 26),
        ("524 1 03 13", 20),
        ("524 3 08 43", 35),
    ]
    first_few_rows = (
        get_db()
        .engine.execute(
            f"SELECT * FROM cache.{output_cache_table} ORDER BY pcod LIMIT 3"
        )
        .fetchall()
    )
    assert first_few_rows_expected == first_few_rows
Exemplo n.º 8
0
def test_cache_content(
    start_flowmachine_server_with_or_without_dependency_caching, fm_conn, redis
):
    """
    Check which tables end up in the cache after running a query.

    With dependency caching turned on, the query's dependencies are expected
    to be cached as well; with it turned off, only the query itself is
    expected to be cached.
    """
    # The zmq_port fixture is not usable here: this test runs against a
    # different FlowMachine server, whose port comes from the environment.
    zmq_port = os.getenv("FLOWMACHINE_PORT")

    location_params = {
        "query_kind": "daily_location",
        "date": "2016-01-01",
        "method": "last",
        "aggregation_unit": "admin3",
        "subscriber_subset": None,
    }
    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": location_params,
        },
        "request_id": "DUMMY_ID",
    }
    locations = daily_location(
        date="2016-01-01",
        method="last",
        spatial_unit=make_spatial_unit("admin", level=3),
        table=None,
        subscriber_subset=None,
    )
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(locations=locations)
    )

    # Collect the names of the tables that should be cached: always the query
    # itself, plus its dependencies when dependency caching is not disabled.
    expected_cache_tables = [q.table_name]
    caching_flag = os.getenv("FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING")
    if caching_flag == "false":
        dependency_nodes = unstored_dependencies_graph(q).nodes(data="query_object")
        for _node, dependency in dependency_nodes:
            try:
                schema, table_name = dependency.fully_qualified_table_name.split(".")
            except NotImplementedError:
                # Some queries cannot be cached, and don't have table names
                continue
            if schema == "cache":
                expected_cache_tables.append(table_name)

    # The cache must be empty before the query is run.
    assert cache_schema_is_empty(get_db(), check_internal_tables_are_empty=False)

    # Submit the daily_location query and make sure it was accepted.
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host="localhost"
    )
    assert reply["status"] == "success"
    query_id = reply["payload"]["query_id"]

    # Block until the query has finished.
    poll_until_done(zmq_port, query_id)

    # The cache must now hold exactly the expected tables.
    actual_cache_tables = get_cache_tables(get_db())
    assert sorted(expected_cache_tables) == actual_cache_tables