def test_unmoving_at_reference_location_values(get_dataframe):
    """Spot-check hand-picked truth values of UnmovingAtReferenceLocation."""
    unique_locs = UniqueLocations(
        SubscriberLocations(
            "2016-01-01",
            "2016-01-01 10:00",
            spatial_unit=make_spatial_unit("admin", level=3),
        )
    )
    result = get_dataframe(
        UnmovingAtReferenceLocation(
            locations=unique_locs,
            reference_locations=LastLocation("2016-01-01", "2016-01-02"),
        )
    ).set_index("subscriber")
    assert not result.loc["038OVABN11Ak4W5P"].value
    assert result.loc["3XKdxqvyNxO2vLD1"].value
def test_subscriber_location_entropy(get_dataframe):
    """
    Test some hand picked periods and tables.
    """
    # Default spatial unit over a full week.
    full_week = get_dataframe(LocationEntropy("2016-01-01", "2016-01-08")).set_index(
        "subscriber"
    )
    assert full_week.loc["0DB8zw67E9mZAPK2"].entropy == pytest.approx(2.996_587)

    # Shorter period at admin level 1.
    admin1 = get_dataframe(
        LocationEntropy(
            "2016-01-02", "2016-01-05", spatial_unit=make_spatial_unit("admin", level=1)
        )
    ).set_index("subscriber")
    assert admin1.loc["0DB8zw67E9mZAPK2"].entropy == pytest.approx(1.214_889_6)
def test_invalid_statistic_raises_error():
    """
    Test that passing an invalid statistic raises an error.
    """
    with pytest.raises(ValueError, match="'NOT_A_STATISTIC' is not a valid statistic"):
        DistanceSeries(
            subscriber_locations=SubscriberLocations(
                "2016-01-01", "2016-01-07", spatial_unit=make_spatial_unit("lon-lat")
            ),
            statistic="NOT_A_STATISTIC",
        )
def get_spatial_unit_obj(aggregation_unit_string) -> GeomSpatialUnit:
    """
    Given an aggregation unit string (as validated by AggregationUnit()),
    return a FlowMachine spatial unit object.

    Parameters
    ----------
    aggregation_unit_string : str
        Aggregation unit name, e.g. "admin3".

    Returns
    -------
    GeomSpatialUnit
        Spatial unit object built by `make_spatial_unit`.

    Raises
    ------
    NotImplementedError
        If the aggregation unit type is not supported by this helper.
    """
    if "admin" in aggregation_unit_string:
        # Parse the full numeric suffix rather than just the final character,
        # so multi-digit levels (e.g. "admin10") are handled correctly as well.
        level = int(aggregation_unit_string.split("admin")[-1])
        spatial_unit_args = {"spatial_unit_type": "admin", "level": level}
    else:
        raise NotImplementedError(
            f"The helper function `get_spatial_unit_obj` does not support aggregation units of type '{aggregation_unit_string}'."
        )
    return make_spatial_unit(**spatial_unit_args)
def test_no_cast_for_below_day(get_dataframe):
    """
    Test that results aren't cast to date for smaller time buckets.
    """
    hourly_series = DistanceSeries(
        subscriber_locations=SubscriberLocations(
            "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
        ),
        time_bucket="hour",
    )
    frame = get_dataframe(hourly_series)
    # Sub-day buckets should keep full datetime objects, not plain dates.
    assert isinstance(frame.datetime[0], datetime)
def test_unmoving_at_reference_location_counts_column_names(get_column_names_from_run):
    """Columns from a run should be exactly ["pcod", "value"]."""
    counts_query = UnmovingAtReferenceLocationCounts(
        UnmovingAtReferenceLocation(
            locations=UniqueLocations(
                SubscriberLocations(
                    "2016-01-01",
                    "2016-01-01 10:00",
                    spatial_unit=make_spatial_unit("admin", level=3),
                )
            ),
            reference_locations=LastLocation("2016-01-01", "2016-01-02"),
        )
    )
    assert get_column_names_from_run(counts_query) == ["pcod", "value"]
def test_join_with_polygon(get_dataframe, get_length):
    """
    Test that flowmachine.JoinToLocation can get the (arbitrary) polygon
    of each cell.
    """
    subscriber_locs = SubscriberLocations(
        "2016-01-05", "2016-01-07", spatial_unit=make_spatial_unit("cell")
    )
    joined = JoinToLocation(
        subscriber_locs,
        spatial_unit=make_spatial_unit(
            "polygon",
            region_id_column_name="admin3pcod",
            geom_table="geography.admin3",
            geom_column="geom",
        ),
    )
    frame = get_dataframe(joined)
    assert sorted(frame.columns) == sorted(
        ["admin3pcod", "location_id", "subscriber", "time"]
    )
    # Joining should not drop (or duplicate) any rows.
    assert len(frame) == get_length(subscriber_locs)
def test_column_names_meaningful_locations(
    get_column_names_from_run, meaningful_locations_labels
):
    """Test that column_names property matches head(0) for MeaningfulLocations."""
    locations_query = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )
            ),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="evening",
    )
    assert get_column_names_from_run(locations_query) == locations_query.column_names
def test_join_with_versioned_cells(get_dataframe, get_length):
    """
    Test that flowmachine.JoinToLocation can fetch the cell version.
    """
    locs = SubscriberLocations(
        "2016-01-05", "2016-01-07", spatial_unit=make_spatial_unit("cell")
    )
    joined_df = get_dataframe(
        JoinToLocation(locs, spatial_unit=make_spatial_unit("versioned-cell"))
    )
    # As our database is complete we should not drop any rows
    assert len(joined_df) == get_length(locs)
    moved = joined_df.location_id.isin(moving_sites)
    before_changeover = joined_df.time <= move_date
    # Towers before the changeover date, or those that never moved, should
    # all be at version zero.
    assert (joined_df[before_changeover | ~moved].version == 0).all()
    # Events after the changeover at towers that moved should be version one.
    assert (joined_df[~before_changeover & moved].version == 1).all()
def test_active_at_reference_location_counts_column_names(get_column_names_from_run):
    """Columns of ActiveAtReferenceLocationCounts should be ["pcod", "value"]."""
    counts_query = ActiveAtReferenceLocationCounts(
        ActiveAtReferenceLocation(
            subscriber_locations=UniqueLocations(
                SubscriberLocations(
                    "2016-01-01",
                    "2016-01-02",
                    spatial_unit=make_spatial_unit("admin", level=3),
                )
            ),
            reference_locations=daily_location("2016-01-03"),
        )
    )
    assert get_column_names_from_run(counts_query) == ["pcod", "value"]
def __init__(
    self,
    start,
    stop,
    *,
    location,
    spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
    hours="all",
    table="all",
    subscriber_identifier="msisdn",
    ignore_nulls=True,
    subscriber_subset=None,
):
    """
    Parameters
    ----------
    start, stop : str
        Start and end of the period of interest; normalised with
        `standardise_date` before being stored.
    location
        Location(s) of interest, or the literal string "any" (which is
        only valid together with the cell spatial unit).
    spatial_unit : AnySpatialUnit, default cell spatial unit
        Spatial unit for the underlying SubscriberLocations query.
    hours, table, subscriber_identifier, ignore_nulls, subscriber_subset
        Passed through unchanged to SubscriberLocations.

    Raises
    ------
    ValueError
        If location == "any" is combined with a non-cell spatial unit.
    """
    if location == "any" and spatial_unit != make_spatial_unit("cell"):
        raise ValueError(
            "Invalid parameter combination: location='any' can only be used with cell spatial unit."
        )
    self.start = standardise_date(start)
    self.stop = standardise_date(stop)
    self.location = location
    # Delegate to a SubscriberLocations query over the same period/parameters.
    self.ul = SubscriberLocations(
        self.start,
        self.stop,
        spatial_unit=spatial_unit,
        hours=hours,
        table=table,
        subscriber_identifier=subscriber_identifier,
        ignore_nulls=ignore_nulls,
        subscriber_subset=subscriber_subset,
    )
    # Mirror attributes from the inner query for downstream access.
    self.table = self.ul.table
    self.subscriber_identifier = self.ul.subscriber_identifier
    super().__init__()
def test_can_get_pcods(get_dataframe):
    """
    SubscriberLocations() can make queries at the p-code level.
    """
    pcod_locations = SubscriberLocations(
        "2016-01-01 13:30:30",
        "2016-01-02 16:25:00",
        spatial_unit=make_spatial_unit(
            "polygon",
            region_id_column_name="admin3pcod",
            geom_table="geography.admin3",
        ),
    )
    frame = get_dataframe(pcod_locations)
    assert frame.admin3pcod[0].startswith("524")
def test_join_with_lon_lat(get_dataframe):
    """
    Test that flowmachine.JoinToLocation can get the lon-lat values of the cell.
    """
    ul = SubscriberLocations(
        "2016-01-05", "2016-01-07", spatial_unit=make_spatial_unit("cell")
    )
    df = get_dataframe(JoinToLocation(ul, spatial_unit=make_spatial_unit("lon-lat")))
    expected_cols = sorted(["subscriber", "time", "location_id", "lon", "lat"])
    assert sorted(df.columns) == expected_cols
    # Pick out one cell that moves location and assert that the
    # lon-lats are right
    focal_cell = "dJb0Wd"
    lon1, lat1 = (83.09284486, 27.648837800000003)
    lon2, lat2 = (83.25769074752517, 27.661443318109132)
    post_move = df[(df.time > move_date) & (df["location_id"] == focal_cell)]
    pre_move = df[(df.time < move_date) & (df["location_id"] == focal_cell)]
    # Bug fix: these four checks were previously bare expressions — the
    # np.isclose(...).all() results were computed and discarded, so a wrong
    # coordinate would never fail the test. They must be asserted.
    assert np.isclose(pre_move.lon, lon1).all()
    assert np.isclose(pre_move.lat, lat1).all()
    assert np.isclose(post_move.lon, lon2).all()
    assert np.isclose(post_move.lat, lat2).all()
def test_reprojection():
    """
    Test that in db reprojection works.
    """
    aggregated = daily_location(
        "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
    ).aggregate()
    # NOTE(review): the original comment labelled EPSG 2770 as "OSGB36", but
    # OSGB36 is EPSG 27700 — confirm which CRS was actually intended.
    geojson = aggregated.to_geojson(crs=2770)
    assert geojson["features"][0]["geometry"]["coordinates"] == [
        -8094697.51781301,
        9465052.88370377,
    ]
    assert geojson["properties"]["crs"] == proj4string(aggregated.connection, 2770)
def test_handles_list_of_locations(get_dataframe):
    """
    FirstLocation() subsets data based on a list of locations, rather than
    a single one.
    """
    dfl = FirstLocation(
        "2016-01-01",
        "2016-01-04",
        # Bug fix: a missing comma between "m9jL23" and "LVnDQL" caused
        # implicit string concatenation ("m9jL23LVnDQL"), so only two
        # locations were actually tested instead of the intended three.
        location=["QeBRM8", "m9jL23", "LVnDQL"],
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    df = get_dataframe(dfl)
    df.set_index("subscriber", inplace=True)
    assert str(df.loc["038OVABN11Ak4W5P", "time"]) == "2016-01-01 05:02:10+00:00"
def test_whether_scores_are_within_score_bounds(get_dataframe):
    """
    Test whether the scores are within the bounds of maximum and minimum scores.
    """
    scores = get_dataframe(
        EventScore(
            start="2016-01-01",
            stop="2016-01-05",
            spatial_unit=make_spatial_unit("versioned-site"),
        )
    )
    bounded = scores[["score_hour", "score_dow"]]
    # Both score columns must lie within [-1, 1].
    assert all(bounded.max() <= [1, 1])
    assert all(bounded.min() >= [-1, -1])
def test_last_loc_lon_lat(get_dataframe):
    """
    LastLocation() can make queries at the lon-lat level.
    """
    frame = get_dataframe(
        LastLocation(
            "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
        )
    )
    frame.set_index("subscriber", inplace=True)
    row = frame.loc["yqw50eNyEwOxNDGL"]
    assert float(row.lon) == pytest.approx(83.09669810947962)
    assert float(row.lat) == pytest.approx(29.135638957790576)
def test_unique_visitor_counts(get_dataframe):
    """
    Values test for unique visitor counts.
    """
    reference_counts = ActiveAtReferenceLocationCounts(
        ActiveAtReferenceLocation(
            subscriber_locations=UniqueLocations(
                SubscriberLocations(
                    "2016-01-01",
                    "2016-01-02",
                    spatial_unit=make_spatial_unit("admin", level=3),
                )
            ),
            reference_locations=daily_location("2016-01-03"),
        )
    )
    subscriber_counts = UniqueSubscriberCounts(
        "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("admin", level=3)
    )
    visitors = get_dataframe(
        UniqueVisitorCounts(reference_counts, subscriber_counts)
    ).set_index("pcod")
    assert visitors.loc["524 1 01 04"].value == 66
    assert visitors.loc["524 3 08 44"].value == 170
def test_min_displacement_zero(get_dataframe):
    """
    When time period for diplacement and home location are the same min
    displacement should be zero for all subscribers.
    """
    reference = daily_location("2016-01-01", spatial_unit=make_spatial_unit("lon-lat"))
    displacement = Displacement(
        "2016-01-01", "2016-01-07", reference_location=reference, statistic="min"
    )
    assert get_dataframe(displacement).value.sum() == 0
def test_returns_expected_values(stat, sub_a_expected, sub_b_expected, get_dataframe):
    """
    Test that we get expected return values for the various statistics.
    """
    sub_a_id, sub_b_id = "j6QYNbMJgAwlVORP", "NG1km5NzBg5JD8nj"
    reference = daily_location("2016-01-01", spatial_unit=make_spatial_unit("lon-lat"))
    series = get_dataframe(
        DistanceSeries(
            subscriber_locations=SubscriberLocations(
                "2016-01-01", "2016-01-07", spatial_unit=make_spatial_unit("lon-lat")
            ),
            reference_location=reference,
            statistic=stat,
        )
    ).set_index(["subscriber", "datetime"])
    assert series.loc[(sub_a_id, datetime(2016, 1, 1))].value == pytest.approx(
        sub_a_expected
    )
    assert series.loc[(sub_b_id, datetime(2016, 1, 6))].value == pytest.approx(
        sub_b_expected
    )
def test_invalid_time_bucket_raises_error():
    """
    Test that passing an invalid time bucket raises an error.
    """
    with pytest.raises(
        ValueError, match="'NOT_A_BUCKET' is not a valid value for time_bucket"
    ):
        DistanceSeries(
            subscriber_locations=SubscriberLocations(
                "2016-01-01", "2016-01-07", spatial_unit=make_spatial_unit("lon-lat")
            ),
            time_bucket="NOT_A_BUCKET",
        )
def test_some_results(get_dataframe):
    """
    DistanceMatrix() returns a dataframe that contains hand-picked results.
    """
    distances = get_dataframe(
        DistanceMatrix(spatial_unit=make_spatial_unit("versioned-site"))
    ).set_index(["site_id_from", "version_from", "site_id_to", "version_to"])
    expected_distances = {
        ("8wPojr", 1, "GN2k0G", 0): 789.23239740488,
        ("8wPojr", 0, "GN2k0G", 0): 769.20155628077,
        ("8wPojr", 1, "DbWg4K", 0): 757.97771793683,
    }
    for index_key, expected in expected_distances.items():
        assert distances.loc[index_key]["value"] == pytest.approx(expected)
def test_unmoving_values(get_dataframe):
    """Spot-check hand-picked truth values for Unmoving."""
    unmoving_query = Unmoving(
        locations=UniqueLocations(
            SubscriberLocations(
                "2016-01-01",
                "2016-01-01 10:00",
                spatial_unit=make_spatial_unit("admin", level=3),
            )
        )
    )
    result = get_dataframe(unmoving_query).set_index("subscriber")
    assert not result.loc["038OVABN11Ak4W5P"].value
    assert result.loc["0Gl95NRLjW2aw8pW"].value
def test_contact_reference_location_no_spatial_unit_raises():
    """
    Test ValueError is raised for contact_location without spatial_unit attribute.
    """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    # Wrapping the ModalLocation in a CustomQuery strips its spatial_unit
    # attribute, which should trigger the error.
    modal = ModalLocation(
        *[
            daily_location(
                d,
                spatial_unit=make_spatial_unit("versioned-cell"),
                subscriber_subset=balance.counterparts_subset(include_subscribers=True),
            )
            for d in list_of_dates("2016-01-01", "2016-01-03")
        ]
    )
    stripped = CustomQuery(modal.get_query(), modal.column_names)
    with pytest.raises(ValueError):
        ContactReferenceLocationStats(balance, stripped)
def test_contact_reference_location_bad_spatial_unit_raises():
    """
    Test InvalidSpatialUnitError is raised for contact_location with
    non-compliant spatial unit.
    """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    modal = ModalLocation(
        *[
            daily_location(
                d,
                spatial_unit=make_spatial_unit("admin", level=3),
                subscriber_subset=balance.counterparts_subset(include_subscribers=True),
            )
            for d in list_of_dates("2016-01-01", "2016-01-03")
        ]
    )
    with pytest.raises(InvalidSpatialUnitError):
        ContactReferenceLocationStats(balance, modal)
def test_cluster_is_within_envelope(get_dataframe):
    """
    Test that all the clusters are within the envelope formed by all the
    towers in the cluster.
    """
    call_days = CallDays(
        SubscriberLocations(
            "2016-01-01", "2016-01-04", spatial_unit=make_spatial_unit("versioned-site")
        )
    )
    clusters = HartiganCluster(calldays=call_days, radius=50).to_geopandas()
    sites = Sites().to_geopandas().set_index(["site_id", "version"])
    tower_points = GeoSeries(clusters.apply(lambda row: get_geom_point(row, sites), 1))
    assert all(clusters.intersects(tower_points))
def test_all_options_hartigan():
    """
    Test whether Hartigan works when changing all options.
    """
    call_days = CallDays(
        SubscriberLocations(
            "2016-01-01", "2016-01-04", spatial_unit=make_spatial_unit("versioned-site")
        )
    )
    clustered = HartiganCluster(
        calldays=call_days, radius=50, buffer=2, call_threshold=2
    ).to_geopandas()
    assert set(clustered.columns) == {
        "subscriber",
        "geometry",
        "rank",
        "calldays",
        "site_id",
        "version",
        "centroid",
    }
def test_call_threshold_works(get_dataframe):
    """
    Test whether a call threshold above 1 limits the number of clusters.
    """
    call_days = CallDays(
        SubscriberLocations(
            "2016-01-01", "2016-01-04", spatial_unit=make_spatial_unit("versioned-site")
        )
    )
    unfiltered = HartiganCluster(calldays=call_days, radius=50).to_geopandas()
    # There must be at least one single-callday cluster for the threshold
    # to be able to remove anything.
    assert any(unfiltered.calldays == 1)
    thresholded = get_dataframe(
        HartiganCluster(calldays=call_days, radius=50, call_threshold=2)
    )
    assert len(unfiltered) > len(thresholded)
def test_meaningful_locations_aggregate_disallowed_spatial_unit_raises():
    """Test that a bad spatial unit raises an InvalidSpatialUnitError."""
    with pytest.raises(InvalidSpatialUnitError):
        MeaningfulLocationsAggregate(
            meaningful_locations=MeaningfulLocations(
                clusters=HartiganCluster(
                    calldays=CallDays(
                        subscriber_locations=SubscriberLocations(
                            start="2016-01-01",
                            stop="2016-01-02",
                            spatial_unit=make_spatial_unit("versioned-site"),
                        )
                    ),
                    radius=1,
                ),
                scores=EventScore(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                ),
                labels=labels,
                label="evening",
            ),
            spatial_unit=make_spatial_unit("lon-lat"),
        )
def test_default_indexes():
    """
    Check that default indexing columns are correct.
    """
    assert daily_location("2016-01-01", "2016-01-02").index_cols == [
        ["pcod"],
        '"subscriber"',
    ]
    lon_lat_dl = daily_location(
        "2016-01-01", "2016-01-02", spatial_unit=make_spatial_unit("lon-lat")
    )
    assert lon_lat_dl.index_cols == [["lon", "lat"], '"subscriber"']
    assert SubscriberDegree("2016-01-01", "2016-01-02").index_cols == ['"subscriber"']