def __init__(self, *, metric, locations, method="avg"):
    """
    Join a per-location metric query to a locations query.

    Parameters
    ----------
    metric : Query
        Query producing the metric values to be joined.
    locations : Query
        Query assigning subscribers to locations; its spatial_unit is adopted.
    method : str, default "avg"
        Aggregation method; lowercased and validated against
        ``self.allowed_methods``.

    Raises
    ------
    ValueError
        If ``method`` is not one of ``self.allowed_methods``.
    """
    self.metric = metric
    self.locations = locations
    # self.spatial_unit is used in self._geo_augmented_query
    self.spatial_unit = locations.spatial_unit
    self.method = method.lower()
    if self.method not in self.allowed_methods:
        raise ValueError(
            f"{method} is not recognised method, must be one of {self.allowed_methods}"
        )
    # Mismatched date ranges between the two inputs are suspicious but not
    # fatal — emit a warning for each boundary that differs.
    try:
        for boundary in ("start", "stop"):
            metric_date = parse_datestring(getattr(self.metric, boundary)).date()
            locations_date = parse_datestring(getattr(self.locations, boundary)).date()
            if metric_date != locations_date:
                warnings.warn(
                    f"{self.metric} and {self.locations} have different {boundary} dates: "
                    f"{getattr(self.metric, boundary)}, and {getattr(self.locations, boundary)}"
                )
    except AttributeError:
        pass  # Not everything has a start/stop date
    super().__init__()
def __init__(self, metric, locations, method="mean"):
    """
    Join a per-location metric query to a locations query.

    Parameters
    ----------
    metric : Query
        Query producing the metric values to be joined.
    locations : Query
        Query assigning subscribers to locations; its level and column_name
        are adopted.
    method : str, default "mean"
        Aggregation method; lowercased, must be "mean", "median" or "mode".

    Raises
    ------
    ValueError
        If ``method`` is not a recognised aggregation method.
    """
    self.metric = metric
    self.locations = locations
    self.level = locations.level
    self.column_name = locations.column_name
    self.method = method.lower()
    if self.method not in ("mean", "median", "mode"):
        raise ValueError("{} is not recognised method".format(method))
    # Differing date ranges between the two inputs are worth flagging,
    # but should not prevent construction — warn only.
    try:
        for boundary in ("start", "stop"):
            if (
                parse_datestring(getattr(self.metric, boundary)).date()
                != parse_datestring(getattr(self.locations, boundary)).date()
            ):
                warnings.warn(
                    "{} and {} have different {} dates: {}, and {}".format(
                        self.metric,
                        self.locations,
                        boundary,
                        getattr(self.metric, boundary),
                        getattr(self.locations, boundary),
                    )
                )
    except AttributeError:
        pass  # Not everything has a start/stop date
    super().__init__()
def __init__(self, *daily_locations):
    """
    Combine a collection of daily location queries into a modal location.

    Parameters
    ----------
    daily_locations : DailyLocation
        One or more daily location queries. The level,
        subscriber_identifier and column_name of the first one are adopted
        for the combined query.
    """
    # TODO: check that all the inputs are actually location objects (of an appropriate kind)
    self.start = str(
        min(
            parse_datestring(daily_location.start)
            for daily_location in daily_locations
        )
    )
    # Bug fix: the combined stop must be the latest *stop* date of the
    # inputs, not the latest start date — otherwise the stated range
    # excludes the coverage of the final day.
    self.stop = str(
        max(
            parse_datestring(daily_location.stop)
            for daily_location in daily_locations
        )
    )
    self._all_dls = daily_locations
    logger.info("ModalLocation using {} DailyLocations".format(len(self._all_dls)))
    logger.info(
        "{}/{} DailyLocations are pre-calculated.".format(
            sum(1 for dl in self._all_dls if dl.is_stored), len(self._all_dls)
        )
    )
    # Importing daily_location inputs
    # from first daily_location object.
    self.level = self._all_dls[0].level
    self.subscriber_identifier = self._all_dls[0].subscriber_identifier
    self.column_name = self._all_dls[0].column_name
    super().__init__()
def test_parse():
    """
    Test that several variations on a datestring give the same date
    """
    for variant in ("2016-01-01 10:00", "2016-01-01 10:00:00"):
        assert parse_datestring("2016-01-01").date() == parse_datestring(variant).date()
def __init__(self, start_date, end_date):
    """
    Hold the parsed and standardised-string forms of a date range's endpoints,
    together with the day immediately after the end date.

    Parameters
    ----------
    start_date, end_date : str
        Datestrings parseable by ``parse_datestring``.
    """
    self.start_date = parse_datestring(start_date)
    self.end_date = parse_datestring(end_date)
    self.start_date_as_str = standardise_date(start_date)
    self.end_date_as_str = standardise_date(end_date)
    # Half-open ranges downstream need the day after the (inclusive) end.
    one_day = dt.timedelta(days=1)
    self.one_day_past_end_date = self.end_date + one_day
    self.one_day_past_end_date_as_str = standardise_date(
        self.one_day_past_end_date
    )
def __init__(self, start, stop, modal_locations=None, statistic="avg", unit="km", **kwargs):
    """
    Join each subscriber's modal (home) location over the period to their
    individual location events, ready for aggregation.

    Parameters
    ----------
    start, stop : str
        Datestrings bounding the period of interest.
    modal_locations : ModalLocation, optional
        Pre-computed modal locations; must have a level in
        ("lat-lon", "versioned-cell", "versioned-site"). If omitted, one is
        built from lat-lon daily locations over the period.
    statistic : str, default "avg"
        Aggregation statistic; lowercased and validated against ``valid_stats``.
    unit : str, default "km"
        Distance unit recorded on the query.
    **kwargs
        Passed through to daily_location/subscriber_locations.

    Raises
    ------
    ValueError
        If ``modal_locations`` is not a suitable ModalLocation, or
        ``statistic`` is not in ``valid_stats``.
    """
    # need to subtract one day from hl end in order to be
    # comparing over same period...
    self.stop_sl = stop
    self.stop_hl = str(parse_datestring(stop) - relativedelta(days=1))
    self.start = start

    allowed_levels = ["lat-lon", "versioned-cell", "versioned-site"]
    if modal_locations:
        # A caller-supplied home location must be of the right kind.
        if not (
            isinstance(modal_locations, ModalLocation)
            and modal_locations.level in allowed_levels
        ):
            raise ValueError(
                f"Argument 'modal_locations' should be an instance of ModalLocation class with level in {allowed_levels}"
            )
        home_locations = modal_locations
    else:
        home_locations = ModalLocation(
            *(
                daily_location(date, level="lat-lon", **kwargs)
                for date in list_of_dates(self.start, self.stop_hl)
            )
        )

    sightings = subscriber_locations(self.start, self.stop_sl, level="lat-lon", **kwargs)

    self.statistic = statistic.lower()
    if self.statistic not in valid_stats:
        raise ValueError(
            "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
        )

    self.joined = home_locations.join(
        sightings,
        on_left="subscriber",
        on_right="subscriber",
        how="left",
        left_append="_home_loc",
        right_append="",
    )
    self.unit = unit
    super().__init__()
def __init__(self, *, metric, locations, method="mean"):
    """
    Join a per-location metric query to a locations query.

    Parameters
    ----------
    metric : Query
        Query producing the metric values to be joined.
    locations : Query
        Query assigning subscribers to locations; its level and column_name
        are adopted.
    method : str, default "mean"
        Aggregation method; lowercased and validated against
        ``self.allowed_methods``.

    Raises
    ------
    ValueError
        If ``method`` is not one of ``self.allowed_methods``.
    """
    self.metric = metric
    self.locations = locations
    self.level = locations.level
    self.column_name = locations.column_name
    self.method = method.lower()
    if self.method not in self.allowed_methods:
        raise ValueError(
            f"{method} is not recognised method, must be one of {self.allowed_methods}"
        )
    # Mismatched date ranges between the two inputs are suspicious but not
    # fatal — emit a warning for each boundary that differs.
    try:
        for boundary in ("start", "stop"):
            metric_date = parse_datestring(getattr(self.metric, boundary)).date()
            locations_date = parse_datestring(getattr(self.locations, boundary)).date()
            if metric_date != locations_date:
                warnings.warn(
                    f"{self.metric} and {self.locations} have different {boundary} dates: "
                    f"{getattr(self.metric, boundary)}, and {getattr(self.locations, boundary)}"
                )
    except AttributeError:
        pass  # Not everything has a start/stop date
    super().__init__()
def daily_location(
    date,
    stop=None,
    *,
    spatial_unit: Optional[AnySpatialUnit] = None,
    hours: Optional[Tuple[int, int]] = None,
    method="last",
    table="all",
    subscriber_identifier="msisdn",
    ignore_nulls=True,
    subscriber_subset=None,
):
    """
    Return a query for locating all subscribers on a single day of data.

    Parameters
    ----------
    date : str
        iso format date for the day in question, e.g. 2016-01-01
    stop : str, default None
        optionally specify a stop datetime in iso format date for the day in
        question, e.g. 2016-01-02 06:00:00. If None, defaults to one day
        after `date`.
    spatial_unit : flowmachine.core.spatial_unit.*SpatialUnit, default admin3
        Spatial unit to which subscriber locations will be mapped. See the
        docstring of make_spatial_unit for more information.
    hours : tuple of ints, default None
        Subset the result within certain hours, e.g. (4,17)
        This will subset the query only with these hours, but
        across all specified days. Or set to 'all' to include
        all hours.
    method : str, default 'last'
        The method by which to calculate the location of the subscriber.
        This can be either 'most-common' or 'last'. 'most-common' is
        simply the modal location of the subscribers, whereas 'last' is
        the location of the subscriber at the time of the final call in
        the data.
    table : str, default 'all'
        schema qualified name of the table which the analysis is
        based upon. If 'ALL' it will use all tables that contain
        location data, specified in flowmachine.yml.
    subscriber_identifier : {'msisdn', 'imei'}, default 'msisdn'
        Either msisdn, or imei, the column that identifies the subscriber.
    ignore_nulls : bool, default True
        Passed through to locate_subscribers; presumably controls whether
        rows with null locations are excluded — confirm downstream.
    subscriber_subset : str, list, flowmachine.core.Query, flowmachine.core.Table, default None
        If provided, string or list of string which are msisdn or imeis to limit
        results to; or, a query or table which has a column with a name matching
        subscriber_identifier (typically, msisdn), to limit results to.

    Returns
    -------
    The result of locate_subscribers for the requested day.

    Notes
    -----
    * A date without a hours and mins will be interpreted as
      midnight of that day, so to get data within a single day
      pass (e.g.) '2016-01-01', '2016-01-02'.

    * Use 24 hr format!
    """
    if spatial_unit is None:
        spatial_unit = make_spatial_unit("admin", level=3)

    # Temporary band-aid; marshmallow deserialises date strings
    # to date objects, so we convert it back here because the
    # lower-level classes still assume we are passing date strings.
    date = standardise_date(date)

    if stop is None:
        # 'cast' the date object as a date
        d1 = parse_datestring(date)
        # One day after this
        d2 = d1 + datetime.timedelta(1)
        stop = standardise_date(d2)
    return locate_subscribers(
        start=date,
        stop=stop,
        spatial_unit=spatial_unit,
        hours=hours,
        method=method,
        table=table,
        subscriber_identifier=subscriber_identifier,
        ignore_nulls=ignore_nulls,
        subscriber_subset=subscriber_subset,
    )
def test_datestring_parse_error():
    """
    Test that correct error is raised when failing to parse a datestring.
    """
    # Callable form of pytest.raises: equivalent to the context-manager form.
    pytest.raises(ValueError, parse_datestring, "DEFINITELY NOT A DATE")
def __init__(
    self,
    start,
    stop,
    modal_locations=None,
    statistic="avg",
    unit="km",
    hours="all",
    method="last",
    table="all",
    subscriber_identifier="msisdn",
    ignore_nulls=True,
    subscriber_subset=None,
):
    """
    Join each subscriber's modal (home) location over the period to their
    individual location events, ready for aggregation.

    Parameters
    ----------
    start, stop : str
        Datestrings bounding the period of interest.
    modal_locations : ModalLocation, optional
        Pre-computed modal locations; its spatial unit must have lon-lat
        columns. If omitted, one is built from lon-lat daily locations over
        the period.
    statistic : str, default "avg"
        Aggregation statistic; lowercased and validated against ``valid_stats``.
    unit : str, default "km"
        Distance unit recorded on the query.
    hours, method, table, subscriber_identifier, ignore_nulls, subscriber_subset
        Passed through to daily_location/SubscriberLocations.

    Raises
    ------
    ValueError
        If ``modal_locations`` is not a ModalLocation, or ``statistic`` is
        not in ``valid_stats``.
    """
    # need to subtract one day from hl end in order to be
    # comparing over same period...
    self.stop_sl = stop
    self.stop_hl = str(parse_datestring(stop) - relativedelta(days=1))
    self.start = start

    if modal_locations:
        if not isinstance(modal_locations, ModalLocation):
            raise ValueError(
                "Argument 'modal_locations' should be an instance of ModalLocation class"
            )
        home_locations = modal_locations
        # A caller-supplied home location must expose lon/lat columns.
        home_locations.spatial_unit.verify_criterion("has_lon_lat_columns")
    else:
        per_day = (
            daily_location(
                date,
                spatial_unit=make_spatial_unit("lon-lat"),
                hours=hours,
                method=method,
                table=table,
                subscriber_identifier=subscriber_identifier,
                ignore_nulls=ignore_nulls,
                subscriber_subset=subscriber_subset,
            )
            for date in list_of_dates(self.start, self.stop_hl)
        )
        home_locations = ModalLocation(*per_day)

    sightings = SubscriberLocations(
        self.start,
        self.stop_sl,
        spatial_unit=make_spatial_unit("lon-lat"),
        hours=hours,
        table=table,
        subscriber_identifier=subscriber_identifier,
        ignore_nulls=ignore_nulls,
        subscriber_subset=subscriber_subset,
    )

    self.statistic = statistic.lower()
    if self.statistic not in valid_stats:
        raise ValueError(
            "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
        )

    self.joined = home_locations.join(
        sightings,
        on_left="subscriber",
        on_right="subscriber",
        how="left",
        left_append="_home_loc",
        right_append="",
    )
    self.unit = unit
    super().__init__()