def __init__(
        self,
        start,
        stop,
        statistic="avg",
        *,
        hours="all",
        tables="all",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
        exclude_self_calls=True,
    ):
        self.tables = tables
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.direction = Direction(direction)
        self.exclude_self_calls = exclude_self_calls

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        column_list = [
            "msisdn", "msisdn_counterpart", "id", "location_id", "outgoing"
        ]
        self.tables = tables

        # EventsTablesUnion will only subset on the subscriber identifier,
        # which means that we need to query for a unioned table twice. That has
        # a considerable negative impact on execution time.
        self.unioned_from_query = EventsTablesUnion(
            self.start,
            self.stop,
            columns=column_list,
            tables=self.tables,
            subscriber_identifier="msisdn",
            hours=hours,
            subscriber_subset=subscriber_subset,
        )

        self.unioned_to_query = EventsTablesUnion(
            self.start,
            self.stop,
            columns=column_list,
            tables=self.tables,
            subscriber_identifier="msisdn_counterpart",
            hours=hours,
            subscriber_subset=subscriber_subset,
        )

        self.distance_matrix = DistanceMatrix()

        super().__init__()
def test_events_table_union_subscriber_ident_substitutions(ident):
    """Test that EventTableSubset replaces the subscriber ident column name with subscriber."""
    etu = EventsTablesUnion(
        "2016-01-01",
        "2016-01-02",
        columns=[ident],
        tables=["events.calls"],
        subscriber_identifier=ident,
    )
    assert "subscriber" == etu.head(0).columns[0]
    assert ["subscriber"] == etu.column_names
Пример #3
0
    def __init__(
        self,
        start,
        stop,
        hours=(20, 4),
        *,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
        tables="all",
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.hours = hours
        self.tables = tables

        column_list = [
            self.subscriber_identifier,
            "datetime",
            *self.direction.required_columns,
        ]

        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.tables,
            columns=column_list,
            hours="all",
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        super().__init__()
def test_length(get_length):
    """
    Test that EventsTablesUnion has the correct length
    """
    etu = EventsTablesUnion(
        "2016-01-01", "2016-01-02", columns=["msisdn", "msisdn_counterpart", "datetime"]
    )
    assert get_length(etu) == 2500
Пример #5
0
    def __init__(
        self,
        start,
        stop,
        *,
        subscriber_identifier="msisdn",
        statistic="sum",
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours="all",
        subscriber_subset=None,
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        if spatial_unit is None:
            self.spatial_unit = make_spatial_unit("admin", level=3)
        else:
            self.spatial_unit = spatial_unit
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats
                )
            )

        column_list = [
            "id",
            self.subscriber_identifier,
            "msisdn_counterpart",
            "outgoing",
            "duration",
            "location_id",
            "datetime",
        ]
        unioned_query = location_joined_query(
            EventsTablesUnion(
                self.start,
                self.stop,
                tables="events.calls",
                columns=column_list,
                hours=hours,
                subscriber_subset=subscriber_subset,
                subscriber_identifier=self.subscriber_identifier,
            ),
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )

        self.joined = unioned_query.subset("outgoing", "t").join(
            unioned_query.subset("outgoing", "f"),
            on_left="id",
            on_right="id",
            right_append="_counterpart",
            how="left",
        )
        warnings.warn("This query is considerably slower than the other variants.")
        super().__init__()
Пример #6
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        table: Union[None, List[str]] = None,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        interval: str = "hour",
        direction: Union[str, Direction] = Direction.BOTH,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        subscriber_identifier="msisdn",
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.table = table
        self.spatial_unit = spatial_unit
        self.interval = interval
        self.direction = Direction(direction)

        if self.interval not in self.allowed_intervals:
            raise ValueError("'Interval must be one of: {} got: {}".format(
                self.allowed_intervals, self.interval))

        self.time_cols = ["(datetime::date)::text AS date"]
        if self.interval == "hour" or self.interval == "min":
            self.time_cols.append("extract(hour FROM datetime) AS hour")
        if self.interval == "min":
            self.time_cols.append("extract(minute FROM datetime) AS min")

        events_tables_union_cols = [
            "location_id", "datetime", subscriber_identifier
        ]
        # if we need to filter on outgoing/incoming calls, we will also fetch this
        # column. Don't fetch it if it is not needed for both efficiency and the
        # possibility that we might want to do pass another data type which does not
        # have this information.
        events_tables_union_cols += self.direction.required_columns

        self.unioned = location_joined_query(
            EventsTablesUnion(
                self.start,
                self.stop,
                tables=self.table,
                columns=events_tables_union_cols,
                hours=hours,
                subscriber_subset=subscriber_subset,
                subscriber_identifier=subscriber_identifier,
            ),
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )
        super().__init__()
def test_get_list_of_tables(get_length):
    """
    Test that we can get only sms
    """

    etu = EventsTablesUnion(
        "2016-01-01",
        "2016-01-02",
        columns=["msisdn", "msisdn_counterpart", "datetime"],
        tables=["events.calls", "events.sms"],
    )
    assert get_length(etu) == 2500
Пример #8
0
    def __init__(
        self,
        start,
        stop,
        *,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.OUT,
        statistic="sum",
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours="all",
        subscriber_subset=None,
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        if spatial_unit is None:
            self.spatial_unit = make_spatial_unit("admin", level=3)
        else:
            self.spatial_unit = spatial_unit
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats
                )
            )

        column_list = [
            self.subscriber_identifier,
            "msisdn_counterpart",
            "duration",
            "location_id",
            "datetime",
            *self.direction.required_columns,
        ]
        self.unioned_query = location_joined_query(
            EventsTablesUnion(
                self.start,
                self.stop,
                tables="events.calls",
                columns=column_list,
                hours=hours,
                subscriber_subset=subscriber_subset,
                subscriber_identifier=self.subscriber_identifier,
            ),
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )
        super().__init__()
Пример #9
0
    def __init__(
        self,
        start,
        stop,
        statistic="avg",
        *,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        hours="all",
        tables="all",
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.spatial_unit = spatial_unit
        self.hours = hours
        self.tables = tables
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.statistic = statistic

        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        column_list = [
            self.subscriber_identifier,
            "location_id",
            "datetime",
            *self.direction.required_columns,
        ]

        self.unioned_query = location_joined_query(
            EventsTablesUnion(
                self.start,
                self.stop,
                tables=self.tables,
                columns=column_list,
                hours=hours,
                subscriber_identifier=subscriber_identifier,
                subscriber_subset=subscriber_subset,
            ),
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )

        super().__init__()
Пример #10
0
    def __init__(
        self,
        start,
        stop,
        contact_reciprocal,
        *,
        direction: Union[str, Direction] = Direction.OUT,
        subscriber_identifier="msisdn",
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        tables="all",
        exclude_self_calls=True,
    ):

        self.start = start
        self.stop = stop
        self.subscriber_identifier = subscriber_identifier
        self.hours = hours
        self.exclude_self_calls = exclude_self_calls
        self.direction = Direction(direction)
        self.tables = tables

        column_list = [
            self.subscriber_identifier,
            "msisdn",
            "msisdn_counterpart",
            *self.direction.required_columns,
        ]

        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.tables,
            columns=column_list,
            hours=hours,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )

        self.contact_reciprocal_query = contact_reciprocal

        super().__init__()
Пример #11
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours: Optional[Tuple[int, int]] = None,
        tables="all",
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        exclude_self_calls=True,
        subscriber_subset=None,
    ):
        self.tables = tables
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.direction = Direction(direction)
        self.subscriber_identifier = subscriber_identifier
        self.exclude_self_calls = exclude_self_calls
        self.tables = tables

        column_list = [
            self.subscriber_identifier,
            "msisdn_counterpart",
            *self.direction.required_columns,
        ]

        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            columns=column_list,
            tables=self.tables,
            subscriber_identifier=self.subscriber_identifier,
            hours=hours,
            subscriber_subset=subscriber_subset,
        )
        self._cols = [
            "subscriber", "msisdn_counterpart", "events", "proportion"
        ]
        super().__init__()
Пример #12
0
    def __init__(
        self,
        start,
        stop,
        *,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.OUT,
        statistic="sum",
        hours="all",
        subscriber_subset=None,
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.hours = hours
        self.direction = Direction(direction)
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats
                )
            )

        column_list = [
            self.subscriber_identifier,
            "duration",
            *self.direction.required_columns,
        ]
        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables="events.calls",
            columns=column_list,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=subscriber_identifier,
        )
        super().__init__()
Пример #13
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        table: str = "all",
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        direction: Union[Direction, str] = Direction.BOTH,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        subscriber_identifier="msisdn",
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.table = table
        self.spatial_unit = spatial_unit
        self.direction = Direction(direction)

        self.unioned_query = location_joined_query(
            EventsTablesUnion(
                self.start,
                self.stop,
                columns=[
                    "id",
                    "outgoing",
                    "location_id",
                    "datetime",
                    subscriber_identifier,
                ],
                tables=self.table,
                hours=hours,
                subscriber_subset=subscriber_subset,
                subscriber_identifier=subscriber_identifier,
            ),
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )

        super().__init__()
Пример #14
0
    def __init__(
        self,
        start,
        stop,
        *,
        subscriber_identifier="msisdn",
        statistic="sum",
        hours="all",
        subscriber_subset=None,
    ):
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats
                )
            )

        column_list = [
            self.subscriber_identifier,
            "outgoing",
            "duration",
            "msisdn_counterpart",
        ]

        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables="events.calls",
            columns=column_list,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=self.subscriber_identifier,
        )
        super().__init__()
Пример #15
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours="all",
        tables="all",
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        exclude_self_calls=True,
        subscriber_subset=None,
    ):
        self.start = start
        self.stop = stop
        self.hours = hours
        self.direction = Direction(direction)
        self.subscriber_identifier = subscriber_identifier
        self.exclude_self_calls = exclude_self_calls
        self.tables = tables

        column_list = [
            self.subscriber_identifier,
            "msisdn_counterpart",
            *self.direction.required_columns,
        ]

        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            hours=self.hours,
            tables=self.tables,
            columns=column_list,
            subscriber_identifier=self.subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        self._cols = ["subscriber", "degree"]
        super().__init__()
Пример #16
0
    def __init__(
        self,
        start,
        stop,
        phase="hour",
        *,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        tables="all",
    ):

        self.tables = tables
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.hours = hours

        column_list = [
            self.subscriber_identifier,
            "datetime",
            *self.direction.required_columns,
        ]

        # extracted from the POSTGRES manual
        allowed_phases = (
            "century",
            "day",
            "decade",
            "dow",
            "doy",
            "epoch",
            "hour",
            "isodow",
            "isoyear",
            "microseconds",
            "millennium",
            "milliseconds",
            "minute",
            "month",
            "quarter",
            "second",
            "week",
            "year",
        )

        if phase not in allowed_phases:
            raise ValueError(
                f"{phase} is not a valid phase. Choose one of {allowed_phases}"
            )

        self.phase = phase

        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.tables,
            columns=column_list,
            hours=hours,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        super().__init__()
def test_events_tables_union_column_names(columns):
    """Test that EventsTableUnion column_names property is accurate."""
    etu = EventsTablesUnion(
        "2016-01-01", "2016-01-02", columns=columns, tables=["events.calls"]
    )
    assert etu.head(0).columns.tolist() == etu.column_names
def test_events_tables_union_raises_error():
    """EventsTablesUnion should error when trying to use all columns with disparate event types"""
    with pytest.raises(ValueError):
        EventsTablesUnion("2016-01-01", "2016-01-02", columns=["*"])