Пример #1
0
    def __init__(self, sensor: Sensor, source: BeliefSource, value: float,
                 **kwargs):
        """Initialise a belief about a sensor event from flexible keyword arguments.

        :param sensor: sensor to which the belief pertains
        :param source: source that formed the belief
        :param value: stored as the event value
        :param kwargs: optional keywords; within each group the first one present wins:
               - cumulative_probability, cp or sigma (without any of these, the cumulative probability is 0.5)
               - event_start or event_time (event_time is only accepted if the sensor's event resolution is zero)
               - belief_horizon or belief_time (a belief time is stored as the horizon before the knowledge time
                 that the sensor reports for the event start)
        :raises KeyError: if event_time is passed while the sensor has a non-zero event resolution
        """
        self.sensor = sensor
        self.source = source
        self.event_value = value

        # Resolve the cumulative probability from the first matching keyword
        probability_key = next(
            (key for key in ("cumulative_probability", "cp", "sigma")
             if key in kwargs),
            None,
        )
        if probability_key is None:
            probability = 0.5
        elif probability_key == "sigma":
            # Standard normal cumulative distribution function evaluated at sigma
            probability = 1 / 2 + (math.erf(kwargs["sigma"] / 2**0.5)) / 2
        else:
            probability = kwargs[probability_key]
        self.cumulative_probability = probability

        # Resolve the start of the event
        if "event_start" in kwargs:
            self.event_start = tb_utils.enforce_utc(kwargs["event_start"])
        elif "event_time" in kwargs:
            if self.sensor.event_resolution != timedelta():
                message = (
                    "Sensor has a non-zero resolution, so it doesn't measure instantaneous events. "
                    "Use event_start instead of event_time.")
                raise KeyError(message)
            self.event_start = tb_utils.enforce_utc(kwargs["event_time"])

        # Resolve the belief horizon, deriving it from a belief time if needed
        if "belief_horizon" in kwargs:
            self.belief_horizon = kwargs["belief_horizon"]
        elif "belief_time" in kwargs:
            formed_at = tb_utils.enforce_utc(kwargs["belief_time"])
            known_at = self.sensor.knowledge_time(self.event_start)
            self.belief_horizon = known_at - formed_at
Пример #2
0
    def fixed_viewpoint(
        self,
        belief_time: datetime = None,
        belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (
            None,
            None,
        ),
        update_belief_times: bool = False,
    ) -> "BeliefsDataFrame":
        """Keep, for each event, only the most recent belief as seen from a fixed belief time.
        NB: with a fixed viewpoint the horizon increases as you look further ahead.
        Instead of a single belief time, a belief time window can be passed to restrict the selection to beliefs
        formed within that window, which allows setting a maximum freshness of the data.

        :Example:

        >>> # Select the latest beliefs formed before June 6th 2018 about each event
        >>> df.fixed_viewpoint(belief_time=datetime(2018, 6, 6))
        >>> # Or equivalently:
        >>> df.fixed_viewpoint(belief_time_window=(None, datetime(2018, 6, 6, tzinfo=utc)))
        >>> # Select the latest beliefs formed from June 1st to June 6th (up to June 6th 0:00 AM)
        >>> df.fixed_viewpoint(belief_time_window=(datetime(2018, 6, 1, tzinfo=utc), datetime(2018, 6, 6, tzinfo=utc)))

        :param belief_time: datetime before which (inclusive) the beliefs should have been formed;
               shorthand for belief_time_window=(None, belief_time)
        :param belief_time_window: optional tuple with the (inclusive) bounds of the time window within which
               beliefs should have been formed; None leaves a bound open
        :param update_belief_times: if True, replace the belief time of each selected belief
               with the upper bound of the window
        :raises ValueError: if both a belief time and a belief time window are passed
        :returns: the selected beliefs, indexed by belief time
        """
        if belief_time is not None:
            if belief_time_window != (None, None):
                raise ValueError(
                    "Cannot pass both a belief time and belief time window.")
            belief_time_window = (None, belief_time)

        # Work on a frame that is indexed by belief time rather than belief horizon
        beliefs = self
        if "belief_time" not in beliefs.index.names:
            beliefs = beliefs.convert_index_from_belief_horizon_to_time()

        # Apply the lower and upper bound of the window (both inclusive)
        if belief_time_window[0] is not None:
            formed_at = beliefs.index.get_level_values("belief_time")
            lower_bound = tb_utils.enforce_utc(belief_time_window[0])
            beliefs = beliefs[formed_at >= lower_bound]
        if belief_time_window[1] is not None:
            formed_at = beliefs.index.get_level_values("belief_time")
            upper_bound = tb_utils.enforce_utc(belief_time_window[1])
            beliefs = beliefs[formed_at <= upper_bound]

        beliefs = belief_utils.select_most_recent_belief(beliefs)
        if update_belief_times is not True:
            return beliefs

        # Overwrite every belief time with the upper bound of the window
        viewpoint_times = pd.DatetimeIndex(
            data=[belief_time_window[1]] * len(beliefs.index))
        return tb_utils.replace_multi_index_level(
            beliefs,
            "belief_time",
            viewpoint_times,
        )
Пример #3
0
    def belief_history(
        self,
        event_start: datetime,
        belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (
            None,
            None,
        ),
        belief_horizon_window: Tuple[Optional[timedelta],
                                     Optional[timedelta]] = (
                                         None,
                                         None,
                                     ),
        keep_event_start: bool = False,
    ) -> "BeliefsDataFrame":
        """Select every belief about one event, where the event is identified by its start time.
        The history can optionally be restricted to beliefs formed within a certain time window.
        Alternatively, it can be restricted to beliefs formed within a certain horizon window before knowledge time
        (with negative horizons indicating post knowledge time).

        :Example:

        >>> # Select beliefs formed before June 20th 2018
        >>> df.belief_history(event_start, belief_time_window=(None, datetime(2018, 6, 20, tzinfo=utc)))
        >>> # Select beliefs formed from 5 to 10 hours before knowledge time
        >>> df.belief_history(event_start, belief_horizon_window=(timedelta(hours=5), timedelta(hours=10)))
        >>> # Select beliefs formed from 2 hours after to 10 hours before knowledge time
        >>> df.belief_history(event_start, belief_horizon_window=(timedelta(hours=-2), timedelta(hours=10)))

        :param event_start: start time of the event
        :param belief_time_window: optional tuple with the (inclusive) bounds of the time window within which
               beliefs should have been formed; None leaves a bound open
        :param belief_horizon_window: optional tuple with the (inclusive) bounds of a horizon window
               (e.g. between 1 and 2 hours before the event value could have been known); None leaves a bound open
        :param keep_event_start: if True, keep the event_start index level; by default it is dropped
        :raises ValueError: if both a belief time window and a belief horizon window are passed
        """
        # Cross-section for the requested event, keeping all index levels for now
        event_key = tb_utils.enforce_utc(event_start)
        history = self.xs(event_key, level="event_start", drop_level=False)
        history = history.sort_index()

        # Restrict by belief time
        earliest_time = belief_time_window[0]
        if earliest_time is not None:
            formed_at = history.index.get_level_values("belief_time")
            history = history[formed_at >= earliest_time]
        latest_time = belief_time_window[1]
        if latest_time is not None:
            formed_at = history.index.get_level_values("belief_time")
            history = history[formed_at <= latest_time]

        # Restrict by belief horizon, which requires a temporary switch of index
        if belief_horizon_window != (None, None):
            if belief_time_window != (None, None):
                raise ValueError(
                    "Cannot pass both a belief time window and belief horizon window."
                )
            history = history.convert_index_from_belief_time_to_horizon()
            shortest_horizon = belief_horizon_window[0]
            if shortest_horizon is not None:
                horizons = history.index.get_level_values("belief_horizon")
                history = history[horizons >= shortest_horizon]
            longest_horizon = belief_horizon_window[1]
            if longest_horizon is not None:
                horizons = history.index.get_level_values("belief_horizon")
                history = history[horizons <= longest_horizon]
            history = history.convert_index_from_belief_horizon_to_time()

        if keep_event_start:
            return history
        return history.droplevel("event_start")
Пример #4
0
    def query(
        cls,
        session: Session,
        sensor: DBSensor,
        event_before: datetime = None,
        event_not_before: datetime = None,
        belief_before: datetime = None,
        belief_not_before: datetime = None,
        source: Union[int, List[int], str, List[str]] = None,
    ) -> "BeliefsDataFrame":
        """Query beliefs about sensor events.

        :param session: the database session to use
        :param sensor: sensor to which the beliefs pertain
        :param event_before: only return beliefs about events that end before this datetime (inclusive)
        :param event_not_before: only return beliefs about events that start after this datetime (inclusive)
        :param belief_before: only return beliefs formed before this datetime (inclusive)
        :param belief_not_before: only return beliefs formed after this datetime (inclusive)
        :param source: only return beliefs formed by the given source or list of sources (pass their id or name)
        :raises ValueError: if the sensor cannot be found in the database,
                or if a source is neither an integer id nor a string name
        :returns: a multi-index DataFrame with all relevant beliefs

        TODO: rename params for clarity: event_finished_before, event_starts_not_before (or similar), same for beliefs
        """

        # Pass every given datetime through enforce_utc
        if event_before is not None:
            event_before = tb_utils.enforce_utc(event_before)
        if event_not_before is not None:
            event_not_before = tb_utils.enforce_utc(event_not_before)
        if belief_before is not None:
            belief_before = tb_utils.enforce_utc(belief_before)
        if belief_not_before is not None:
            belief_not_before = tb_utils.enforce_utc(belief_not_before)

        # Query sensor for relevant timing properties
        sensor_timing = (session.query(
            DBSensor.event_resolution,
            DBSensor.knowledge_horizon_fnc,
            DBSensor.knowledge_horizon_par,
        ).filter(DBSensor.id == sensor.id).one_or_none())
        if sensor_timing is None:
            # Fail with a clear message instead of an unpacking error on None
            raise ValueError(
                "Query failed: sensor with id %s not found in the database."
                % sensor.id)
        event_resolution, knowledge_horizon_fnc, knowledge_horizon_par = sensor_timing

        # Get bounds on the knowledge horizon (so we can already roughly filter by belief time)
        knowledge_horizon_min, knowledge_horizon_max = sensor_utils.eval_verified_knowledge_horizon_fnc(
            knowledge_horizon_fnc, knowledge_horizon_par, None)

        # Start from all beliefs about this sensor
        q = session.query(cls).filter(cls.sensor_id == sensor.id)

        # Apply event time filter
        if event_before is not None:
            q = q.filter(cls.event_start + event_resolution <= event_before)
        if event_not_before is not None:
            q = q.filter(cls.event_start >= event_not_before)

        # Apply rough belief time filter
        if belief_before is not None:
            q = q.filter(cls.event_start <= belief_before +
                         cls.belief_horizon + knowledge_horizon_max)
        if belief_not_before is not None:
            q = q.filter(cls.event_start >= belief_not_before +
                         cls.belief_horizon + knowledge_horizon_min)

        # Apply source filter
        if source is not None:
            source_list = source if isinstance(source, list) else [source]
            id_list = [s for s in source_list if isinstance(s, int)]
            name_list = [s for s in source_list if isinstance(s, str)]
            if len(id_list) + len(name_list) < len(source_list):
                unidentifiable_list = [
                    s for s in source_list if not isinstance(s, (int, str))
                ]
                raise ValueError(
                    "Query by source failed: query only possible by integer id or string name. Failed sources: %s"
                    % unidentifiable_list)
            q = q.join(DBBeliefSource).filter(
                (cls.source_id.in_(id_list))
                | (DBBeliefSource.name.in_(name_list)))

        # Build our DataFrame of beliefs
        df = BeliefsDataFrame(sensor=sensor, beliefs=q.all())

        # Actually filter by belief time (both bounds inclusive, as documented above)
        if belief_before is not None:
            df = df[df.index.get_level_values("belief_time") <= belief_before]
        if belief_not_before is not None:
            df = df[
                df.index.get_level_values("belief_time") >= belief_not_before]

        return df
Пример #5
0
 def knowledge_time(self, event_start: datetime) -> datetime:
     """Return the knowledge time of an event: its start minus the knowledge horizon for that start.

     :param event_start: start time of the event (passed through enforce_utc before use)
     :returns: the event start minus the result of knowledge_horizon for that event start
     """
     utc_event_start = enforce_utc(event_start)
     horizon = self.knowledge_horizon(utc_event_start)
     return utc_event_start - horizon
Пример #6
0
 def knowledge_horizon(self, event_start: datetime = None) -> timedelta:
     """Evaluate the knowledge horizon function of this object for a given event start.

     :param event_start: optional start time of the event; when given, it is passed through enforce_utc.
            When left at None, it is forwarded unchanged to the evaluator
            (NOTE(review): with None the evaluator presumably returns bounds rather than a single
            timedelta - confirm against eval_verified_knowledge_horizon_fnc).
     :returns: the result of eval_verified_knowledge_horizon_fnc for the stored function name and parameters
     """
     # Only normalise an actual datetime, so that the default of None stays usable
     if event_start is not None:
         event_start = enforce_utc(event_start)
     return eval_verified_knowledge_horizon_fnc(self.knowledge_horizon_fnc,
                                                self.knowledge_horizon_par,
                                                event_start)
Пример #7
0
    def __init__(self, *args, **kwargs):
        """Initialise a multi-index DataFrame with beliefs about a unique sensor.

        Besides the regular pandas DataFrame arguments, the following keyword arguments are accepted:

        :param sensor: the sensor to which all beliefs pertain; stored as metadata together with its event resolution
        :param beliefs: list of beliefs used to build the frame; if this is passed, the four keywords below are not used
        :param source: if set, overwrites the source of each row (only when initialising from a DataFrame)
        :param event_start: if set, overwrites the event start of each row (only when initialising from a DataFrame)
        :param belief_time: if set, overwrites the belief time of each row (only when initialising from a DataFrame)
        :param cumulative_probability: if set, overwrites the cumulative probability of each row
               (only when initialising from a DataFrame)
        :raises ValueError: if the given beliefs come from distinct sources that share a name
        """

        # Obtain parameters that are specific to our DataFrame subclass
        sensor: Sensor = kwargs.pop("sensor", None)
        source: BeliefSource = kwargs.pop("source", None)
        event_start: datetime = kwargs.pop("event_start", None)
        belief_time: datetime = kwargs.pop("belief_time", None)
        cumulative_probability: float = kwargs.pop("cumulative_probability",
                                                   None)
        beliefs: List[TimedBelief] = kwargs.pop("beliefs", None)

        # Define our columns and indices
        columns = ["event_value"]
        indices = [
            "event_start", "belief_time", "source", "cumulative_probability"
        ]

        # Use our constructor if initialising from a previous (Beliefs)DataFrame (e.g. when slicing), copying the Sensor metadata
        # TODO: how is the metadata copied here?
        if beliefs is None:
            super().__init__(*args, **kwargs)
            # Guard against construction without positional data (e.g. keyword-only calls)
            if args and isinstance(args[0], pd.DataFrame):

                # Set (possibly overwrite) each index level to a unique value if set explicitly
                if source is not None:
                    self["source"] = source
                if event_start is not None:
                    self["event_start"] = tb_utils.enforce_utc(event_start)
                if belief_time is not None:
                    self["belief_time"] = tb_utils.enforce_utc(belief_time)
                if cumulative_probability is not None:
                    self["cumulative_probability"] = cumulative_probability

                # Check for correct types and convert if possible
                self["event_start"] = pd.to_datetime(self["event_start"],
                                                     utc=True)
                self["belief_time"] = pd.to_datetime(self["belief_time"],
                                                     utc=True)
                # isinstance (rather than an exact type match) also accepts subclasses of BeliefSource
                if not all(isinstance(s, BeliefSource) for s in self["source"]):
                    warnings.warn(
                        "DataFrame contains sources of type other than BeliefSource."
                    )

                # Set index levels and metadata
                self.set_index(indices, inplace=True)
                self.sensor = sensor
                self.event_resolution = sensor.event_resolution
            return

        # Call the pandas DataFrame constructor with the right input
        kwargs["columns"] = columns
        if beliefs:
            # Distinct sources sharing a name would be indistinguishable, so reject them
            sources = {belief.source for belief in beliefs}
            source_names = {s.name for s in sources}
            if len(source_names) != len(sources):
                raise ValueError(
                    "Source names must be unique. Cannot initialise BeliefsDataFrame given the following unique sources:\n%s"
                    % sources)
            # Sort the beliefs by index level, in the order of the index levels
            beliefs = sorted(
                beliefs,
                key=lambda b: (
                    b.event_start,
                    b.belief_time,
                    b.source,
                    b.cumulative_probability,
                ),
            )
            kwargs["data"] = [[getattr(i, j) for j in columns]
                              for i in beliefs]
            kwargs["index"] = pd.MultiIndex.from_tuples(
                [[getattr(i, j) for j in indices] for i in beliefs],
                names=indices)
        else:
            # No beliefs given: set up an empty index with the right level names
            kwargs["index"] = pd.MultiIndex(
                levels=[[] for _ in indices],
                codes=[[] for _ in indices],
                names=indices)  # Todo support pandas 0.23
        super().__init__(*args, **kwargs)

        # Clean up duplicate beliefs
        self.reset_index(inplace=True)
        self.drop_duplicates(inplace=True)
        self.set_index(indices, inplace=True)

        # Set the Sensor metadata (including timing properties of the sensor)
        self.sensor = sensor
        self.event_resolution = self.sensor.event_resolution