def test_do_not_drop_changed_probabilistic_belief(setup_beliefs):
    """Trying to save a changed probabilistic belief should result in saving the whole belief.

    For example, given a belief that defines both cp=0.2 and cp=0.5,
    if that belief becomes more certain (e.g. cp=0.3 and cp=0.5),
    we expect to see the full new belief stored, rather than just the cp=0.3 value.
    """

    # Set a reference for the number of beliefs stored
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    bdf = sensor.search_beliefs(source="ENTSO-E",
                                most_recent_beliefs_only=False)
    num_beliefs_before = len(bdf)

    # Pick out the single probabilistic belief (one event, one belief time) to be changed
    is_target_event = bdf.index.get_level_values(
        "event_start") == pd.Timestamp("2021-03-28 16:00:00+00:00")
    is_target_belief_time = bdf.index.get_level_values(
        "belief_time") == pd.Timestamp("2021-03-27 9:00:00+00:00")
    old_belief = bdf.loc[is_target_event & is_target_belief_time]

    # See what happens when storing a belief with more certainty one hour later
    new_belief = tb_utils.replace_multi_index_level(old_belief,
                                                    "cumulative_probability",
                                                    pd.Index([0.3, 0.5]))
    # Keyword form instead of the "1H" string: the uppercase unit alias is deprecated in recent pandas
    new_belief = tb_utils.replace_multi_index_level(
        new_belief, "belief_time",
        new_belief.belief_times + pd.Timedelta(hours=1))
    save_to_db(new_belief)

    # Verify that the whole probabilistic belief was added
    bdf = sensor.search_beliefs(source="ENTSO-E",
                                most_recent_beliefs_only=False)
    num_beliefs_after = len(bdf)
    assert num_beliefs_after == num_beliefs_before + len(new_belief)
def test_drop_unchanged_beliefs(setup_beliefs):
    """Trying to save beliefs that are already in the database shouldn't raise an error.

    Even after updating the belief time, we expect to persist only the older belief time.
    """

    # Set a reference for the number of beliefs stored and their belief times
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    num_beliefs_before = len(bdf)
    belief_times_before = bdf.belief_times

    # See what happens when storing all existing beliefs verbatim
    save_to_db(bdf)

    # Verify that no new beliefs were saved
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    assert len(bdf) == num_beliefs_before

    # See what happens when storing all beliefs with their belief time updated
    # (keyword form instead of the "1H" string: the uppercase unit alias is deprecated in recent pandas)
    bdf = tb_utils.replace_multi_index_level(
        bdf, "belief_time", bdf.belief_times + pd.Timedelta(hours=1))
    save_to_db(bdf)

    # Verify that no new beliefs were saved, and that the original belief times are the ones kept
    bdf = sensor.search_beliefs(most_recent_beliefs_only=False)
    assert len(bdf) == num_beliefs_before
    assert list(bdf.belief_times) == list(belief_times_before)
示例#3
0
    def fixed_viewpoint(
        self,
        belief_time: datetime = None,
        belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (
            None,
            None,
        ),
        update_belief_times: bool = False,
    ) -> "BeliefsDataFrame":
        """Keep, for each event, only the most recent belief as seen from a fixed point in time.

        Either pass a single belief time (beliefs formed later than that are discarded),
        or pass a belief time window (beliefs formed outside of it are discarded), but not both.
        Both window bounds are inclusive, and either bound may be None to leave that side open.
        NB: with a fixed viewpoint the horizon increases as you look further ahead.

        :Example:

        >>> # Select the latest beliefs formed before June 6th 2018 about each event
        >>> df.fixed_viewpoint(belief_time=datetime(2018, 6, 6))
        >>> # Or equivalently:
        >>> df.fixed_viewpoint(belief_time_window=(None, datetime(2018, 6, 6, tzinfo=utc)))
        >>> # Select the latest beliefs formed from June 1st to June 6th (up to June 6th 0:00 AM)
        >>> df.fixed_viewpoint(belief_time_window=(datetime(2018, 6, 1, tzinfo=utc), datetime(2018, 6, 6, tzinfo=utc)))

        :param belief_time: datetime indicating the belief should be formed at least before this time
        :param belief_time_window: optional tuple specifying a time window within which beliefs should have been formed
        :param update_belief_times: if True, set the belief time of each selected belief to the end of the window
                                    (which is the given belief time, if one was passed)
        :raises ValueError: if both a belief time and a belief time window are passed
        """
        # A single belief time is shorthand for a window without a lower bound
        if belief_time is not None:
            if belief_time_window != (None, None):
                raise ValueError(
                    "Cannot pass both a belief time and belief time window.")
            belief_time_window = (None, belief_time)

        # The filters below need a belief_time index level
        beliefs = self
        if "belief_time" not in beliefs.index.names:
            beliefs = beliefs.convert_index_from_belief_horizon_to_time()

        # Drop beliefs formed before the start of the window
        window_start = belief_time_window[0]
        if window_start is not None:
            formed_at = beliefs.index.get_level_values("belief_time")
            beliefs = beliefs[formed_at >= tb_utils.enforce_utc(window_start)]

        # Drop beliefs formed after the end of the window
        window_end = belief_time_window[1]
        if window_end is not None:
            formed_at = beliefs.index.get_level_values("belief_time")
            beliefs = beliefs[formed_at <= tb_utils.enforce_utc(window_end)]

        beliefs = belief_utils.select_most_recent_belief(beliefs)
        if update_belief_times is not True:
            return beliefs

        # Stamp every remaining belief with the end of the window
        stamped_times = pd.DatetimeIndex(
            data=[belief_time_window[1]] * len(beliefs.index))
        return tb_utils.replace_multi_index_level(
            beliefs,
            "belief_time",
            stamped_times,
        )
def test_replace_index_level_with_intersect(df_4323):
    """Test replacing an index level while keeping only the intersection with the new index.

    Deterministic beliefs are tested first, then probabilistic beliefs."""

    def _keep_first_event_only(frame):
        # Replace the event starts with an index holding just the first event start of the frame
        first_event_start = pd.date_range(
            start=frame.index.get_level_values(0)[0],
            periods=1,
            freq=frame.sensor.event_resolution,
        )
        return replace_multi_index_level(
            frame, "event_start", first_event_start, intersection=True
        )

    def _keep_no_events(frame):
        # Intersecting with an empty index should leave nothing
        return replace_multi_index_level(
            frame, "event_start", pd.Index([]), intersection=True
        )

    # Deterministic beliefs: only the rows at cumulative probability 0.5
    deterministic = df_4323.xs(0.5, level="cumulative_probability", drop_level=False)
    deterministic = _keep_first_event_only(deterministic)
    # 2 sources each having 3 deterministic beliefs
    assert len(deterministic.index) == 6
    deterministic = _keep_no_events(deterministic)
    assert len(deterministic.index) == 0

    # Probabilistic beliefs: the full frame
    probabilistic = _keep_first_event_only(df_4323)
    # 2 sources each having 3 probabilistic beliefs with 3 probabilistic values
    assert len(probabilistic.index) == 18
    probabilistic = _keep_no_events(probabilistic)
    assert len(probabilistic.index) == 0
def respect_event_resolution(grouper: DataFrameGroupBy, resolution):
    """Resample to make sure the df slice contains events with the same frequency as the given resolution.
    The input BeliefsDataFrame (see below) should represent beliefs about sequential sub-events formed by a single source
    at a single unique belief time.
    Extra beliefs are added with nan values.

    :Example:

    >>> df = df.groupby([pd.Grouper(freq="1D", level="event_start"), "belief_time", "source"]).pipe(respect_event_resolution, timedelta(hours=1))

    So don't pass a BeliefsDataFrame directly, but pipe it so that we receive a DataFrameGroupBy object, which we can
    iterate over to obtain a BeliefsDataFrame slice for a unique belief time, source and (in our example) day of
    events. We then make sure an event is stated explicitly for (in our example) each hour.

    :param grouper: DataFrameGroupBy whose first grouping key is a pd.Grouper on event_start (its freq sets the bin size)
    :param resolution: the event resolution to resample to (anything accepted by pandas' to_offset)
    :returns: the combined slices, each re-indexed on event_start at the given resolution
    """

    # Get a list of n (key, slice) pairs, one for each group
    groups = list(grouper)

    # Describe the event_start bin for the slices (we take the first, because the slices share the same event_start bin)
    bin_size = grouper.keys[0].freq
    bin_start = groups[0][0][0]
    bin_end = bin_start + bin_size

    # Start from an emptied copy of the first slice, so columns (and presumably metadata) carry over to the result
    template = groups[0][1].copy().iloc[0:0]

    # Only slices that actually contain beliefs are resampled
    non_empty_slices = [df_slice for _, df_slice in groups if not df_slice.empty]
    if not non_empty_slices:
        return template

    # The event starts are the same for every slice, so build them once.
    # The bin is left-closed: drop the right edge by hand rather than via the `closed` / `inclusive`
    # keywords of date_range, neither of which is available across all pandas versions.
    event_starts = pd.date_range(
        start=bin_start,
        end=bin_end,
        freq=to_offset(resolution).freqstr,
        name="event_start",
    )
    if len(event_starts) > 0 and event_starts[-1] == bin_end:
        event_starts = event_starts[:-1]

    resampled_slices = [
        tb_utils.replace_multi_index_level(df_slice,
                                           level="event_start",
                                           index=event_starts,
                                           intersection=True)
        for df_slice in non_empty_slices
    ]

    # A single concat replaces the repeated DataFrame.append calls (append was removed in pandas 2.0)
    return pd.concat([template] + resampled_slices, axis=0)
def test_do_not_drop_beliefs_copied_by_another_source(setup_beliefs):
    """Copying all beliefs to a different source should double the number of stored beliefs."""

    # Count the beliefs stored before the copy
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    original_beliefs = sensor.search_beliefs(most_recent_beliefs_only=False)
    num_beliefs_before = len(original_beliefs)

    # Attribute every belief to a new source and store the result
    new_source = DataSource(name="Not Seita", type="demo script")
    copied_sources = pd.Index([new_source] * num_beliefs_before)
    copied_beliefs = tb_utils.replace_multi_index_level(
        original_beliefs, "source", copied_sources)
    save_to_db(copied_beliefs)

    # Each copy should have been stored next to its original
    all_beliefs = sensor.search_beliefs(most_recent_beliefs_only=False)
    assert len(all_beliefs) == 2 * num_beliefs_before
示例#7
0
def set_truth(
        grouped: DataFrameGroupBy,
        right_source: "classes.BeliefSource") -> "classes.BeliefsDataFrame":
    """Overwrite the beliefs of each source by those of the given source.

    Terminology-wise, we say the given source is considered to be right,
    so its beliefs contain the truth to be used as a reference for accuracy calculations.

    :param grouped: the beliefs grouped such that each group key is a source
    :param right_source: the source whose beliefs are taken as the truth
    :raises KeyError: if the given source is not one of the group keys
    :returns: the concatenation of one copy of the truth per original group, each labelled with that group's source
    """

    # Look up the group holding the true observations
    beliefs_by_source = dict(iter(grouped))
    if right_source not in beliefs_by_source:
        raise KeyError("Source %s not found in BeliefsDataFrame." %
                       right_source)
    truth = beliefs_by_source[right_source]
    num_truths = len(truth)

    # Hand each original group the truth, relabelled with that group's own source
    relabelled_truths = []
    for source, _ in grouped:
        source_level = pd.Index([source] * num_truths)
        relabelled_truths.append(
            tb_utils.replace_multi_index_level(truth, "source", source_level))

    return pd.concat(relabelled_truths)
示例#8
0
 def convert_index_from_event_start_to_end(self) -> "BeliefsDataFrame":
     """Replace the values of the event_start index level with this frame's event ends."""
     event_ends = self.event_ends
     return tb_utils.replace_multi_index_level(self, "event_start", event_ends)
示例#9
0
 def convert_index_from_belief_horizon_to_time(self) -> "BeliefsDataFrame":
     """Replace the values of the belief_horizon index level with this frame's belief times."""
     belief_times = self.belief_times
     return tb_utils.replace_multi_index_level(self, "belief_horizon", belief_times)