def test_do_not_drop_changed_probabilistic_belief(setup_beliefs):
    """A changed probabilistic belief must be stored in full, not partially.

    Suppose a stored belief defines values at cp=0.2 and cp=0.5. When a more
    certain version arrives later (values at cp=0.3 and cp=0.5), every row of
    the new belief should be saved, not only the row whose cumulative
    probability differs (cp=0.3).
    """
    search_kwargs = dict(source="ENTSO-E", most_recent_beliefs_only=False)

    # Baseline: count what is stored before we save anything
    epex_sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    stored = epex_sensor.search_beliefs(**search_kwargs)
    count_before = len(stored)

    # Pick out one probabilistic belief by its event start and belief time
    event_starts = stored.index.get_level_values("event_start")
    belief_times = stored.index.get_level_values("belief_time")
    is_target_event = event_starts == pd.Timestamp("2021-03-28 16:00:00+00:00")
    is_target_belief_time = belief_times == pd.Timestamp("2021-03-27 9:00:00+00:00")
    original = stored.loc[is_target_event & is_target_belief_time]

    # Make it more certain, and date it one hour later
    sharpened = tb_utils.replace_multi_index_level(
        original, "cumulative_probability", pd.Index([0.3, 0.5])
    )
    shifted_belief_times = sharpened.belief_times + pd.Timedelta("1H")
    updated = tb_utils.replace_multi_index_level(
        sharpened, "belief_time", shifted_belief_times
    )
    save_to_db(updated)

    # Every row of the updated belief should have been added
    count_after = len(epex_sensor.search_beliefs(**search_kwargs))
    assert count_after == count_before + len(updated)
def test_drop_unchanged_beliefs(setup_beliefs):
    """Re-saving beliefs that are already stored must be a harmless no-op.

    Saving them verbatim should not raise and should not add rows. Saving them
    again with a later belief time should not add rows either: only the
    original (older) belief times are expected to remain.
    """
    epex_sensor = Sensor.query.filter_by(name="epex_da").one_or_none()

    def stored_beliefs():
        # Always query the full belief history, not just the latest beliefs
        return epex_sensor.search_beliefs(most_recent_beliefs_only=False)

    # Baseline: row count and belief times before any save
    baseline = stored_beliefs()
    expected_count = len(baseline)
    expected_belief_times = baseline.belief_times

    # Saving everything verbatim should add nothing
    save_to_db(baseline)
    after_verbatim_save = stored_beliefs()
    assert len(after_verbatim_save) == expected_count

    # Saving everything with the belief time moved an hour should add nothing
    one_hour_later = after_verbatim_save.belief_times + pd.Timedelta("1H")
    save_to_db(
        tb_utils.replace_multi_index_level(
            after_verbatim_save, "belief_time", one_hour_later
        )
    )
    after_shifted_save = stored_beliefs()
    assert len(after_shifted_save) == expected_count
    assert list(after_shifted_save.belief_times) == list(expected_belief_times)
def fixed_viewpoint(
    self,
    belief_time: Optional[datetime] = None,
    belief_time_window: Tuple[Optional[datetime], Optional[datetime]] = (
        None,
        None,
    ),
    update_belief_times: bool = False,
) -> "BeliefsDataFrame":
    """Select the most recent belief about each event at a given belief time.

    NB: with a fixed viewpoint the horizon increases as you look further ahead.

    Alternatively, select the most recent belief formed within a certain time window.
    This allows setting a maximum freshness of the data.

    :Example:

    >>> # Select the latest beliefs formed before June 6th 2018 about each event
    >>> df.fixed_viewpoint(belief_time=datetime(2018, 6, 6, tzinfo=utc))
    >>> # Or equivalently:
    >>> df.fixed_viewpoint(belief_time_window=(None, datetime(2018, 6, 6, tzinfo=utc)))
    >>> # Select the latest beliefs formed from June 1st to June 6th (up to June 6th 0:00 AM)
    >>> df.fixed_viewpoint(belief_time_window=(datetime(2018, 6, 1, tzinfo=utc), datetime(2018, 6, 6, tzinfo=utc)))

    :param belief_time: datetime indicating the belief should be formed at least before this time
        (shorthand for ``belief_time_window=(None, belief_time)``)
    :param belief_time_window: optional tuple specifying a time window within which beliefs should
        have been formed; either bound may be None to leave that side open
    :param update_belief_times: if True, update the belief time of each belief with the given fixed
        viewpoint (the upper bound of the window). Without an upper bound there is no
        viewpoint to assign, so belief times are left untouched.
    :returns: the selected beliefs, indexed by belief time
    :raises ValueError: if both belief_time and a non-default belief_time_window are passed
    """
    if belief_time is not None:
        if belief_time_window != (None, None):
            raise ValueError("Cannot pass both a belief time and belief time window.")
        belief_time_window = (None, belief_time)
    window_start = belief_time_window[0]
    window_end = belief_time_window[1]

    # Filtering happens on belief times, so make sure that index level exists
    df = self
    if "belief_time" not in df.index.names:
        df = df.convert_index_from_belief_horizon_to_time()

    if window_start is not None:
        df = df[
            df.index.get_level_values("belief_time")
            >= tb_utils.enforce_utc(window_start)
        ]
    if window_end is not None:
        df = df[
            df.index.get_level_values("belief_time")
            <= tb_utils.enforce_utc(window_end)
        ]
    df = belief_utils.select_most_recent_belief(df)

    # Only overwrite belief times when there is an actual viewpoint to assign;
    # building the index from None would silently turn every belief time into NaT.
    if update_belief_times is True and window_end is not None:
        return tb_utils.replace_multi_index_level(
            df,
            "belief_time",
            pd.DatetimeIndex(data=[window_end] * len(df.index)),
        )
    return df
def test_replace_index_level_with_intersect(df_4323):
    """Check index-level replacement when only intersecting rows are kept.

    The same two steps run for deterministic beliefs (the cp=0.5 slice) and
    then for the full probabilistic frame: first restrict event_start to the
    first event only, then replace it with an empty index.
    """
    cases = (
        # 2 sources each having 3 deterministic beliefs
        (df_4323.xs(0.5, level="cumulative_probability", drop_level=False), 6),
        # 2 sources each having 3 probabilistic beliefs with 3 probabilistic values
        (df_4323, 18),
    )
    for beliefs, expected_rows in cases:
        first_event_only = pd.date_range(
            start=beliefs.index.get_level_values(0)[0],
            periods=1,
            freq=beliefs.sensor.event_resolution,
        )
        kept = replace_multi_index_level(
            beliefs, "event_start", first_event_only, intersection=True
        )
        assert len(kept.index) == expected_rows

        # Intersecting with an empty index should leave no rows at all
        emptied = replace_multi_index_level(
            kept, "event_start", pd.Index([]), intersection=True
        )
        assert len(emptied.index) == 0
def respect_event_resolution(grouper: DataFrameGroupBy, resolution):
    """Resample to make sure the df slice contains events with the same frequency as the given resolution.

    The input BeliefsDataFrame (see below) should represent beliefs about sequential sub-events
    formed by a single source at a single unique belief time.
    Extra beliefs are added with nan values.

    :Example:

    >>> df = df.groupby([pd.Grouper(freq="1D", level="event_start"), "belief_time", "source"]).pipe(respect_event_resolution, timedelta(hours=1))

    So don't pass a BeliefsDataFrame directly, but pipe it so that we receive a DataFrameGroupBy object,
    which we can iterate over to obtain a BeliefsDataFrame slice for a unique belief time, source and
    (in our example) day of events. We then make sure an event is stated explicitly for (in our example) each hour.

    :param grouper: groupby object whose first grouping key is a frequency-based grouper on event_start
    :param resolution: the event resolution to enforce (anything accepted by pandas' ``to_offset``)
    :returns: the re-indexed slices, stacked in group order under an empty copy of the first slice
    :raises IndexError: if the groupby object yields no groups
    """
    # Each item is a (group key, BeliefsDataFrame slice) pair
    groups = list(grouper)

    # Describe the event_start bin for the slices
    # (we take the first, because the slices share the same event_start bin)
    bin_size = grouper.keys[0].freq
    bin_start = groups[0][0][0]
    bin_end = bin_start + bin_size

    # An emptied copy of the first slice goes first in the result,
    # so that the metadata should be copied over from it
    template = groups[0][1].copy().iloc[0:0]

    non_empty_slices = [df_slice for _, df_slice in groups if not df_slice.empty]
    if not non_empty_slices:
        return template

    # All slices share the same bin, so the target event starts are computed once.
    # The bin is left-closed: the right edge is trimmed by hand rather than via
    # date_range's closed=/inclusive= keywords, which differ between pandas versions.
    event_starts = pd.date_range(
        start=bin_start,
        end=bin_end,
        freq=to_offset(resolution).freqstr,
        name="event_start",
    )
    if len(event_starts) > 0 and event_starts[-1] == bin_end:
        event_starts = event_starts[:-1]

    # Re-index every slice (one per unique belief time and source) and stack them
    # in a single concat, instead of repeatedly appending inside a loop
    resampled = [
        tb_utils.replace_multi_index_level(
            df_slice, level="event_start", index=event_starts, intersection=True
        )
        for df_slice in non_empty_slices
    ]
    return pd.concat([template] + resampled)
def test_do_not_drop_beliefs_copied_by_another_source(setup_beliefs):
    """Beliefs re-attributed to a different source count as new beliefs.

    Copying every stored belief to another source and saving the copies is
    expected to double the number of stored beliefs.
    """
    epex_sensor = Sensor.query.filter_by(name="epex_da").one_or_none()

    # Baseline: everything stored for the sensor
    original = epex_sensor.search_beliefs(most_recent_beliefs_only=False)
    count_before = len(original)

    # Re-attribute every belief to a new source and save the copies
    copier = DataSource(name="Not Seita", type="demo script")
    copied_sources = pd.Index([copier] * count_before)
    copies = tb_utils.replace_multi_index_level(original, "source", copied_sources)
    save_to_db(copies)

    # All copies should have been stored next to the originals
    count_after = len(epex_sensor.search_beliefs(most_recent_beliefs_only=False))
    assert count_after == 2 * count_before
def set_truth(
    grouped: DataFrameGroupBy, right_source: "classes.BeliefSource"
) -> "classes.BeliefsDataFrame":
    """Give every source the beliefs of the one source that is considered right.

    The beliefs of `right_source` serve as the truth against which accuracy is
    calculated. The result holds one copy of those beliefs per group key, each
    copy labelled with that key in the "source" index level.

    :param grouped: groupby object whose group keys are the sources
    :param right_source: the source whose beliefs are used as the reference
    :returns: the concatenation of the relabelled copies, in group order
    :raises KeyError: if right_source is not one of the group keys
    """
    beliefs_by_source = dict(iter(grouped))
    if right_source not in beliefs_by_source:
        raise KeyError("Source %s not found in BeliefsDataFrame." % right_source)

    # The group considered to contain the true observations
    reference = beliefs_by_source[right_source]
    n_reference_rows = len(reference)

    def relabel(source):
        # Same reference beliefs, but attributed to the given source
        return tb_utils.replace_multi_index_level(
            reference, "source", pd.Index([source] * n_reference_rows)
        )

    return pd.concat([relabel(source) for source, _ in grouped])
def convert_index_from_event_start_to_end(self) -> "BeliefsDataFrame":
    """Replace the "event_start" index level with this frame's event ends.

    The replacement itself is delegated to tb_utils.replace_multi_index_level.
    """
    event_ends = self.event_ends
    return tb_utils.replace_multi_index_level(
        self, level="event_start", index=event_ends
    )
def convert_index_from_belief_horizon_to_time(self) -> "BeliefsDataFrame":
    """Replace the "belief_horizon" index level with this frame's belief times.

    The replacement itself is delegated to tb_utils.replace_multi_index_level.
    """
    belief_times = self.belief_times
    return tb_utils.replace_multi_index_level(
        self, level="belief_horizon", index=belief_times
    )