def test_collect_power_resampled(
    db, app, query_start, query_end, resolution, num_values, setup_test_data
):
    wind_device_1 = Sensor.query.filter_by(name="wind-asset-1").one_or_none()
    bdf: tb.BeliefsDataFrame = TimedBelief.search(
        wind_device_1.name,
        event_starts_after=query_start,
        event_ends_before=query_end,
        resolution=resolution,
        most_recent_beliefs_only=True,
    )
    print(bdf)
    assert len(bdf) == num_values
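
# A minimal illustration (plain pandas, no database involved) of why the
# expected number of values in the test above depends on the requested
# resolution: 15-minute data spanning one day yields 96 rows, while the same
# span resampled to an hourly resolution yields 24. The names below are made
# up for this sketch.
def _example_resampling_changes_row_count():
    import pandas as pd

    index = pd.date_range("2015-01-01", periods=96, freq="15min", tz="UTC")
    quarter_hourly = pd.Series(range(96), index=index)
    hourly = quarter_hourly.resample("1h").mean()  # downsample by averaging
    assert len(quarter_hourly) == 96
    assert len(hourly) == 24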
def test_persist_beliefs(setup_beliefs, setup_test_data):
    """Check whether persisting beliefs works.

    We load the beliefs set up earlier, and form new beliefs an hour later.
    """
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    source = DataSource.query.filter_by(name="ENTSO-E").one_or_none()
    bdf: tb.BeliefsDataFrame = TimedBelief.search(
        sensor, source=source, most_recent_beliefs_only=False
    )

    # Form new beliefs
    df = bdf.reset_index()
    df["belief_time"] = df["belief_time"] + timedelta(hours=1)
    df["event_value"] = df["event_value"] * 10
    bdf = df.set_index(
        ["event_start", "belief_time", "source", "cumulative_probability"]
    )

    TimedBelief.add(bdf)
    bdf: tb.BeliefsDataFrame = TimedBelief.search(
        sensor, source=source, most_recent_beliefs_only=False
    )
    assert len(bdf) == setup_beliefs * 2
def test_query_beliefs(setup_beliefs):
    """Check various ways of querying for beliefs."""
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    source = DataSource.query.filter_by(name="ENTSO-E").one_or_none()
    bdfs = [
        TimedBelief.search(sensor, source=source, most_recent_beliefs_only=False),
        TimedBelief.search(sensor.id, source=source, most_recent_beliefs_only=False),
        TimedBelief.search(sensor.name, source=source, most_recent_beliefs_only=False),
        sensor.search_beliefs(source=source, most_recent_beliefs_only=False),
        tb.BeliefsDataFrame(sensor.beliefs)[
            tb.BeliefsDataFrame(sensor.beliefs).index.get_level_values("source")
            == source
        ],
    ]
    for bdf in bdfs:
        assert sensor.event_resolution == timedelta(hours=1)
        assert bdf.event_resolution == timedelta(hours=1)
        assert len(bdf) == setup_beliefs
def test_simplify_index(setup_test_data, check_empty_frame):
    """Check whether simplify_index retains the event resolution."""
    wind_device_1 = Sensor.query.filter_by(name="wind-asset-1").one_or_none()
    bdf: tb.BeliefsDataFrame = TimedBelief.search(
        wind_device_1.name,
        event_starts_after=datetime(2015, 1, 1, tzinfo=pytz.utc),
        event_ends_before=datetime(2015, 1, 2, tzinfo=pytz.utc),
        resolution=timedelta(minutes=15),
    ).convert_index_from_belief_time_to_horizon()
    if check_empty_frame:
        # We empty the BeliefsDataFrame, which retains the metadata such as sensor and resolution
        bdf = bdf.iloc[0:0, :]
    df = simplify_index(bdf)
    assert df.event_resolution == timedelta(minutes=15)
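
# For context, a rough sketch of what simplify_index is expected to do here
# (an assumed equivalent, not the actual FlexMeasures implementation): drop
# all belief-related index levels so that event_start remains the sole index,
# optionally moving dropped levels into columns first. The property the test
# above relies on is that frame metadata, such as event_resolution, survives.
def _simplify_index_sketch(bdf, index_levels_to_columns=None):
    if index_levels_to_columns is not None:
        for level in index_levels_to_columns:
            bdf[level] = bdf.index.get_level_values(level)
    return bdf.droplevel(
        [name for name in bdf.index.names if name != "event_start"]
    )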
def test_collect_power(db, app, query_start, query_end, num_values, setup_test_data):
    wind_device_1 = Sensor.query.filter_by(name="wind-asset-1").one_or_none()
    data = TimedBelief.query.filter(
        TimedBelief.sensor_id == wind_device_1.id
    ).all()
    print(data)
    bdf: tb.BeliefsDataFrame = TimedBelief.search(
        wind_device_1.name,
        event_starts_after=query_start,
        event_ends_before=query_end,
    )
    print(bdf)
    # The first index level of the search result should be event_start, so that df.loc[] refers to event_start
    assert bdf.index.names[0] == "event_start"
    # The dtype of belief_horizon is timedelta64[ns], so the minimum horizon on an empty BeliefsDataFrame is NaT instead of NaN
    assert pd.api.types.is_timedelta64_dtype(
        bdf.convert_index_from_belief_time_to_horizon().index.get_level_values(
            "belief_horizon"
        )
    )
    assert len(bdf) == num_values
    for v1, v2 in zip(bdf["event_value"].tolist(), data):
        assert abs(v1 - v2.event_value) < 10**-6
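
# The dtype assertion above matters downstream: taking the minimum of an
# empty timedelta64 series gives NaT, which (unlike NaN) can still be handled
# as a time-like missing value. A quick standalone check:
def _example_empty_timedelta_min_is_nat():
    import pandas as pd

    empty_horizons = pd.Series([], dtype="timedelta64[ns]")
    assert empty_horizons.min() is pd.NaT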
def get_weather_data(
    assets: List[Asset],
    metrics: dict,
    sensor_type: WeatherSensorType,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, str, Sensor, dict]:
    """Get the most recent weather data and forecast weather data for the requested forecast horizon.

    Return weather observations, weather forecasts (either might be an empty DataFrame),
    the name of the sensor type, the weather sensor and a dict with the following metrics:
    - expected value
    - mean absolute error
    - mean absolute percentage error
    - weighted absolute percentage error
    """
    # Todo: for now we only collect weather data for a single asset
    asset = assets[0]

    weather_data = tb.BeliefsDataFrame(columns=["event_value"])
    weather_forecast_data = tb.BeliefsDataFrame(columns=["event_value"])
    sensor_type_name = ""
    closest_sensor = None
    if sensor_type:
        # Find the 50 closest weather sensors
        sensor_type_name = sensor_type.name
        closest_sensors = Sensor.find_closest(
            generic_asset_type_name=asset.generic_asset.generic_asset_type.name,
            sensor_name=sensor_type_name,
            n=50,
            object=asset,
        )
        if closest_sensors:
            closest_sensor = closest_sensors[0]

            # Collect the weather data for the requested time window
            sensor_names = [sensor.name for sensor in closest_sensors]

            # Get weather data
            weather_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
                sensor_names,
                event_starts_after=query_window[0],
                event_ends_before=query_window[1],
                resolution=resolution,
                horizons_at_least=None,
                horizons_at_most=timedelta(hours=0),
                sum_multiple=False,
            )
            weather_df_dict: Dict[str, pd.DataFrame] = {}
            for sensor_name in weather_bdf_dict:
                weather_df_dict[sensor_name] = simplify_index(
                    weather_bdf_dict[sensor_name],
                    index_levels_to_columns=["belief_horizon", "source"],
                )

            # Get weather forecasts
            weather_forecast_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
                sensor_names,
                event_starts_after=query_window[0],
                event_ends_before=query_window[1],
                resolution=resolution,
                horizons_at_least=forecast_horizon,
                horizons_at_most=None,
                source_types=["user", "forecasting script", "script"],
                sum_multiple=False,
            )
            weather_forecast_df_dict: Dict[str, pd.DataFrame] = {}
            for sensor_name in weather_forecast_bdf_dict:
                weather_forecast_df_dict[sensor_name] = simplify_index(
                    weather_forecast_bdf_dict[sensor_name],
                    index_levels_to_columns=["belief_horizon", "source"],
                )

            # Take the closest weather sensor which contains some data for the selected time window
            for sensor, sensor_name in zip(closest_sensors, sensor_names):
                if (
                    not weather_df_dict[sensor_name]["event_value"]
                    .isnull()
                    .values.all()
                    or not weather_forecast_df_dict[sensor_name]["event_value"]
                    .isnull()
                    .values.all()
                ):
                    closest_sensor = sensor
                    break

            weather_data = weather_df_dict[sensor_name]
            weather_forecast_data = weather_forecast_df_dict[sensor_name]

            # Calculate the weather metrics
            if not weather_data.empty:
                metrics["realised_weather"] = weather_data["event_value"].mean()
            else:
                metrics["realised_weather"] = np.NaN
            if (
                not weather_forecast_data.empty
                and weather_forecast_data.size == weather_data.size
            ):
                metrics["expected_weather"] = weather_forecast_data[
                    "event_value"
                ].mean()
                metrics["mae_weather"] = calculations.mean_absolute_error(
                    weather_data["event_value"], weather_forecast_data["event_value"]
                )
                metrics["mape_weather"] = calculations.mean_absolute_percentage_error(
                    weather_data["event_value"], weather_forecast_data["event_value"]
                )
                metrics["wape_weather"] = calculations.weighted_absolute_percentage_error(
                    weather_data["event_value"], weather_forecast_data["event_value"]
                )
            else:
                metrics["expected_weather"] = np.NaN
                metrics["mae_weather"] = np.NaN
                metrics["mape_weather"] = np.NaN
                metrics["wape_weather"] = np.NaN
    return (
        weather_data,
        weather_forecast_data,
        sensor_type_name,
        closest_sensor,
        metrics,
    )
def get_prices_data(
    metrics: dict,
    market_sensor: Sensor,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Get price data and metrics.

    Return price observations, price forecasts (either might be an empty DataFrame)
    and a dict with the following metrics:
    - expected value
    - mean absolute error
    - mean absolute percentage error
    - weighted absolute percentage error
    """
    market_name = "" if market_sensor is None else market_sensor.name

    # Get price data
    price_bdf: tb.BeliefsDataFrame = TimedBelief.search(
        [market_name],
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=None,
        horizons_at_most=timedelta(hours=0),
    )
    price_df: pd.DataFrame = simplify_index(
        price_bdf, index_levels_to_columns=["belief_horizon", "source"]
    )

    if not price_bdf.empty:
        metrics["realised_unit_price"] = price_df["event_value"].mean()
    else:
        metrics["realised_unit_price"] = np.NaN

    # Get price forecast
    price_forecast_bdf: tb.BeliefsDataFrame = TimedBelief.search(
        [market_name],
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=forecast_horizon,
        horizons_at_most=None,
        source_types=["user", "forecasting script", "script"],
    )
    price_forecast_df: pd.DataFrame = simplify_index(
        price_forecast_bdf, index_levels_to_columns=["belief_horizon", "source"]
    )

    # Calculate the price metrics
    if not price_forecast_df.empty and price_forecast_df.size == price_df.size:
        metrics["expected_unit_price"] = price_forecast_df["event_value"].mean()
        metrics["mae_unit_price"] = calculations.mean_absolute_error(
            price_df["event_value"], price_forecast_df["event_value"]
        )
        metrics["mape_unit_price"] = calculations.mean_absolute_percentage_error(
            price_df["event_value"], price_forecast_df["event_value"]
        )
        metrics["wape_unit_price"] = calculations.weighted_absolute_percentage_error(
            price_df["event_value"], price_forecast_df["event_value"]
        )
    else:
        metrics["expected_unit_price"] = np.NaN
        metrics["mae_unit_price"] = np.NaN
        metrics["mape_unit_price"] = np.NaN
        metrics["wape_unit_price"] = np.NaN
    return price_df, price_forecast_df, metrics
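
# A small illustration of how the two horizon windows above split the data
# (plain pandas; the sign convention, where non-positive horizons mean the
# belief was formed at or after the event, is an assumption of this sketch):
def _example_horizon_window_split():
    from datetime import timedelta

    import pandas as pd

    df = pd.DataFrame(
        {"belief_horizon": [timedelta(hours=-2), timedelta(0), timedelta(hours=6)]}
    )
    # horizons_at_most=timedelta(hours=0) -> observations (ex-post beliefs)
    observations = df[df["belief_horizon"] <= timedelta(hours=0)]
    # horizons_at_least=forecast_horizon -> forecasts made sufficiently ahead
    forecasts = df[df["belief_horizon"] >= timedelta(hours=6)]
    assert len(observations) == 2 and len(forecasts) == 1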
def plot_beliefs(
    sensors: List[Sensor],
    start: datetime,
    duration: timedelta,
    belief_time_before: Optional[datetime],
    source: Optional[DataSource],
):
    """
    Show a simple plot of belief data directly in the terminal.
    """
    sensors = list(sensors)
    min_resolution = min([s.event_resolution for s in sensors])

    # Query data
    beliefs_by_sensor = TimedBelief.search(
        sensors=sensors,
        event_starts_after=start,
        event_ends_before=start + duration,
        beliefs_before=belief_time_before,
        source=source,
        one_deterministic_belief_per_event=True,
        resolution=min_resolution,
        sum_multiple=False,
    )
    # Only keep non-empty query results
    # (iterate over a copy, so removing sensors does not skip list items)
    for s in list(sensors):
        if beliefs_by_sensor[s.name].empty:
            click.echo(f"No data found for sensor '{s.name}' (Id: {s.id})")
            beliefs_by_sensor.pop(s.name)
            sensors.remove(s)
    if len(beliefs_by_sensor.keys()) == 0:
        click.echo("No data found!")
        raise click.Abort()
    first_df = beliefs_by_sensor[sensors[0].name]

    # Build title
    if len(sensors) == 1:
        title = f"Beliefs for Sensor '{sensors[0].name}' (Id {sensors[0].id}).\n"
    else:
        title = f"Beliefs for Sensor(s) [{','.join([s.name for s in sensors])}] (Id(s): [{','.join([str(s.id) for s in sensors])}]).\n"
    title += f"Data spans {naturaldelta(duration)} and starts at {start}."
    if belief_time_before:
        title += f"\nOnly beliefs made before: {belief_time_before}."
    if source:
        title += f"\nSource: {source.description}"
    title += f"\nThe time resolution (x-axis) is {naturaldelta(min_resolution)}."

    uniplot.plot(
        [beliefs_by_sensor[s.name].event_value for s in sensors],
        title=title,
        color=True,
        lines=True,
        y_unit=first_df.sensor.unit
        if len(beliefs_by_sensor) == 1
        or all(sensor.unit == first_df.sensor.unit for sensor in sensors)
        else "",
        legend_labels=[s.name for s in sensors],
    )
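
# A self-contained taste of the terminal plot produced above; uniplot is the
# same library used by plot_beliefs, but the series here are made up:
def _example_uniplot():
    import uniplot

    uniplot.plot(
        [[0.0, 1.0, 0.5, 2.0], [1.0, 0.5, 1.5, 0.2]],
        title="Two made-up series",
        color=True,
        lines=True,
        y_unit=" MW",
        legend_labels=["sensor A", "sensor B"],
    )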
def load_sensor_data(
    self,
    sensor_types: List[SensorType] = None,
    start: datetime = None,
    end: datetime = None,
    resolution: str = None,
    belief_horizon_window=(None, None),
    belief_time_window=(None, None),
    source_types: Optional[List[str]] = None,
    exclude_source_types: Optional[List[str]] = None,
) -> Resource:
    """Load data for one or more assets and cache the results.

    If the time range parameters are None, they will be taken from the session.
    The horizon window defaults to the latest measurement (anything more in the
    future than the end of the time interval is excluded).
    To load data for a specific source, pass a source id.

    :returns: self (to allow piping)

    Usage
    -----
    >>> resource = Resource()
    >>> resource.load_sensor_data([Power], start=datetime(2014, 3, 1), end=datetime(2014, 3, 1))
    >>> resource.cached_power_data
    >>> resource.load_sensor_data([Power, Price], start=datetime(2014, 3, 1), end=datetime(2014, 3, 1)).cached_price_data
    """
    # Invalidate old caches
    self.clear_cache()

    # Look up all relevant sensor types for the given resource
    if sensor_types is None:
        # todo: after splitting Assets and Sensors, construct here a list of sensor types
        sensor_types = [Power, Price, Weather]

    # todo: after combining the Power, Price and Weather tables into one TimedBeliefs table,
    #       retrieve data from different sensor types in a single query,
    #       and cache the results grouped by sensor type (cached_price_data, cached_power_data, etc.)
    for sensor_type in sensor_types:
        if sensor_type == Power:
            sensor_key_attribute = "name"
        elif sensor_type == Price:
            sensor_key_attribute = "market.name"
        else:
            raise NotImplementedError("Unsupported sensor type")

        # Determine which sensors we need to query
        names_of_resource_sensors = set(
            coding_utils.rgetattr(asset, sensor_key_attribute)
            for asset in self.assets
        )

        # Query the sensors
        resource_data: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
            list(names_of_resource_sensors),
            event_starts_after=start,
            event_ends_before=end,
            horizons_at_least=belief_horizon_window[0],
            horizons_at_most=belief_horizon_window[1],
            beliefs_after=belief_time_window[0],
            beliefs_before=belief_time_window[1],
            source_types=source_types,
            exclude_source_types=exclude_source_types,
            resolution=resolution,
            sum_multiple=False,
        )

        # Cache the data, e.g. as cached_price_data for sensor type Price
        setattr(self, f"cached_{sensor_type.__name__.lower()}_data", resource_data)
    return self
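
# For context, a minimal recursive getattr (an assumed equivalent of
# coding_utils.rgetattr) showing how a dotted path like "market.name" is
# resolved on an asset:
def _rgetattr_sketch(obj, dotted_path: str):
    import functools

    return functools.reduce(getattr, dotted_path.split("."), obj)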
def collect_connection_and_value_groups(
    unit: str,
    resolution: str,
    belief_horizon_window: Tuple[Union[None, timedelta], Union[None, timedelta]],
    belief_time_window: Tuple[Optional[datetime_type], Optional[datetime_type]],
    start: datetime_type,
    duration: timedelta,
    connection_groups: List[List[str]],
    user_source_ids: Union[int, List[int]] = None,  # None is interpreted as all sources
    source_types: List[str] = None,
) -> Tuple[dict, int]:
    """
    Code for GETting power values from the API. Only allows getting values
    from assets owned by the current user. Returns value signs in accordance
    with USEF specs (with negative production and positive consumption).
    """
    current_app.logger.info("GETTING")
    user_sensors = get_sensors()
    if not user_sensors:
        current_app.logger.info("User doesn't seem to have any assets")
    user_sensor_ids = [sensor.id for sensor in user_sensors]

    end = start + duration
    value_groups = []
    # Each connection in the old connection groups will be interpreted as a separate group
    new_connection_groups = []
    for connections in connection_groups:

        # Get the sensor names
        sensor_names: List[str] = []
        for connection in connections:

            # Parse the entity address
            try:
                connection_details = parse_entity_address(
                    connection, entity_type="connection", fm_scheme="fm0"
                )
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            sensor_id = connection_details["asset_id"]

            # Look for the Sensor object
            if sensor_id in user_sensor_ids:
                sensor = Sensor.query.filter(Sensor.id == sensor_id).one_or_none()
            else:
                current_app.logger.warning("Cannot identify connection %s" % connection)
                return unrecognized_connection_group()
            sensor_names.append(sensor.name)

        # Get the power values
        # TODO: fill NaN for non-existing values
        power_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
            sensor_names,
            event_starts_after=start,
            event_ends_before=end,
            resolution=resolution,
            horizons_at_least=belief_horizon_window[0],
            horizons_at_most=belief_horizon_window[1],
            beliefs_after=belief_time_window[0],
            beliefs_before=belief_time_window[1],
            user_source_ids=user_source_ids,
            source_types=source_types,
            most_recent_beliefs_only=True,
            one_deterministic_belief_per_event=True,
            sum_multiple=False,
        )
        # Todo: parse time window of power_bdf_dict, which will be different for requests that are not of the form:
        # - start is a timestamp on the hour or a multiple of 15 minutes thereafter
        # - duration is a multiple of 15 minutes
        for k, bdf in power_bdf_dict.items():
            # Reverse the sign of the values (from FlexMeasures specs to USEF specs)
            value_groups.append([x * -1 for x in bdf["event_value"].tolist()])
            new_connection_groups.append(k)

    response = groups_to_dict(
        new_connection_groups, value_groups, generic_asset_type_name="connection"
    )
    response["start"] = isodate.datetime_isoformat(start)
    response["duration"] = isodate.duration_isoformat(duration)
    response["unit"] = unit  # TODO: convert to requested unit

    d, s = request_processed()
    return dict(**response, **d), s
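
# A vectorised equivalent of the per-value sign flip above, going from the
# internal convention to USEF's (negative production, positive consumption);
# the values are made up:
def _example_usef_sign_flip():
    import pandas as pd

    internal = pd.Series([2.0, -1.5])
    usef_values = (-internal).tolist()
    assert usef_values == [-2.0, 1.5]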