Exemplo n.º 1
0
def test_collect_power_resampled(db, app, query_start, query_end, resolution,
                                 num_values, setup_test_data):
    """Search the wind sensor's beliefs at a requested resolution.

    Only the most recent beliefs are requested. The number of rows found
    within the query window should equal ``num_values``.
    """
    sensor = Sensor.query.filter_by(name="wind-asset-1").one_or_none()
    sensor_name = sensor.name
    search_filters = dict(
        event_starts_after=query_start,
        event_ends_before=query_end,
        resolution=resolution,
        most_recent_beliefs_only=True,
    )
    beliefs = TimedBelief.search(sensor_name, **search_filters)
    print(beliefs)
    assert len(beliefs) == num_values
Exemplo n.º 2
0
def test_persist_beliefs(setup_beliefs, setup_test_data):
    """Check that newly added beliefs are persisted.

    The beliefs that were already set up are loaded and copied, with the copy
    getting a belief time one hour later and a tenfold event value. After
    adding the copies, the same search should find twice as many beliefs.
    """
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    source = DataSource.query.filter_by(name="ENTSO-E").one_or_none()
    search_kwargs = dict(source=source, most_recent_beliefs_only=False)

    existing_beliefs = TimedBelief.search(sensor, **search_kwargs)

    # Derive the new beliefs from the existing ones
    new_rows = existing_beliefs.reset_index()
    one_hour = timedelta(hours=1)
    new_rows["belief_time"] = new_rows["belief_time"] + one_hour
    new_rows["event_value"] = new_rows["event_value"] * 10
    index_levels = [
        "event_start",
        "belief_time",
        "source",
        "cumulative_probability",
    ]
    new_beliefs = new_rows.set_index(index_levels)

    TimedBelief.add(new_beliefs)

    # Both the original and the new beliefs should now be found
    all_beliefs = TimedBelief.search(sensor, **search_kwargs)
    assert len(all_beliefs) == setup_beliefs * 2
Exemplo n.º 3
0
def test_query_beliefs(setup_beliefs):
    """Check that the different ways of querying beliefs agree.

    The same sensor is queried by object, by id, by name, through the sensor's
    own search method and by filtering the sensor's beliefs on source. Every
    result should have an hourly event resolution and contain all beliefs
    that were set up.
    """
    sensor = Sensor.query.filter_by(name="epex_da").one_or_none()
    source = DataSource.query.filter_by(name="ENTSO-E").one_or_none()
    hourly = timedelta(hours=1)

    found_by_object = TimedBelief.search(
        sensor, source=source, most_recent_beliefs_only=False)
    found_by_id = TimedBelief.search(
        sensor.id, source=source, most_recent_beliefs_only=False)
    found_by_name = TimedBelief.search(
        sensor.name, source=source, most_recent_beliefs_only=False)
    found_via_sensor = sensor.search_beliefs(
        source=source, most_recent_beliefs_only=False)

    # Build a frame from the sensor's beliefs and keep only rows of our source
    sensor_beliefs = tb.BeliefsDataFrame(sensor.beliefs)
    is_from_source = sensor_beliefs.index.get_level_values("source") == source
    found_via_relationship = sensor_beliefs[is_from_source]

    results = (
        found_by_object,
        found_by_id,
        found_by_name,
        found_via_sensor,
        found_via_relationship,
    )
    for result in results:
        assert sensor.event_resolution == hourly
        assert result.event_resolution == hourly
        assert len(result) == setup_beliefs
Exemplo n.º 4
0
def test_simplify_index(setup_test_data, check_empty_frame):
    """Check that simplify_index keeps the event resolution.

    When ``check_empty_frame`` is set, the check is done on an emptied frame.
    """
    quarter_hour = timedelta(minutes=15)
    sensor = Sensor.query.filter_by(name="wind-asset-1").one_or_none()
    beliefs = TimedBelief.search(
        sensor.name,
        event_starts_after=datetime(2015, 1, 1, tzinfo=pytz.utc),
        event_ends_before=datetime(2015, 1, 2, tzinfo=pytz.utc),
        resolution=quarter_hour,
    )
    beliefs = beliefs.convert_index_from_belief_time_to_horizon()
    if check_empty_frame:
        # Slicing away all rows leaves an empty BeliefsDataFrame, which retains
        # the metadata such as sensor and resolution
        beliefs = beliefs.iloc[0:0, :]
    simplified = simplify_index(beliefs)
    assert simplified.event_resolution == quarter_hour
Exemplo n.º 5
0
def test_collect_power(db, app, query_start, query_end, num_values,
                       setup_test_data):
    """Compare searched beliefs of the wind sensor with the stored rows.

    Checks the index layout of the search result, the dtype of its belief
    horizons, the number of rows found and the event values themselves.
    """
    sensor = Sensor.query.filter_by(name="wind-asset-1").one_or_none()
    stored_beliefs = TimedBelief.query.filter(
        TimedBelief.sensor_id == sensor.id).all()
    print(stored_beliefs)

    found = TimedBelief.search(
        sensor.name,
        event_starts_after=query_start,
        event_ends_before=query_end,
    )
    print(found)

    # The first index level should be event_start, so that df.loc[] refers
    # to event_start
    first_level = found.index.names[0]
    assert first_level == "event_start"

    # The dtype of belief_horizon is timedelta64[ns], so the minimum horizon
    # on an empty BeliefsDataFrame is NaT instead of NaN
    by_horizon = found.convert_index_from_belief_time_to_horizon()
    horizons = by_horizon.index.get_level_values("belief_horizon")
    assert pd.api.types.is_timedelta64_dtype(horizons)

    assert len(found) == num_values
    found_values = found["event_value"].tolist()
    for found_value, stored_belief in zip(found_values, stored_beliefs):
        assert abs(found_value - stored_belief.event_value) < 10**-6
Exemplo n.º 6
0
def get_weather_data(
    assets: List[Asset],
    metrics: dict,
    sensor_type: WeatherSensorType,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, str, Sensor, dict]:
    """Get weather observations, weather forecasts and weather metrics.

    Up to 50 weather sensors near the first asset are queried. Of these, the
    first one (in the order returned by Sensor.find_closest) that has any
    non-null observation or forecast in the query window is selected. If none
    of them has data, the first sensor is selected.

    :param assets: assets to find weather data for (only the first is used)
    :param metrics: dict to which the weather metrics are added; it is
                    modified in place and also returned
    :param sensor_type: type of weather sensor to look for; if falsy, no data
                        is collected and no metrics are set
    :param query_window: start and end of the time window to query
    :param resolution: resolution passed on to the belief search
    :param forecast_horizon: minimum belief horizon of the forecasts
    :returns: weather observations, weather forecasts (either might be an
              empty DataFrame), the name of the sensor type, the selected
              weather sensor (None if no sensor was found) and the metrics
              dict, with the following weather metrics set if a sensor was
              found:
              - realised value
              - expected value
              - mean absolute error
              - mean absolute percentage error
              - weighted absolute percentage error
    """

    # Todo: for now we only collect weather data for a single asset
    asset = assets[0]

    weather_data = tb.BeliefsDataFrame(columns=["event_value"])
    weather_forecast_data = tb.BeliefsDataFrame(columns=["event_value"])
    sensor_type_name = ""
    closest_sensor = None
    if not sensor_type:
        return (
            weather_data,
            weather_forecast_data,
            sensor_type_name,
            closest_sensor,
            metrics,
        )

    # Find the 50 closest weather sensors
    sensor_type_name = sensor_type.name
    closest_sensors = Sensor.find_closest(
        generic_asset_type_name=asset.generic_asset.generic_asset_type.name,
        sensor_name=sensor_type_name,
        n=50,
        object=asset,
    )
    if not closest_sensors:
        return (
            weather_data,
            weather_forecast_data,
            sensor_type_name,
            closest_sensor,
            metrics,
        )
    closest_sensor = closest_sensors[0]

    # Collect the weather data for the requested time window
    sensor_names = [sensor.name for sensor in closest_sensors]

    # Get weather data
    weather_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
        sensor_names,
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=None,
        horizons_at_most=timedelta(hours=0),
        sum_multiple=False,
    )
    weather_df_dict: Dict[str, pd.DataFrame] = {
        name: simplify_index(
            bdf, index_levels_to_columns=["belief_horizon", "source"])
        for name, bdf in weather_bdf_dict.items()
    }

    # Get weather forecasts
    weather_forecast_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
        sensor_names,
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=forecast_horizon,
        horizons_at_most=None,
        source_types=["user", "forecasting script", "script"],
        sum_multiple=False,
    )
    weather_forecast_df_dict: Dict[str, pd.DataFrame] = {
        name: simplify_index(
            bdf, index_levels_to_columns=["belief_horizon", "source"])
        for name, bdf in weather_forecast_bdf_dict.items()
    }

    def has_values(df: pd.DataFrame) -> bool:
        """Tell whether the frame holds at least one non-null event value."""
        return not df["event_value"].isnull().values.all()

    # Take the first weather sensor which contains some data for the selected time window
    for sensor in closest_sensors:
        if has_values(weather_df_dict[sensor.name]) or has_values(
                weather_forecast_df_dict[sensor.name]):
            closest_sensor = sensor
            break

    # Look up the data by the selected sensor (rather than by a leftover loop
    # variable), so that the returned data always belongs to the returned sensor,
    # also when none of the sensors has data.
    weather_data = weather_df_dict[closest_sensor.name]
    weather_forecast_data = weather_forecast_df_dict[closest_sensor.name]

    # Calculate the weather metrics
    if not weather_data.empty:
        metrics["realised_weather"] = weather_data["event_value"].mean()
    else:
        metrics["realised_weather"] = np.nan
    if (not weather_forecast_data.empty
            and weather_forecast_data.size == weather_data.size):
        observed = weather_data["event_value"]
        forecast = weather_forecast_data["event_value"]
        metrics["expected_weather"] = forecast.mean()
        metrics["mae_weather"] = calculations.mean_absolute_error(
            observed, forecast)
        metrics["mape_weather"] = calculations.mean_absolute_percentage_error(
            observed, forecast)
        metrics["wape_weather"] = calculations.weighted_absolute_percentage_error(
            observed, forecast)
    else:
        # Error metrics are only computed when observations and forecasts
        # have the same size
        metrics["expected_weather"] = np.nan
        metrics["mae_weather"] = np.nan
        metrics["mape_weather"] = np.nan
        metrics["wape_weather"] = np.nan
    return (
        weather_data,
        weather_forecast_data,
        sensor_type_name,
        closest_sensor,
        metrics,
    )
Exemplo n.º 7
0
def get_prices_data(
    metrics: dict,
    market_sensor: Sensor,
    query_window: Tuple[datetime, datetime],
    resolution: str,
    forecast_horizon: timedelta,
) -> Tuple[pd.DataFrame, pd.DataFrame, dict]:
    """Get price data and metrics.

    :param metrics: dict to which the price metrics are added; it is modified
                    in place and also returned
    :param market_sensor: sensor whose name is used to search for prices;
                          if None, an empty name is searched for
    :param query_window: start and end of the time window to query
    :param resolution: resolution passed on to the belief search
    :param forecast_horizon: minimum belief horizon of the price forecasts
    :returns: price observations, price forecasts (either might be an empty
              DataFrame) and the metrics dict with the following price
              metrics set:
              - realised value
              - expected value
              - mean absolute error
              - mean absolute percentage error
              - weighted absolute percentage error
    """

    market_name = "" if market_sensor is None else market_sensor.name

    # Get price data
    price_bdf: tb.BeliefsDataFrame = TimedBelief.search(
        [market_name],
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=None,
        horizons_at_most=timedelta(hours=0),
    )
    price_df: pd.DataFrame = simplify_index(
        price_bdf, index_levels_to_columns=["belief_horizon", "source"])

    # np.nan (rather than the np.NaN alias, which NumPy 2.0 removed) marks
    # metrics that could not be computed
    if not price_bdf.empty:
        metrics["realised_unit_price"] = price_df["event_value"].mean()
    else:
        metrics["realised_unit_price"] = np.nan

    # Get price forecast
    price_forecast_bdf: tb.BeliefsDataFrame = TimedBelief.search(
        [market_name],
        event_starts_after=query_window[0],
        event_ends_before=query_window[1],
        resolution=resolution,
        horizons_at_least=forecast_horizon,
        horizons_at_most=None,
        source_types=["user", "forecasting script", "script"],
    )
    price_forecast_df: pd.DataFrame = simplify_index(
        price_forecast_bdf,
        index_levels_to_columns=["belief_horizon", "source"])

    # Calculate the price metrics
    if not price_forecast_df.empty and price_forecast_df.size == price_df.size:
        observed = price_df["event_value"]
        forecast = price_forecast_df["event_value"]
        metrics["expected_unit_price"] = forecast.mean()
        metrics["mae_unit_price"] = calculations.mean_absolute_error(
            observed, forecast)
        metrics["mape_unit_price"] = calculations.mean_absolute_percentage_error(
            observed, forecast)
        metrics["wape_unit_price"] = calculations.weighted_absolute_percentage_error(
            observed, forecast)
    else:
        # Error metrics are only computed when observations and forecasts
        # have the same size
        metrics["expected_unit_price"] = np.nan
        metrics["mae_unit_price"] = np.nan
        metrics["mape_unit_price"] = np.nan
        metrics["wape_unit_price"] = np.nan
    return price_df, price_forecast_df, metrics
Exemplo n.º 8
0
def plot_beliefs(
    sensors: List[Sensor],
    start: datetime,
    duration: timedelta,
    belief_time_before: Optional[datetime],
    source: Optional[DataSource],
):
    """
    Show a simple plot of belief data directly in the terminal.

    Sensors without data in the requested period are reported and left out of
    the plot.

    :param sensors: sensors to plot beliefs for (the passed list is not modified)
    :param start: start of the period to plot
    :param duration: length of the period to plot
    :param belief_time_before: if set, only beliefs made before this time are queried
    :param source: if set, only beliefs from this source are queried
    :raises click.Abort: if none of the sensors has any data
    """
    sensors = list(sensors)
    min_resolution = min(s.event_resolution for s in sensors)

    # query data
    beliefs_by_sensor = TimedBelief.search(
        sensors=sensors,
        event_starts_after=start,
        event_ends_before=start + duration,
        beliefs_before=belief_time_before,
        source=source,
        one_deterministic_belief_per_event=True,
        resolution=min_resolution,
        sum_multiple=False,
    )

    # only keep non-empty
    # (we build a new list rather than removing from the list we iterate over,
    # which would skip the sensor following each removed one)
    sensors_with_data = []
    for s in sensors:
        if beliefs_by_sensor[s.name].empty:
            click.echo(f"No data found for sensor '{s.name}' (Id: {s.id})")
        else:
            sensors_with_data.append(s)
    sensors = sensors_with_data
    beliefs_by_sensor = {s.name: beliefs_by_sensor[s.name] for s in sensors}
    if not beliefs_by_sensor:
        click.echo("No data found!")
        raise click.Abort()
    first_df = beliefs_by_sensor[sensors[0].name]

    # Build title
    if len(sensors) == 1:
        title = f"Beliefs for Sensor '{sensors[0].name}' (Id {sensors[0].id}).\n"
    else:
        title = f"Beliefs for Sensor(s) [{','.join([s.name for s in sensors])}], (Id(s): [{','.join([str(s.id) for s in sensors])}]).\n"
    title += f"Data spans {naturaldelta(duration)} and starts at {start}."
    if belief_time_before:
        title += f"\nOnly beliefs made before: {belief_time_before}."
    if source:
        title += f"\nSource: {source.description}"
    title += f"\nThe time resolution (x-axis) is {naturaldelta(min_resolution)}."

    # Only show a unit on the y-axis if all plotted sensors share it
    unit = first_df.sensor.unit
    same_unit = len(beliefs_by_sensor) == 1 or all(
        sensor.unit == unit for sensor in sensors
    )

    uniplot.plot(
        [beliefs_by_sensor[s.name].event_value for s in sensors],
        title=title,
        color=True,
        lines=True,
        y_unit=unit if same_unit else "",
        legend_labels=[s.name for s in sensors],
    )
Exemplo n.º 9
0
    def load_sensor_data(
        self,
        sensor_types: List[SensorType] = None,
        start: datetime = None,
        end: datetime = None,
        resolution: str = None,
        belief_horizon_window=(None, None),
        belief_time_window=(None, None),
        source_types: Optional[List[str]] = None,
        exclude_source_types: Optional[List[str]] = None,
    ) -> Resource:
        """Load data for the sensors of this resource's assets and cache the results.

        Previously cached data is cleared first. Then, for each sensor type,
        the beliefs of the relevant sensors are searched and stored on an
        attribute named after the sensor type (e.g. cached_price_data for
        sensor type Price).

        :param sensor_types: sensor types to load data for (Power and Price
                             are supported); defaults to Power, Price and Weather
        :param start: only search for events starting after this time
        :param end: only search for events ending before this time
        :param resolution: resolution passed on to the belief search
        :param belief_horizon_window: minimum and maximum belief horizon
        :param belief_time_window: earliest and latest belief time
        :param source_types: source types passed on to the belief search
        :param exclude_source_types: source types to exclude, passed on to the belief search
        :raises NotImplementedError: for any sensor type other than Power or Price
        :returns: self (to allow piping)

        Usage
        -----
        >>> resource = Resource()
        >>> resource.load_sensor_data([Power], start=datetime(2014, 3, 1), end=datetime(2014, 3, 1))
        >>> resource.cached_power_data
        >>> resource.load_sensor_data([Power, Price], start=datetime(2014, 3, 1), end=datetime(2014, 3, 1)).cached_price_data
        """

        # Drop whatever was cached before
        self.clear_cache()

        # Fall back to all sensor types if none were given
        # NOTE(review): Weather is part of this default, but is not supported in
        #               the loop below (it ends up in the NotImplementedError branch)
        if sensor_types is None:
            # todo: after splitting Assets and Sensors, construct here a list of sensor types
            sensor_types = [Power, Price, Weather]

        # todo: after combining the Power, Price and Weather tables into one TimedBeliefs table,
        #       retrieve data from different sensor types in a single query,
        #       and cache the results grouped by sensor type (cached_price_data, cached_power_data, etc.)
        for sensor_type in sensor_types:
            # Where on an asset do we find the name of the sensor to query?
            if sensor_type == Power:
                name_attribute = "name"
            elif sensor_type == Price:
                name_attribute = "market.name"
            else:
                raise NotImplementedError("Unsupported sensor type")

            # Collect the unique sensor names over all assets
            sensor_names = {
                coding_utils.rgetattr(asset, name_attribute)
                for asset in self.assets
            }

            # Search for the beliefs about these sensors
            search_filters = dict(
                event_starts_after=start,
                event_ends_before=end,
                horizons_at_least=belief_horizon_window[0],
                horizons_at_most=belief_horizon_window[1],
                beliefs_after=belief_time_window[0],
                beliefs_before=belief_time_window[1],
                source_types=source_types,
                exclude_source_types=exclude_source_types,
                resolution=resolution,
                sum_multiple=False,
            )
            beliefs_by_sensor: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
                list(sensor_names), **search_filters)

            # Store the result, e.g. as cached_price_data for sensor type Price
            cache_attribute = f"cached_{sensor_type.__name__.lower()}_data"
            setattr(self, cache_attribute, beliefs_by_sensor)
        return self
Exemplo n.º 10
0
def collect_connection_and_value_groups(
    unit: str,
    resolution: str,
    belief_horizon_window: Tuple[Union[None, timedelta], Union[None, timedelta]],
    belief_time_window: Tuple[Optional[datetime_type], Optional[datetime_type]],
    start: datetime_type,
    duration: timedelta,
    connection_groups: List[List[str]],
    user_source_ids: Union[int, List[int]] = None,  # None is interpreted as all sources
    source_types: List[str] = None,
) -> Tuple[dict, int]:
    """
    Code for GETting power values from the API.
    Only values of sensors returned by get_sensors() can be retrieved.
    Returns value sign in accordance with USEF specs
    (with negative production and positive consumption).

    Each connection is looked up as a sensor, and every sensor found ends up
    as its own group in the response. An invalid entity address or a
    connection that does not match one of the user's sensors leads to an
    early return of the respective error response.
    """
    logger = current_app.logger
    logger.info("GETTING")
    user_sensors = get_sensors()
    if not user_sensors:
        logger.info("User doesn't seem to have any assets")
    user_sensor_ids = [user_sensor.id for user_sensor in user_sensors]

    end = start + duration
    value_groups = []
    # Each connection in the old connection groups will be interpreted as a separate group
    new_connection_groups = []
    for connections in connection_groups:

        # Translate the connections of this group into sensor names
        sensor_names: List[str] = []
        for connection in connections:

            # Parse the entity address
            try:
                address_fields = parse_entity_address(
                    connection, entity_type="connection", fm_scheme="fm0"
                )
            except EntityAddressException as eae:
                return invalid_domain(str(eae))
            sensor_id = address_fields["asset_id"]

            # Only sensors of the current user can be looked up
            if sensor_id not in user_sensor_ids:
                logger.warning("Cannot identify connection %s" % connection)
                return unrecognized_connection_group()
            sensor = Sensor.query.filter(Sensor.id == sensor_id).one_or_none()
            sensor_names.append(sensor.name)

        # Get the power values
        # TODO: fill NaN for non-existing values
        search_filters = dict(
            event_starts_after=start,
            event_ends_before=end,
            resolution=resolution,
            horizons_at_least=belief_horizon_window[0],
            horizons_at_most=belief_horizon_window[1],
            beliefs_after=belief_time_window[0],
            beliefs_before=belief_time_window[1],
            user_source_ids=user_source_ids,
            source_types=source_types,
            most_recent_beliefs_only=True,
            one_deterministic_belief_per_event=True,
            sum_multiple=False,
        )
        power_bdf_dict: Dict[str, tb.BeliefsDataFrame] = TimedBelief.search(
            sensor_names, **search_filters
        )
        # Todo: parse time window of power_bdf_dict, which will be different for requests that are not of the form:
        # - start is a timestamp on the hour or a multiple of 15 minutes thereafter
        # - duration is a multiple of 15 minutes
        for sensor_name, power_bdf in power_bdf_dict.items():
            # Reverse sign of values (from FlexMeasures specs to USEF specs)
            usef_values = [-value for value in power_bdf["event_value"].tolist()]
            value_groups.append(usef_values)
            new_connection_groups.append(sensor_name)

    response = groups_to_dict(
        new_connection_groups, value_groups, generic_asset_type_name="connection"
    )
    response["start"] = isodate.datetime_isoformat(start)
    response["duration"] = isodate.duration_isoformat(duration)
    response["unit"] = unit  # TODO: convert to requested unit

    status_message, status_code = request_processed()
    return dict(**response, **status_message), status_code