Example #1
def calculate_event_metrics(proc_fx_obs, categories, metrics):
    """
    Calculate event metrics for the processed data using the provided
    categories and metric types.

    Parameters
    ----------
    proc_fx_obs : datamodel.ProcessedForecastObservation
    categories : list of str
        List of categories to compute metrics over.
    metrics : list of str
        List of metrics to be computed.

    Returns
    -------
    solarforecastarbiter.datamodel.MetricResult
        Contains all the computed metrics, organized by category.

    Raises
    ------
    RuntimeError
        If there is no forecast or observation timeseries data, or if no
        metrics are specified.
    """

    out = {
        'name': proc_fx_obs.name,
        'forecast_id': proc_fx_obs.original.forecast.forecast_id,
        'observation_id': proc_fx_obs.original.observation.observation_id
    }

    fx = proc_fx_obs.forecast_values
    obs = proc_fx_obs.observation_values

    # No data or metrics
    if fx.empty:
        raise RuntimeError("No Forecast timeseries data.")
    elif obs.empty:
        raise RuntimeError("No Observation timeseries data.")
    elif len(metrics) == 0:
        raise RuntimeError("No metrics specified.")

    # Dataframe for grouping
    df = pd.concat({'forecast': fx, 'observation': obs}, axis=1)

    metric_vals = []
    # Calculate metrics
    for category in set(categories):
        # total (special category)
        if category == 'total':
            index_category = lambda x: 0  # NOQA
        else:
            index_category = getattr(df.index, category)

        # Calculate each metric
        for metric_ in set(metrics):
            # Group by category
            for cat, group in df.groupby(index_category):

                # Calculate
                res = _apply_event_metric_func(
                    metric_, group.forecast, group.observation
                )

                # Change the numeric category label of the group to an
                # abbreviation, e.g. Jan or Mon
                if category == 'month':
                    cat = calendar.month_abbr[cat]
                elif category == 'weekday':
                    cat = calendar.day_abbr[cat]

                metric_vals.append(datamodel.MetricValue(
                    category, metric_, str(cat), res))

    out['values'] = _sort_metrics_vals(metric_vals,
                                       datamodel.ALLOWED_EVENT_METRICS)
    calc_metrics = datamodel.MetricResult.from_dict(out)
    return calc_metrics
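
The per-category loop relies on two small mechanisms worth seeing in isolation: pandas DatetimeIndex attributes ('month', 'weekday', 'hour', ...) used as groupby keys, and the standard-library calendar module used to turn the numeric group labels into readable abbreviations. A minimal, self-contained sketch of that pattern, with illustrative data that is not taken from the library:

import calendar
import pandas as pd

idx = pd.date_range('2024-01-01', periods=72, freq='h')
df = pd.DataFrame({'forecast': 1.0, 'observation': 0.0}, index=idx)

# group by the numeric weekday of the index, then relabel 0..6 as Mon..Sun,
# mirroring the category == 'weekday' branch above
for cat, group in df.groupby(df.index.weekday):
    print(calendar.day_abbr[cat], len(group))
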
Example #2
def calculate_deterministic_metrics(processed_fx_obs, categories, metrics):
    """
    Calculate deterministic metrics for the processed data using the provided
    categories and metric types.

    Normalization is determined by the attributes of the input objects.

    If ``processed_fx_obs.uncertainty`` is not ``None``, a deadband
    equal to the uncertainty will be used by the metrics that support it.

    Parameters
    ----------
    processed_fx_obs : datamodel.ProcessedForecastObservation
    categories : list of str
        List of categories to compute metrics over.
    metrics : list of str
        List of metrics to be computed.

    Returns
    -------
    solarforecastarbiter.datamodel.MetricResult
        Contains all the computed metrics, organized by category.

    Raises
    ------
    RuntimeError
        If there is no forecast or observation timeseries data, or if no
        metrics are specified.
    """
    out = {
        'name': processed_fx_obs.name,
        'forecast_id': processed_fx_obs.original.forecast.forecast_id,
    }

    try:
        out['observation_id'] = processed_fx_obs.original.observation.observation_id  # NOQA: E501
    except AttributeError:
        out['aggregate_id'] = processed_fx_obs.original.aggregate.aggregate_id

    fx = processed_fx_obs.forecast_values
    obs = processed_fx_obs.observation_values

    # Check reference forecast is from processed pair, if needed
    ref_fx = processed_fx_obs.reference_forecast_values
    if ref_fx is None:
        ref_fx = np.nan  # avoids issues with None and deadband masking

    # No data or metrics
    if fx.empty:
        raise RuntimeError("No forecast timeseries data.")
    elif obs.empty:
        raise RuntimeError("No observation timeseries data.")
    elif len(metrics) == 0:
        raise RuntimeError("No metrics specified.")

    # Dataframe for grouping
    df = pd.DataFrame({'forecast': fx,
                       'observation': obs,
                       'reference': ref_fx})

    # get normalization factor
    normalization = processed_fx_obs.normalization_factor

    # get uncertainty.
    deadband = processed_fx_obs.uncertainty

    # Force `groupby` to be consistent with `interval_label`, i.e., if
    # `interval_label == ending`, then the last interval should be in the bin
    if processed_fx_obs.interval_label == "ending":
        df.index -= pd.Timedelta("1ns")

    metric_vals = []
    # Calculate metrics
    for category in set(categories):
        # total (special category)
        if category == 'total':
            index_category = lambda x: 0  # NOQA: E731
        else:
            index_category = getattr(df.index, category)

        # Calculate each metric
        for metric_ in metrics:

            # Group by category
            for cat, group in df.groupby(index_category):

                # Calculate
                res = _apply_deterministic_metric_func(
                    metric_, group.forecast, group.observation,
                    ref_fx=group.reference, normalization=normalization,
                    deadband=deadband)

                # Change the numeric category label of the group to an
                # abbreviation, e.g. Jan or Mon
                if category == 'month':
                    cat = calendar.month_abbr[cat]
                elif category == 'weekday':
                    cat = calendar.day_abbr[cat]

                metric_vals.append(datamodel.MetricValue(
                    category, metric_, str(cat), res))

    out['values'] = _sort_metrics_vals(
        metric_vals, datamodel.ALLOWED_DETERMINISTIC_METRICS)
    calc_metrics = datamodel.MetricResult.from_dict(out)
    return calc_metrics
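
The 1 ns index shift applied when interval_label is "ending" keeps the grouping consistent with interval-ending labels: a value stamped exactly at midnight describes the hour that ends at midnight, so without the shift it would be binned with the following day. A small, self-contained illustration of the effect (timestamps chosen for clarity, not taken from the library):

import pandas as pd

idx = pd.DatetimeIndex(['2024-01-01 23:00', '2024-01-02 00:00'])
ser = pd.Series([1.0, 2.0], index=idx)

shifted = ser.copy()
shifted.index -= pd.Timedelta('1ns')

print(ser.groupby(ser.index.day).size())          # midnight lands on day 2
print(shifted.groupby(shifted.index.day).size())  # both values land on day 1
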
Example #3
def _calculate_probabilistic_metrics_from_df(data_df, categories, metrics,
                                             interval_label):
    """
    Calculate probabilistic metrics for the processed data using the provided
    categories and metric types.

    Parameters
    ----------
    data_df : pandas.DataFrame
        DataFrame that contains all timeseries values on the same index.
    categories : list of str
        List of categories to compute metrics over.
    metrics : list of str
        List of metrics to be computed.
    interval_label : str
        Interval labeling convention of the data; if 'ending', the index is
        shifted so grouping is consistent with interval-ending labels.

    Returns
    -------
    list of tuples of datamodel.MetricValue
        Contains all the computed metrics, organized by category. Each tuple is
        associated with a datamodel.ProbabilisticForecastConstantValue.
    """
    metric_values = []

    # Force `groupby` to be consistent with `interval_label`, i.e., if
    # `interval_label == ending`, then the last interval should be in the
    # bin
    if interval_label == "ending":
        data_df.index -= pd.Timedelta("1ns")

    # Calculate metrics
    for category in set(categories):
        # total (special category)
        if category == 'total':
            index_category = lambda x: 0  # NOQA: E731
        else:
            index_category = getattr(data_df.index, category)

        # Calculate each metric
        for metric_ in set(metrics):
            # Group by category
            for cat, group in data_df.groupby(index_category):

                try:
                    ref_fx_vals = group.xs('reference_forecast', level=1, axis=1).to_numpy()  # NOQA: E501
                    ref_fx_prob_vals = group.xs('reference_probability', level=1, axis=1).to_numpy()  # NOQA: E501
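                    # a single reference constant value yields (n, 1) arrays;
                    # flatten them to 1-D for the metric functions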
                    if ref_fx_vals.size == ref_fx_vals.shape[0]:
                        ref_fx_vals = ref_fx_vals.T[0]
                        ref_fx_prob_vals = ref_fx_prob_vals.T[0]
                except KeyError:
                    ref_fx_vals = np.nan
                    ref_fx_prob_vals = np.nan

                fx_vals = group.xs('forecast', level=1, axis=1).to_numpy()
                fx_prob_vals = group.xs('probability', level=1, axis=1).to_numpy()  # NOQA: E501
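                # a single constant value yields (n, 1) arrays; flatten to 1-D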
                if fx_vals.size == fx_vals.shape[0]:
                    fx_vals = fx_vals.T[0]
                    fx_prob_vals = fx_prob_vals.T[0]
                obs_vals = group[(None, 'observation')].to_numpy()

                # Calculate
                res = _apply_probabilistic_metric_func(
                    metric_, fx_vals, fx_prob_vals, obs_vals,
                    ref_fx=ref_fx_vals, ref_fx_prob=ref_fx_prob_vals)

                # Change the numeric category label of the group to an
                # abbreviation, e.g. Jan or Mon
                if category == 'month':
                    cat = calendar.month_abbr[cat]
                elif category == 'weekday':
                    cat = calendar.day_abbr[cat]

                metric_values.append(datamodel.MetricValue(
                    category, metric_, str(cat), res))

    return metric_values
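

# Sketch of the column layout the xs(..., level=1, axis=1) calls above rely
# on: a two-level column MultiIndex where level 0 identifies the forecast
# constant value and level 1 names the kind of series. The labels below are
# illustrative assumptions; the real frame is assembled elsewhere in the
# package and also carries a (None, 'observation') column.
import numpy as np
import pandas as pd

_sketch_idx = pd.date_range('2024-01-01', periods=3, freq='h')
_sketch_cols = pd.MultiIndex.from_tuples([
    ('cv25', 'forecast'), ('cv25', 'probability'),
    ('cv75', 'forecast'), ('cv75', 'probability'),
])
_sketch_df = pd.DataFrame(np.arange(12.).reshape(3, 4),
                          index=_sketch_idx, columns=_sketch_cols)

# one column per constant value -> (3, 2) arrays, matching fx_vals above
print(_sketch_df.xs('forecast', level=1, axis=1).to_numpy().shape)
print(_sketch_df.xs('probability', level=1, axis=1).to_numpy().shape)

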
def calculate_summary_statistics(processed_fx_obs, categories):
    """
    Calculate summary statistics for the processed data using the provided
    categories and all metrics defined in :py:mod:`.summary`.

    Parameters
    ----------
    processed_fx_obs : datamodel.ProcessedForecastObservation
    categories : list of str
        List of categories to compute metrics over.

    Returns
    -------
    solarforecastarbiter.datamodel.MetricResult
        Contains all the computed statistics by category.

    Raises
    ------
    RuntimeError
        If there is no data to summarize.
    """
    out = {
        'name': processed_fx_obs.name,
        'forecast_id': processed_fx_obs.original.forecast.forecast_id,
        'is_summary': True
    }
    try:
        out['observation_id'] = \
            processed_fx_obs.original.observation.observation_id
    except AttributeError:
        out['aggregate_id'] = \
            processed_fx_obs.original.aggregate.aggregate_id

    dfd = {'observation': processed_fx_obs.observation_values}
    # only calculate stats for deterministic forecasts
    # but always for observations
    if _is_deterministic_forecast(processed_fx_obs):
        dfd['forecast'] = processed_fx_obs.forecast_values
        ref_fx = processed_fx_obs.reference_forecast_values
        if ref_fx is not None:
            dfd['reference_forecast'] = ref_fx

    df = pd.DataFrame(dfd)
    if df.empty:
        raise RuntimeError('No data to calculate summary statistics for.')

    # Force `groupby` to be consistent with `interval_label`, i.e., if
    # `interval_label == ending`, then the last interval should be in the bin
    if processed_fx_obs.interval_label == "ending":
        df.index -= pd.Timedelta("1ns")

    metric_vals = []
    # Calculate metrics
    for category in set(categories):
        index_category = _index_category(category, df)

        # Group by category
        for cat, group in df.groupby(index_category):
            all_metrics = _calculate_summary_for_frame(group)

            # Change the numeric category label of the group to an
            # abbreviation, e.g. Jan or Mon
            if category == 'month':
                cat = calendar.month_abbr[cat]
            elif category == 'weekday':
                cat = calendar.day_abbr[cat]

            metric_vals.extend([
                datamodel.MetricValue(category, f'{obj}_{met}', str(cat), val)
                for obj, ser in all_metrics.items()
                for met, val in ser.items()
            ])
    out['values'] = _sort_metrics_vals(
        metric_vals, {
            f'{type_}_{k}': v
            for k, v in datamodel.ALLOWED_SUMMARY_STATISTICS.items()
            for type_ in ('forecast', 'observation', 'reference_forecast')
        })
    calc_stats = datamodel.MetricResult.from_dict(out)
    return calc_stats
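
The nested comprehension that builds metric_vals flattens a mapping of per-column summary Series into flat '<object>_<statistic>' names before they are wrapped in datamodel.MetricValue. A tiny illustration of just that naming step, using plain tuples and assumed statistic names in place of the real datamodel objects:

import pandas as pd

# stand-in for the per-group return value of _calculate_summary_for_frame
all_metrics = {
    'observation': pd.Series({'mean': 4.2, 'max': 9.0}),
    'forecast': pd.Series({'mean': 4.0, 'max': 8.5}),
}

flattened = [
    ('total', f'{obj}_{met}', '0', val)
    for obj, ser in all_metrics.items()
    for met, val in ser.items()
]
print(flattened)
# -> [('total', 'observation_mean', '0', 4.2), ('total', 'observation_max', '0', 9.0), ...]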