Example No. 1
def validate_dc_power(observation, values):
    """
    Run a number of validation checks on a daily timeseries of DC power.

    Parameters
    ----------
    observation : solarforecastarbiter.datamodel.Observation
        Observation object that the data is associated with
    values : pandas.Series
        Series of observation values

    Returns
    -------
    timestamp_flag : pandas.Series
        Bitmask from :py:func:`.validator.check_timestamp_spacing`
    night_flag : pandas.Series
        Bitmask from :py:func:`.validator.check_day_night` or
        :py:func:`.validator.check_day_night_interval`
    limit_flag : pandas.Series
        Bitmask from :py:func:`.validator.check_dc_power_limits`
    """
    solar_position, dni_extra, timestamp_flag, night_flag = _solpos_dni_extra(
        observation, values)
    day_night = \
        ~quality_mapping.convert_mask_into_dataframe(night_flag)['NIGHTTIME']
    dc_limit_flag = validator.check_dc_power_limits(
        values,
        day_night,
        observation.site.modeling_parameters.dc_capacity,
        _return_mask=True)
    return timestamp_flag, night_flag, dc_limit_flag
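A minimal usage sketch (assuming an `observation` and `values` as described in the docstring above): the three returned bitmask Series share the observation's index, so they can be combined with a bitwise OR into a single quality flag series.

# Usage sketch, not part of the library: combine the returned bitmasks.
timestamp_flag, night_flag, dc_limit_flag = validate_dc_power(observation, values)
quality_flag = timestamp_flag | night_flag | dc_limit_flag  # elementwise bitwise OR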
def generate_observation_figure(observation, data, limit=pd.Timedelta('3d')):
    """
    Creates a bokeh figure from API responses for an observation

    Parameters
    ----------
    observation : datamodel.Observation
        The Observation that is being plotted

    data : pandas.DataFrame
        The observation data to be plotted with datetime index
        and ('value', 'quality_flag') columns

    limit : pandas.Timedelta or None
        The time limit from the last datapoint to plot. If None, all
        data is plotted.

    Returns
    -------
    None
        When the data is empty.
    layout : bokeh layout
        Otherwise, the bokeh layout built with gridplot from the
        timeseries figure and quality flag bars.
    """
    logger.info('Starting observation figure generation...')
    if len(data.index) == 0:
        return None
    data = plot_utils.align_index(data, observation.interval_length, limit)
    quality_flag = data.pop('quality_flag').dropna().astype(int)
    bool_flags = quality_mapping.convert_mask_into_dataframe(quality_flag)
    active_flags = quality_mapping.convert_flag_frame_to_strings(bool_flags)
    active_flags.name = 'active_flags'
    flags = bool_flags.mask(~bool_flags).reindex(data.index)  # add missing
    flags['MISSING'] = pd.Series(1.0, index=data.index)[pd.isna(data['value'])]
    # need to fill as line needs more than a single point to show up
    if observation.interval_label == 'ending':
        flags.bfill(axis=0, limit=1, inplace=True)
    else:
        # for interval beginning and instantaneous
        flags.ffill(axis=0, limit=1, inplace=True)
    cds = ColumnDataSource(pd.concat([data, flags, active_flags], axis=1))
    figs = [
        make_basic_timeseries(cds, observation.name, observation.variable,
                              observation.interval_label, PLOT_WIDTH)
    ]

    figs.extend(make_quality_bars(cds, PLOT_WIDTH, figs[0].x_range))
    layout = _make_layout(figs)
    logger.info('Figure generated successfully')
    return layout
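A hedged usage sketch with hypothetical data (the real DataFrame would come from the API): generate_observation_figure expects a datetime-indexed frame with 'value' and 'quality_flag' columns and returns None when the frame is empty, otherwise a bokeh layout that can be rendered with bokeh.io.show.

# Usage sketch with made-up data; `observation` is assumed to exist.
import pandas as pd
from bokeh.io import show

index = pd.date_range('2020-01-01', periods=12, freq='5min', tz='UTC')
data = pd.DataFrame({'value': 1.0, 'quality_flag': 2}, index=index)  # arbitrary values
layout = generate_observation_figure(observation, data, limit=pd.Timedelta('3d'))
if layout is not None:
    show(layout)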
def test_convert_mask_into_dataframe():
    flags = (pd.Series([0, 0, 1, 1 << 12, 1 << 9 | 1 << 7 | 1 << 5]) |
             quality_mapping.LATEST_VERSION_FLAG)
    expected = pd.DataFrame([[0] * 12,
                             [0] * 12,
                             [1] + [0] * 11,
                             [0] * 9 + [1, 0, 0],
                             [0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0]],
                            columns=DESCRIPTIONS + ['NOT VALIDATED'],
                            dtype=bool)
    out = quality_mapping.convert_mask_into_dataframe(flags)
    assert_frame_equal(out, expected)
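The flag values in this test are plain bit fields; a short worked example of the bit arithmetic behind the last test value (the mapping from bit position to description lives in quality_mapping and is not restated here):

# Worked bit arithmetic for the last flag in the test above.
flag = (1 << 9) | (1 << 7) | (1 << 5)   # 512 + 128 + 32 = 672
assert flag & (1 << 7)                  # this bit is set
assert not flag & (1 << 3)              # this bit is not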
Example No. 4
def test_convert_mask_into_dataframe_w_unvalidated():
    flags = (pd.Series([0, 0, 1, 1 << 12, 1 << 9 | 1 << 7 | 1 << 5])
             | quality_mapping.LATEST_VERSION_FLAG)
    flags.iloc[0] = 0
    columns = DESCRIPTIONS + ['NOT VALIDATED'] + DERIVED_DESCRIPTIONS
    expected = pd.DataFrame(
        [[0] * 12 + [1, 0, 0, 0], [0] * 13 + [1, 0, 0],
         [1] + [0] * 12 + [1, 0, 0], [0] * 9 + [1, 0, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0]],
        columns=columns,
        dtype=bool)
    out = quality_mapping.convert_mask_into_dataframe(flags)
    assert_frame_equal(out, expected, check_like=True)
def _resample_event_obs(obs, fx, obs_data, quality_flags):
    """Resample the event observation.

    Parameters
    ----------
    obs : datamodel.Observation
        The Observation being resampled.
    fx : datamodel.EventForecast
        The corresponding Forecast.
    obs_data : pandas.DataFrame
        Timeseries of the event observation with 'value' and
        'quality_flag' columns.
    quality_flags : tuple of solarforecastarbiter.datamodel.QualityFlagFilter
        Flags to process and apply as filters during resampling.

    Returns
    -------
    obs_resampled : pandas.Series
        Timeseries data of the Observation resampled to match the Forecast.
    counts : dict
        Dict where keys are quality_flag.quality_flags and values
        are integers indicating the number of points filtered
        for the given flag.

    Raises
    ------
    ValueError
        If the Forecast and Observation do not have the same interval length.
    """
    if fx.interval_length != obs.interval_length:
        raise ValueError("Event observation and forecast time-series "
                         "must have matching interval length.")

    # DataFrame of bools with columns like NIGHTTIME, CLEARSKY EXCEEDED, but
    # many of these are not valid for event obs! Arguably only USER FLAGGED
    # and NIGHTTIME are valid for event obs.
    obs_flags = quality_mapping.convert_mask_into_dataframe(
        obs_data['quality_flag'])
    obs_flags['ISNAN'] = obs_data['value'].isna()

    # determine the points that should never contribute
    # combine unique elements of tuple of tuples
    discard_before_resample_flags = set(['ISNAN'])
    for f in filter(lambda x: x.discard_before_resample, quality_flags):
        discard_before_resample_flags |= set(f.quality_flags)
    discard_before_resample = obs_flags[discard_before_resample_flags]
    counts = discard_before_resample.astype(int).sum(axis=0).to_dict()
    to_discard_before_resample = discard_before_resample.any(axis=1)

    obs_resampled = obs_data.loc[~to_discard_before_resample, 'value']

    return obs_resampled, counts
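A small standalone sketch of the flag-union step above, using stand-in filter objects that only carry the two attributes the loop reads (the real objects are datamodel.QualityFlagFilter instances):

# Sketch with hypothetical stand-in filters, for illustration only.
from collections import namedtuple

Filter = namedtuple('Filter', ['quality_flags', 'discard_before_resample'])
quality_flags = (
    Filter(('USER FLAGGED',), True),
    Filter(('NIGHTTIME',), True),
    Filter(('CLEARSKY EXCEEDED',), False),  # skipped: not discarded before resample
)
discard_before_resample_flags = set(['ISNAN'])
for f in filter(lambda x: x.discard_before_resample, quality_flags):
    discard_before_resample_flags |= set(f.quality_flags)
# discard_before_resample_flags == {'ISNAN', 'USER FLAGGED', 'NIGHTTIME'}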
Example No. 6
def apply_validation(obs_df, qfilter, handle_func):
    """
    Apply validation steps based on provided filters to the data.

    Parameters
    ----------
    obs_df : pandas.DataFrame
        The observation data with 'value' and 'quality_flag' columns
    qfilter : solarforecastarbiter.datamodel.QualityFlagFilter
        Filter whose quality_flags select the validation flags applied
        to the data.
    handle_func : function
        Function that handles how `quality_flags` will be used.
        See solarforecastarbiter.metrics.preprocessing.exclude as an
        example.

    Returns
    -------
    validated_obs : pandas.Series
        The validated timeseries data as pandas.Series.
    counts : dict
        Dict where keys are qfilter.quality_flags and values
        are integers indicating the number of points filtered
        for the given flag.
    """
    # List of flags from filter
    if not isinstance(qfilter, datamodel.QualityFlagFilter):
        raise TypeError(f"{qfilter} not a QualityFlagFilter")
    filters = qfilter.quality_flags

    if obs_df.empty:
        return obs_df.value, {f: 0 for f in filters}
    else:
        validation_df = quality_mapping.convert_mask_into_dataframe(
            obs_df['quality_flag'])
        validation_df = validation_df[list(filters)]
        validated_obs = handle_func(obs_df.value, validation_df)
        counts = validation_df.astype(int).sum(axis=0).to_dict()
        return validated_obs, counts
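A hedged sketch of a handle_func in the spirit of solarforecastarbiter.metrics.preprocessing.exclude (this is not the library implementation): it drops every point whose row has at least one of the selected flags set.

# Sketch of a possible handle_func; not the library's exclude implementation.
def drop_flagged(values, validation_df):
    flagged = validation_df.any(axis=1)  # True where any selected flag is set
    return values[~flagged]

# validated_obs, counts = apply_validation(obs_df, qfilter, drop_flagged)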
Example No. 7
def test_convert_mask_into_dataframe_all_unvalidated():
    flags = pd.Series([0, 0, 1, 1, 0])
    columns = ['NOT VALIDATED']
    expected = pd.DataFrame([[1]] * 5, columns=columns, dtype=bool)
    out = quality_mapping.convert_mask_into_dataframe(flags)
    assert_frame_equal(out, expected, check_like=True)
def _resample_obs(obs, fx, obs_data, quality_flags):
    """Resample observations.

    Parameters
    ----------
    obs : datamodel.Observation
        The Observation being resampled.
    fx : datamodel.Forecast
        The corresponding Forecast.
    obs_data : pandas.DataFrame
        Timeseries of values and quality flags of the
        observation/aggregate data.
    quality_flags : tuple of solarforecastarbiter.datamodel.QualityFlagFilter
        Flags to process and apply as filters during resampling.

    Returns
    -------
    obs_resampled : pandas.Series
        The observation time series resampled to match the forecast
        interval_length. Time series will have missing labels where
        values failed validation.
    val_results : list
        Validation results recording the number of points discarded for
        each quality flag, both before and after resampling.

    Raises
    ------
    ValueError
        If fx.interval_length < obs.interval_length
    """
    if fx.interval_length < obs.interval_length:
        # typically impossible to reach this because ForecastObservation init
        # prevents it
        raise ValueError(
            'Cannot resample observation to match forecast because '
            'fx.interval_length < obs.interval_length.')

    if obs_data.empty:
        return obs_data['value'], []

    # label convention when resampling
    closed = datamodel.CLOSED_MAPPING[obs.interval_label]

    # DataFrame of bools with columns like NIGHTTIME, CLEARSKY EXCEEDED
    obs_flags = quality_mapping.convert_mask_into_dataframe(
        obs_data['quality_flag'])
    obs_flags['ISNAN'] = obs_data['value'].isna()

    # determine the points that should be discarded before resampling.
    to_discard_before_resample, val_results = _calc_discard_before_resample(
        obs_flags, quality_flags, fx.interval_length, closed)

    # resample using all of the data except for what was flagged by the
    # discard before resample process.
    resampled_values = \
        obs_data.loc[~to_discard_before_resample, 'value'].resample(
            fx.interval_length, closed=closed, label=closed).mean()

    # determine the intervals that have too many flagged points
    to_discard_after_resample, after_resample_val_results = \
        _calc_discard_after_resample(
            obs_flags,
            quality_flags,
            to_discard_before_resample,
            fx.interval_length,
            obs.interval_length,
            closed
        )

    # discard the intervals with too many flagged sub-interval points.
    # resampled_values.index does not contain labels for intervals for
    # which all points were discarded, so care is needed in the next
    # indexing operation.
    good_labels = to_discard_after_resample.index[~to_discard_after_resample]
    obs_resampled = resampled_values.loc[resampled_values.index.intersection(
        good_labels)]

    # merge the val_results lists
    val_results += after_resample_val_results

    return obs_resampled, val_results
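A compact sketch of the pandas resampling convention used above, with hypothetical 1-minute observations aggregated to 5-minute means and 'left' interval labels:

# Standalone sketch of the resample step with made-up data.
import pandas as pd

index = pd.date_range('2020-01-01 00:00', periods=10, freq='1min', tz='UTC')
values = pd.Series(range(10), index=index, dtype=float)
keep = pd.Series(True, index=index)
keep.iloc[3] = False  # pretend one sub-interval point was discarded before resampling
resampled = values[keep].resample('5min', closed='left', label='left').mean()
# two 5-minute labels; the first mean uses 4 of the 5 original points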