def test_get_data_start_end_labels_obs_longer_than_1h(site_metadata):
    """get_data_start_end should raise ValueError for a 2h observation
    interval combined with a forecast whose run_length is 5min.
    """
    obs = default_observation(
        site_metadata, interval_length=pd.Timedelta('2h'))
    fx = default_forecast(
        site_metadata, run_length=pd.Timedelta('5min'))
    when = pd.Timestamp('20190422T1945Z')
    # the observation interval is over the 1 hr cap, so this must fail
    with pytest.raises(ValueError) as raised:
        utils.get_data_start_end(obs, fx, when)
    error_text = str(raised.value)
    assert 'observation.interval_length <= 1h' in error_text
def test_get_data_start_end_labels_obs_avg_fx_instant(site_metadata):
    """An 'ending'-labeled observation paired with an 'instant' forecast
    should make get_data_start_end raise ValueError.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='ending')
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=5),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('5min'),
        run_length=pd.Timedelta('1d'),
        interval_label='instant')
    fx = default_forecast(site_metadata, **fx_settings)
    when = pd.Timestamp('20190422T1945Z')
    with pytest.raises(ValueError) as raised:
        utils.get_data_start_end(obs, fx, when)
    error_text = str(raised.value)
    assert 'made from interval average obs' in error_text
def test_get_data_start_end_labels_obs_fx_instant_mismatch(site_metadata):
    """Instant observation and instant forecast with unequal interval
    lengths (5min vs 1h) should make get_data_start_end raise ValueError.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='instant')
    # both labels are 'instant', so the 1h forecast interval_length below
    # conflicts with the 5min observation interval_length
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=5),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('1h'),
        run_length=pd.Timedelta('1d'),
        interval_label='instant')
    fx = default_forecast(site_metadata, **fx_settings)
    when = pd.Timestamp('20190422T1945Z')
    with pytest.raises(ValueError) as raised:
        utils.get_data_start_end(obs, fx, when)
    error_text = str(raised.value)
    assert 'with identical interval length' in error_text
def test_get_data_start_end_labels_subhourly_window_limit(site_metadata):
    """With a 5min run_length, the data window returned by
    get_data_start_end is expected to be the 5 minutes up to run time.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='beginning')
    # a run_length under one hour exercises the subhourly window limit
    fx = default_forecast(
        site_metadata,
        run_length=pd.Timedelta('5min'),
        interval_label='beginning')
    when = pd.Timestamp('20190422T1945Z')
    window_start, window_end = utils.get_data_start_end(obs, fx, when)
    assert window_start == pd.Timestamp('20190422T1940Z')
    assert window_end == pd.Timestamp('20190422T1945Z')
示例#5
0
def _issue_time_generator(observation, fx, obs_mint, obs_maxt, next_issue_time,
                          max_run_time):
    """Yield successive issue times of *fx*, from *next_issue_time* through
    *max_run_time*, that the available observation data can support.

    For each candidate issue time the required data window is computed
    with ``utils.get_data_start_end`` (the candidate is passed as both
    run time and issue time). Iteration stops as soon as a window ends
    after *obs_maxt*; a candidate is only yielded when its window starts
    after *obs_mint*.
    """
    candidate = next_issue_time
    while candidate <= max_run_time:
        window_start, window_end = utils.get_data_start_end(
            observation, fx, candidate, candidate)
        if window_end > obs_maxt:
            # the observation series does not reach far enough; later
            # issue times would need even later data, so stop here
            return

        if window_start > obs_mint:
            yield candidate
        # nudge forward by 1ns so the next lookup moves past this time
        just_after = candidate + pd.Timedelta('1ns')
        candidate = utils.get_next_issue_time(fx, just_after)
def test_get_data_start_end_labels_1h_window_limit(site_metadata):
    """With a 12h run_length, the data window returned by
    get_data_start_end is expected to be the single hour up to run time.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='beginning')
    # a long run_length exercises the 1 hr limit on the window
    fx = default_forecast(
        site_metadata,
        run_length=pd.Timedelta('12h'),
        interval_label='beginning')
    # the window must not extend past the run time
    when = pd.Timestamp('20190422T1945Z')
    window_start, window_end = utils.get_data_start_end(obs, fx, when)
    assert window_start == pd.Timestamp('20190422T1845Z')
    assert window_end == pd.Timestamp('20190422T1945Z')
def test_get_data_start_end_labels_obs_instant_fx_avg_intraday(site_metadata):
    """Instant observation with an 'ending'-labeled, 15min run_length
    forecast: the expected window is 19:30:01Z to 19:45Z on 2019-04-22.
    """
    when = pd.Timestamp('20190422T1945Z')
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='instant')
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=5),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('5min'),
        run_length=pd.Timedelta('15min'),
        interval_label='ending')
    fx = default_forecast(site_metadata, **fx_settings)
    window_start, window_end = utils.get_data_start_end(obs, fx, when)
    assert window_start == pd.Timestamp('20190422T193001Z')
    assert window_end == pd.Timestamp('20190422T1945Z')
def test_get_data_start_end_labels_obs_fx_instant(site_metadata):
    """Instant observation and instant forecast with matching 5min
    intervals and a 1d run_length: the expected window is
    2019-04-21 00:00Z through 23:59:59Z.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='instant')
    # when both labels are 'instant' the interval lengths must be equal
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=5),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('5min'),
        run_length=pd.Timedelta('1d'),
        interval_label='instant')
    fx = default_forecast(site_metadata, **fx_settings)
    when = pd.Timestamp('20190422T1945Z')
    window_start, window_end = utils.get_data_start_end(obs, fx, when)
    assert window_start == pd.Timestamp('20190421T0000Z')
    assert window_end == pd.Timestamp('20190421T235959Z')
def test_get_data_start_end_labels_obs_instant_fx_avg(site_metadata):
    """Instant observation with a 'beginning'-labeled, 1d run_length
    forecast issued at 23Z: the expected window is 2019-04-21 00:00Z
    through 23:59:59Z.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='instant')
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=23),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('5min'),
        run_length=pd.Timedelta('1d'),
        interval_label='beginning')
    fx = default_forecast(site_metadata, **fx_settings)
    when = pd.Timestamp('20190422T1945Z')
    issued = pd.Timestamp('20190422T2300Z')
    window_start, window_end = utils.get_data_start_end(
        obs, fx, when, issued)
    assert window_start == pd.Timestamp('20190421T0000Z')
    assert window_end == pd.Timestamp('20190421T235959Z')
def test_get_data_start_end_labels_obs_longer_than_1h_day_ahead(site_metadata):
    """A 2h observation interval is accepted for a forecast with a 1d
    run_length; the expected window is 2019-04-21 00:00Z to
    2019-04-22 00:00Z.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('2h'),
        interval_label='beginning')
    # run_length of 1d makes this a day ahead forecast
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=5),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('1h'),
        run_length=pd.Timedelta('1d'),
        interval_label='beginning')
    fx = default_forecast(site_metadata, **fx_settings)
    when = pd.Timestamp('20190422T1945Z')
    # the observation interval length is not restricted for day ahead
    window_start, window_end = utils.get_data_start_end(obs, fx, when)
    assert window_start == pd.Timestamp('20190421T0000Z')
    assert window_end == pd.Timestamp('20190422T0000Z')
def test_get_data_start_end_time_dayahead(site_metadata, rl, rt, lt,
                                          expected_start, expected_end):
    """Compare the data window for a 23Z-issued, hourly forecast against
    the expected bounds. The run_length (*rl*), run time (*rt*) and lead
    time (*lt*) are supplied as arguments.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='beginning')

    when = pd.Timestamp(rt)
    issued = pd.Timestamp('20190410T2300Z')
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=23),
        lead_time_to_start=pd.Timedelta(lt),
        interval_length=pd.Timedelta('1h'),
        run_length=pd.Timedelta(rl),
        interval_label='beginning')
    fx = default_forecast(site_metadata, **fx_settings)
    window_start, window_end = utils.get_data_start_end(
        obs, fx, when, issued)
    assert window_start == pd.Timestamp(expected_start)
    assert window_end == pd.Timestamp(expected_end)
def test_get_data_start_end_time_tz(site_metadata, variable, rl, issue, run,
                                    expected_start, expected_end):
    """Compare the data window for the given *variable*, run_length
    (*rl*), *issue* time and *run* time against the expected bounds.
    """
    obs = default_observation(
        site_metadata,
        variable=variable,
        interval_length=pd.Timedelta('5min'),
        interval_label='ending')
    fx_settings = dict(
        variable=variable,
        issue_time_of_day=dt.time(hour=23),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('1h'),
        run_length=pd.Timedelta(rl),
        interval_label='beginning')
    fx = default_forecast(site_metadata, **fx_settings)
    when = pd.Timestamp(run)
    issued = pd.Timestamp(issue)
    window_start, window_end = utils.get_data_start_end(
        obs, fx, when, issued)
    assert window_start == pd.Timestamp(expected_start)
    assert window_end == pd.Timestamp(expected_end)
def test_get_data_start_end_labels_obs_fx_instant(site_metadata, lead, issue,
                                                  it):
    """Instant observation and instant forecast with matching 5min
    intervals: for the given lead time (*lead*), *issue* time and issue
    hour of day (*it*), the expected window is 2019-04-21 00:00Z through
    23:59:59Z. The run time is taken 75 minutes before the issue time.
    """
    obs = default_observation(
        site_metadata,
        interval_length=pd.Timedelta('5min'),
        interval_label='instant')
    # with 'instant' labels on both sides, the forecast and observation
    # interval lengths have to be equal
    fx_settings = dict(
        issue_time_of_day=dt.time(hour=it),
        lead_time_to_start=pd.Timedelta(lead),
        interval_length=pd.Timedelta('5min'),
        run_length=pd.Timedelta('1d'),
        interval_label='instant')
    fx = default_forecast(site_metadata, **fx_settings)
    issued = pd.Timestamp(issue)
    when = issued - pd.Timedelta('75min')
    window_start, window_end = utils.get_data_start_end(
        obs, fx, when, issued)
    assert window_start == pd.Timestamp('20190421T0000Z')
    assert window_end == pd.Timestamp('20190421T235959Z')
def test_get_forecast_start_end_time_weekahead(site_metadata, variable,
                                               expected_start, expected_end):
    """Compare the data window from get_data_start_end for the given
    *variable* against the expected bounds, using a run time of
    2019-04-10 06:30Z and no explicit issue time.
    """
    obs = default_observation(
        site_metadata,
        variable=variable,
        interval_length=pd.Timedelta('5min'),
        interval_label='beginning')

    when = pd.Timestamp('20190410T0630Z')
    fx_settings = dict(
        variable=variable,
        issue_time_of_day=dt.time(hour=10),
        lead_time_to_start=pd.Timedelta('1h'),
        interval_length=pd.Timedelta('1h'),
        run_length=pd.Timedelta('1d'),
        interval_label='beginning')
    fx = default_forecast(site_metadata, **fx_settings)
    window_start, window_end = utils.get_data_start_end(obs, fx, when)
    assert window_start == pd.Timestamp(expected_start)
    assert window_end == pd.Timestamp(expected_end)
def test_get_data_start_end_time_weekahead_not_midnight(site_metadata):
    """A net_load forecast issued at 12Z with a 1d lead time and 1d
    run_length: the expected data window is 2019-04-04 12:00Z to
    2019-04-05 12:00Z.
    """
    net_load = 'net_load'
    obs = default_observation(
        site_metadata,
        variable=net_load,
        interval_length=pd.Timedelta('5min'),
        interval_label='beginning')

    when = pd.Timestamp('20190410T1030Z')
    issued = pd.Timestamp('20190410T1200Z')
    # issue time plus the 1d lead puts the forecast start at
    # 2019-04-11 12:00
    fx_settings = dict(
        variable=net_load,
        issue_time_of_day=dt.time(hour=12),
        lead_time_to_start=pd.Timedelta('1d'),
        interval_length=pd.Timedelta('1h'),
        run_length=pd.Timedelta('1d'),
        interval_label='beginning')
    fx = default_forecast(site_metadata, **fx_settings)
    window_start, window_end = utils.get_data_start_end(
        obs, fx, when, issued)
    assert window_start == pd.Timestamp('20190404T1200Z')
    assert window_end == pd.Timestamp('20190405T1200Z')
示例#16
0
def run_persistence(session,
                    observation,
                    forecast,
                    run_time,
                    issue_time,
                    index=False):
    """
    Create a persistence *forecast* from the values of an *observation*.

    For intraday forecasts, *index* selects between persistence of the
    measured values (*index = False*) and persistence of the clear sky
    index or AC power index (*index = True*).

    For day ahead forecasts, only persistence of measured values
    (*index = False*) is supported.

    Forecasts may be run operationally or retrospectively. For
    operational forecasts, *run_time* is typically set to now. For
    retrospective forecasts, *run_time* is the time by which the
    forecast should be run so that it could have been delivered for
    the *issue_time*. Forecasts will only use data with timestamps
    before *run_time*.

    The persistence *window* is the time over which the persistence
    quantity (irradiance, power, clear sky index, or power index) is
    averaged. The window is determined automatically from the
    *forecast* attributes:

      * Intraday persistence forecasts:
           *window = forecast.run_length*.
           No longer than 1 hour.
      * Day ahead forecasts (all but net load) and week ahead forecasts (net
        load only):
          *window = forecast.interval_length*.

    Users that would like more flexibility may use the lower-level
    functions in
    :py:mod:`solarforecastarbiter.reference_forecasts.persistence`.

    Parameters
    ----------
    session : api.Session
        The session object to use to request data from the
        SolarForecastArbiter API.
    observation : datamodel.Observation
        The metadata of the observation to be used to create the
        forecast.
    forecast : datamodel.Forecast
        The metadata of the desired forecast.
    run_time : pd.Timestamp
        Run time of the forecast.
    issue_time : pd.Timestamp
        Issue time of the forecast run.
    index : bool, default False
        If False, use persistence of observed value. If True, use
        persistence of clear sky or AC power index.

    Returns
    -------
    forecast : pd.Series
        Forecast conforms to the metadata specified by the *forecast*
        argument.

    Raises
    ------
    ValueError
        If forecast and issue_time are incompatible.
    ValueError
        If persistence window < observation.interval_length.
    ValueError
        If forecast.run_length = 1 day and forecast period is not
        midnight to midnight.
    ValueError
        If forecast.run_length = 1 day and index=True.
    ValueError
        If instantaneous forecast and instantaneous observation interval
        lengths do not match.
    ValueError
        If average observations are used to make instantaneous forecast.

    Notes
    -----
    For non-intraday net load forecasts, this function will use a weekahead
    persistence due to the fact that net load exhibits stronger correlation
    week-to-week than day-to-day. For example, the net load on a Monday tends
    to look more similar to the previous Monday than it does to the previous
    day (Sunday).
    """
    utils.check_persistence_compatibility(observation, forecast, index)
    fx_start, fx_end = utils.get_forecast_start_end(
        forecast, issue_time, False)
    is_intraday = utils._is_intraday(forecast)
    if not is_intraday:
        # non-intraday forecasts must span midnight to midnight;
        # this check raises ValueError otherwise
        utils._check_midnight_to_midnight(fx_start, fx_end)

    obs_start, obs_end = utils.get_data_start_end(
        observation, forecast, run_time)

    def load_data(observation, data_start, data_end):
        # fetch the observation values through the API session and hand
        # back the 'value' column in the site's timezone
        values = session.get_observation_values(
            observation.observation_id, data_start, data_end,
            observation.interval_label)
        localized = values.tz_convert(observation.site.timezone)
        return localized['value']

    if is_intraday:
        # both intraday variants take the same arguments
        if index:
            make_forecast = persistence.persistence_scalar_index
        else:
            make_forecast = persistence.persistence_scalar
        return make_forecast(
            observation, obs_start, obs_end, fx_start, fx_end,
            forecast.interval_length, forecast.interval_label, load_data)

    if index:  # pragma: no cover
        raise ValueError(
            'index=True not supported for forecasts with run_length >= 1day')

    return persistence.persistence_interval(
        observation, obs_start, obs_end, fx_start,
        forecast.interval_length, forecast.interval_label, load_data)
示例#17
0
def run_persistence(session,
                    observation,
                    forecast,
                    run_time,
                    issue_time,
                    index=False,
                    load_data=None):
    """
    Create a persistence *forecast* from the values of an *observation*.

    For intraday forecasts, *index* selects between persistence of the
    measured values (*index = False*) and persistence of the clear sky
    index or AC power index (*index = True*).

    For day ahead forecasts, only persistence of measured values
    (*index = False*) is supported.

    Forecasts may be run operationally or retrospectively. For
    operational forecasts, *run_time* is typically set to now. For
    retrospective forecasts, *run_time* is the time by which the
    forecast should be run so that it could have been delivered for
    the *issue_time*. Forecasts will only use data with timestamps
    before *run_time*.

    The persistence *window* is the time over which the persistence
    quantity (irradiance, power, clear sky index, or power index) is
    averaged. The window is determined automatically from the
    *forecast* attributes:

    - Intraday persistence forecasts:

      + ``window = forecast.run_length``. No longer than 1 hour.

    - Day ahead forecasts (all but net load) and week ahead forecasts (net
      load only):

      + ``window = forecast.interval_length``.

    Users that would like more flexibility may use the lower-level
    functions in
    :py:mod:`solarforecastarbiter.reference_forecasts.persistence`.

    Parameters
    ----------
    session : api.Session
        The session object to use to request data from the
        SolarForecastArbiter API.
    observation : datamodel.Observation
        The metadata of the observation to be used to create the
        forecast.
    forecast : datamodel.Forecast
        The metadata of the desired forecast.
    run_time : pd.Timestamp
        Run time of the forecast.
    issue_time : pd.Timestamp
        Issue time of the forecast run.
    index : bool, default False
        If False, use persistence of observed value. If True, use
        persistence of clear sky or AC power index.
    load_data : function
        Function to load the observation data 'value' series given
        (observation, data_start, data_end) arguments. Typically,
        calls `session.get_observation_values` and selects the 'value'
        column. May also have data preloaded to then slice from
        data_start to data_end. When None, the loader is obtained from
        ``_default_load_data(session)``.

    Returns
    -------
    forecast : pd.Series
        Forecast conforms to the metadata specified by the *forecast*
        argument.

    Raises
    ------
    ValueError
        If forecast and issue_time are incompatible.
    ValueError
        If data is required from after run_time.
    ValueError
        If persistence window < observation.interval_length.
    ValueError
        If forecast.run_length >= 1 day and index=True.
    ValueError
        If instantaneous forecast and instantaneous observation interval
        lengths do not match.
    ValueError
        If average observations are used to make instantaneous forecast.

    Notes
    -----
    For non-intraday net load forecasts, this function will use a weekahead
    persistence due to the fact that net load exhibits stronger correlation
    week-to-week than day-to-day. For example, the net load on a Monday tends
    to look more similar to the previous Monday than it does to the previous
    day (Sunday).
    """
    utils.check_persistence_compatibility(observation, forecast, index)
    fx_start, fx_end = utils.get_forecast_start_end(
        forecast, issue_time, False)
    is_intraday = utils._is_intraday(forecast)

    loader = load_data
    if loader is None:
        loader = _default_load_data(session)

    obs_start, obs_end = utils.get_data_start_end(
        observation, forecast, run_time, issue_time)
    if obs_end > run_time:
        raise ValueError(
            'Persistence forecast requires data from after run_time')

    # probabilistic forecasts are handled first, regardless of the
    # intraday/index combination
    if isinstance(forecast, datamodel.ProbabilisticForecast):
        constant_values = [
            cv.constant_value for cv in forecast.constant_values]
        return persistence.persistence_probabilistic(
            observation, obs_start, obs_end, fx_start, fx_end,
            forecast.interval_length, forecast.interval_label, loader,
            forecast.axis, constant_values)

    if is_intraday:
        # both intraday variants take the same arguments
        if index:
            make_forecast = persistence.persistence_scalar_index
        else:
            make_forecast = persistence.persistence_scalar
        return make_forecast(
            observation, obs_start, obs_end, fx_start, fx_end,
            forecast.interval_length, forecast.interval_label, loader)

    if index:  # pragma: no cover
        raise ValueError(
            'index=True not supported for forecasts with run_length >= 1day')

    return persistence.persistence_interval(
        observation, obs_start, obs_end, fx_start,
        forecast.interval_length, forecast.interval_label, loader)
示例#18
0
def generate_reference_persistence_forecast_gaps_parameters(
        session, forecasts, observations, start, end):
    """Find the reference persistence forecasts in *forecasts* that have
    gaps in their data between *start* and *end*, and yield what is
    needed to fill those gaps by persisting Observation values.

    A forecast qualifies when it has
    ``'is_reference_persistence_forecast': true`` and an
    observation_id in Forecast.extra_parameters (formatted as a JSON
    string). A boolean value for "index_persistence" in
    Forecast.extra_parameters controls whether the persistence
    forecast should be made adjusting for clear-sky/AC power index or
    not.

    Parameters
    ----------
    session : solarforecastarbiter.io.api.APISession
    forecasts : list of datamodel.Forecasts
        The forecasts that should be filtered to find references.
    observations : list of datamodel.Observations
        Observations that are available to use to fetch values
        and make persistence forecasts.
    start : pandas.Timestamp
        The start of the period to search for missing forecast values.
    end : pandas.Timestamp
        The end of the period to search for missing forecast values.

    Returns
    -------
    generator of namedtuples with fields
        (forecast, observation, index, data_start, data_end, issue_times)

    """
    org_info = session.get_user_info()
    obs_by_id = {obs.observation_id: obs for obs in observations}
    gap_params = namedtuple(
        'PersistenceGapParameters',
        ['forecast', 'observation', 'index', 'data_start', 'data_end',
         'issue_times'])
    for fx in forecasts:
        checked = _ref_persistence_check(fx, obs_by_id, org_info, session)
        if checked is None:
            continue
        observation, index, obs_mint, obs_maxt = checked

        # collect the issue times from every gap; the set drops repeats
        found = set()
        for gap in session.get_value_gaps(fx, start, end):
            gap_begin = gap[0]
            gap_limit = gap[1] - pd.Timedelta('1ns')
            found.update(
                _issue_time_generator(observation, fx, obs_mint, obs_maxt,
                                      gap_begin, gap_limit))
        issue_times = tuple(sorted(found))
        if not issue_times:
            continue

        # get_data_start_end only covers a single forecast run, so it is
        # evaluated for the first and the last issue time to span the
        # full range of data that may be needed
        earliest = issue_times[0]
        latest = issue_times[-1]
        data_start, _ = utils.get_data_start_end(
            observation, fx, earliest, earliest)
        _, data_end = utils.get_data_start_end(
            observation, fx, latest, latest)
        yield gap_params(
            fx, observation, index, data_start, data_end, issue_times)
示例#19
0
def generate_reference_persistence_forecast_parameters(session, forecasts,
                                                       observations,
                                                       max_run_time):
    """Find the forecasts in *forecasts* that the Arbiter should generate
    by persisting Observation values, and yield the parameters of each
    run that can currently be made.

    A forecast qualifies when it has
    ``'is_reference_persistence_forecast': true`` and an
    observation_id in Forecast.extra_parameters (formatted as a JSON
    string). A boolean value for "index_persistence" in
    Forecast.extra_parameters controls whether the persistence
    forecast should be made adjusting for clear-sky/AC power index or
    not.

    Parameters
    ----------
    session : solarforecastarbiter.io.api.APISession
    forecasts : list of datamodel.Forecasts
        The forecasts that should be filtered to find references.
    observations : list of datamodel.Observations
        Observations that are available to use to fetch values
        and make persistence forecasts.
    max_run_time : pandas.Timestamp
        The maximum run time/issue time for any forecasts. Usually now.

    Returns
    -------
    generator of (Forecast, Observation, next_issue_time, index)

    """
    org_info = session.get_user_info()
    obs_by_id = {obs.observation_id: obs for obs in observations}
    for fx in forecasts:
        if not _is_reference_persistence_forecast(fx.extra_parameters):
            logger.debug(
                'Forecast %s is not labeled as a reference '
                'persistence forecast', fx.forecast_id)
            continue

        if fx.provider != org_info['organization']:
            logger.debug("Forecast %s is not in user's organization",
                         fx.forecast_id)
            continue

        try:
            params = json.loads(fx.extra_parameters)
        except json.JSONDecodeError:
            logger.warning(
                'Failed to decode extra_parameters for %s: %s as JSON',
                fx.name, fx.forecast_id)
            continue

        try:
            obs_id = params['observation_id']
        except KeyError:
            logger.error(
                'Forecast, %s: %s, has no observation_id to base forecasts'
                ' off of. Cannot make persistence forecast.', fx.name,
                fx.forecast_id)
            continue
        if obs_id not in obs_by_id:
            logger.error(
                'Observation %s not in set of given observations.'
                ' Cannot generate persistence forecast for %s: %s.',
                obs_id, fx.name, fx.forecast_id)
            continue
        observation = obs_by_id[obs_id]

        use_index = params.get('index_persistence', False)
        obs_first, obs_last = session.get_observation_time_range(obs_id)
        if pd.isna(obs_last):
            # without any observation values there is nothing to persist
            logger.info(
                'No observation values to use for %s: %s from observation %s',
                fx.name, fx.forecast_id, obs_id)
            continue

        _, fx_last = session.get_forecast_time_range(fx.forecast_id)
        # the first issue time to consider follows from the last value
        # already present in the forecast series
        if not pd.isna(fx_last):
            issue_time = utils.find_next_issue_time_from_last_forecast(
                fx, fx_last)
        else:
            # no forecast values yet: step back one run_length from the
            # last observation. The start of the observations is not
            # used, since it could really stress the workers if there
            # are a few years of data before the persistence forecast
            # was set up
            issue_time = utils.get_next_issue_time(
                fx, obs_last - fx.run_length)

        # walk forward through the issue times that the observation
        # data can support
        while issue_time <= max_run_time:
            window_start, window_end = utils.get_data_start_end(
                observation, fx, issue_time)
            if window_end > obs_last:
                break

            if window_start > obs_first:
                yield (fx, observation, issue_time, use_index)
            # nudge forward by 1ns so the next lookup moves past this time
            issue_time = utils.get_next_issue_time(
                fx, issue_time + pd.Timedelta('1ns'))
示例#20
0
def generate_reference_persistence_forecast_parameters(session, forecasts,
                                                       observations,
                                                       max_run_time):
    """Find the forecasts in *forecasts* that the Arbiter should generate
    by persisting Observation values, and yield the parameters needed to
    bring each one up to date.

    A forecast qualifies when it has
    ``'is_reference_persistence_forecast': true`` and an
    observation_id in Forecast.extra_parameters (formatted as a JSON
    string). A boolean value for "index_persistence" in
    Forecast.extra_parameters controls whether the persistence
    forecast should be made adjusting for clear-sky/AC power index or
    not.

    Parameters
    ----------
    session : solarforecastarbiter.io.api.APISession
    forecasts : list of datamodel.Forecasts
        The forecasts that should be filtered to find references.
    observations : list of datamodel.Observations
        Observations that are available to use to fetch values
        and make persistence forecasts.
    max_run_time : pandas.Timestamp
        The maximum run time/issue time for any forecasts. Usually now.

    Returns
    -------
    generator of (Forecast, Observation, index, data_start, issue_times)
    """
    org_info = session.get_user_info()
    obs_by_id = {obs.observation_id: obs for obs in observations}
    persistence_params = namedtuple(
        'PersistenceParameters',
        ['forecast', 'observation', 'index', 'data_start', 'issue_times'])

    for fx in forecasts:
        checked = _ref_persistence_check(fx, obs_by_id, org_info, session)
        if checked is None:
            continue
        observation, index, obs_mint, obs_maxt = checked
        # probably split this out to generate issue times for only gaps
        # vs latest
        if isinstance(fx, datamodel.ProbabilisticForecast):
            # the time range is looked up via the first constant value
            first_cv_id = fx.constant_values[0].forecast_id
            _, fx_last = \
                session.get_probabilistic_forecast_constant_value_time_range(
                    first_cv_id)
        else:
            _, fx_last = session.get_forecast_time_range(fx.forecast_id)
        # the first issue time to consider follows from the last value
        # already present in the forecast series
        if not pd.isna(fx_last):
            first_issue_time = utils.find_next_issue_time_from_last_forecast(
                fx, fx_last)
        else:
            # no forecast values yet: step back one run_length from the
            # last observation. The start of the observations is not
            # used, since it could really stress the workers if there
            # are a few years of data before the persistence forecast
            # was set up
            first_issue_time = utils.get_next_issue_time(
                fx, obs_maxt - fx.run_length)

        data_start, _ = utils.get_data_start_end(
            observation, fx, first_issue_time, first_issue_time)
        issue_times = tuple(
            _issue_time_generator(observation, fx, obs_mint, obs_maxt,
                                  first_issue_time, max_run_time))

        if not issue_times:
            continue

        yield persistence_params(
            fx, observation, index, data_start, issue_times)