def empirical_ema_r1(y: Y_TYPE,
                     s,
                     k: int,
                     a: A_TYPE = None,
                     t: T_TYPE = None,
                     e: E_TYPE = None,
                     r: R_TYPE = None):
    """ Exponential moving average, with empirical std

          y      observation; only the first element of wrap(y) is used
          s      state dict; supply an empty dict on the first call
          k      number of steps ahead (the same value is repeated k times)
          a,t,e  accepted for signature compatibility, not used here
          r      weight to place on existing anchor point, must lie in [0, 1]

          :returns: x, x_std, s
                    x      the updated moving average repeated k times
                    x_std  the scale reported by parade(), passed through
                           nonecast(..., fill_value=1.0)
                    s      the updated state
                    If the observation is None the result is None, None, s.
    """
    assert r is not None
    y0 = wrap(y)[0]
    if not s.get('p'):
        # No parade state yet: (re)start with the first observation as anchor
        s = {'p': {}, 'x': y0, 'rho': r}
        assert 0 <= s['rho'] <= 1, 'Expecting rho=r to be between 0 and 1'
    else:
        assert abs(r - s['rho']) < 1e-6, 'rho=r is immutable'

    if y0 is None:
        # Keep the same (x, x_std, s) ordering as the normal return below,
        # so that callers unpacking three values always get the state last.
        return None, None, s

    s['x'] = s['rho'] * s['x'] + (1 - s['rho']) * y0  # Make me better !
    x = [s['x']] * k
    _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    x_std_fallback = nonecast(x_std, fill_value=1.0)
    # Hand back a separate list from the one given to parade()
    return list(x), x_std_fallback, s
    def nprophet_fit_and_predict_simple(
            y: [float],
            k: int,
            freq: str = None,
            model_params: dict = None) -> Tuple[List, List, Any, Any]:
        """ Simpler wrapper for testing - univariate only

        :param y:             list of floats (the first entry is type-checked)
        :param k:             number of steps ahead, used as 'n_forecasts'
        :param freq:          frequency string for the synthetic date index;
                              defaults to NPROPHET_META['freq']
        :param model_params:  optional overrides applied on top of NPROPHET_MODEL
        :returns: x, x_std, forecast, model
                  forecast and model are None when there are fewer
                  observations than the configured 'n_lags'.
        """
        assert isinstance(y[0], float)
        freq = freq or NPROPHET_META['freq']
        # Work on a copy: updating NPROPHET_MODEL in place would leak this
        # call's k and model_params into every subsequent call.
        used_params = dict(NPROPHET_MODEL)
        used_params['n_forecasts'] = k
        if model_params:
            used_params.update(model_params)

        if len(y) < used_params['n_lags']:
            # Not enough history to fit: fall back to a constant prediction.
            # NOTE(review): wrap(y)[0] looks like it selects the FIRST value of
            # the series rather than the most recent one - confirm intent.
            x = [wrap(y)[0]] * k
            x_std = [1.0] * k
            return x, x_std, None, None
        else:
            model = NeuralProphet(**used_params)
            model.set_log_level(log_level='CRITICAL')
            df = pd.DataFrame(columns=['y'], data=y)
            # The dates are synthetic; only the spacing implied by freq matters
            df['ds'] = pd.date_range(start='2021-01-01',
                                     periods=len(y),
                                     freq=freq)
            model.fit(df, freq=freq, epochs=40, use_tqdm=False)
            future = model.make_future_dataframe(df)
            forecast = model.predict(future)
            # Column 'yhat<j+1>' holds the (j+1)-step ahead prediction; pick the
            # entry from the last k rows that corresponds to that horizon.
            x = [
                forecast['yhat' + str(j + 1)].values[-k + j] for j in range(k)
            ]
            x_std = [1.0] * k
            return x, x_std, forecast, model
 def fbprophet_cautious(y: Y_TYPE,
                        s: dict,
                        k: int,
                        a: A_TYPE = None,
                        t: T_TYPE = None,
                        e: E_TYPE = None):
     """ Prophet prediction clipped to a band around the last few observations

         The band for step j+1 is [min - sqrt(j+1)*std, max + sqrt(j+1)*std],
         computed over at most the five most recent first-coordinates of y.
         The point estimates of fbprophet_univariate are clipped to that band;
         its x_std is returned unchanged.

         :returns: x (list), x_std, s
     """
     import math
     if not s.get('s'):
         s['s'] = {}  # state owned by the wrapped prophet skater
         s['y'] = list()  # rolling window of recent values
     window = s['y']
     window.append(wrap(y)[0])
     if len(window) > 5:
         window.pop(0)

     # The window statistics do not depend on the horizon, so compute them once
     hi = np.max(window)
     lo = np.min(window)
     spread = np.std(window)
     x_upper = []
     x_lower = []
     for j in range(k):
         x_upper.append(hi + math.sqrt(j + 1) * spread)
         x_lower.append(lo - math.sqrt(j + 1) * spread)

     x, x_std, s['s'] = fbprophet_univariate(y=y, s=s['s'], k=k, a=a, t=t, e=e)
     capped = np.minimum(np.array(x), np.array(x_upper))
     floored = np.maximum(capped, np.array(x_lower))
     return list(floored), x_std, s
示例#4
0
def nproph_univariate(y:Y_TYPE, s:dict, k:int=1,
                      a:A_TYPE=None, t:T_TYPE=None,
                      e:E_TYPE=None):
    """ Univariate skater: forwards only the first element of y

        Anything in y[1:] is dropped and 'a' is replaced by None before
        delegating to nproph_skater_factory with method='auto'.
    """
    leading = wrap(y)[0]
    factory_kwargs = dict(y=[leading], s=s, k=k, a=None, t=t, e=e, method='auto')
    return nproph_skater_factory(**factory_kwargs)


# def nproph_exogenous(y:Y_TYPE, s:dict, k:int=1, a:A_TYPE=None, t:T_TYPE=None, e:E_TYPE=None):
#     """ Predict using auto_arima, with both simultaneously observed and known in advance variables
#         This skater has no hyper-parameters
# 
#         y: Y_TYPE    scalar or list where y[1:] are interpreted as contemporaneously observed exogenous variables
#         s:           state
#         k:           Number of steps ahead to predict
#         a:           (optional) scalar or list of variables known k-steps in advance.
#                       (IMPORTANT: If supplying 'a', provide the known variable k steps ahead, not the contemporaneous one !).
#         t:           (optional) Time of observation.
#         e:           (optional) Maximum computation time (supply e>60 to give hint to do fitting)
# 
#         :returns: x [float] , s', scale [float]
#     """
#     return nproph_skater_factory(y=y, s=s, k=k, a=a, t=t, e=e, method='auto')
# 
# 
# def nproph_known(y:Y_TYPE, s:dict, k:int=1, a:A_TYPE=None, t:T_TYPE=None, e:E_TYPE=None):
#     """ Uses known-in-advance but not y[1:] """
#     y0 = [wrap(y)[0]]
#     return nproph_skater_factory(y=y0, s=s, k=k, a=a, t=t, e=e, method='auto')


# def nproph_exog_compare(f,k=1):
#     from timemachines.skatertools.evaluation.evaluators import evaluate_mean_absolute_error
#     from timemachines.skatertools.evaluation.evaluators import hospital_with_exog
#     y, a = hospital_with_exog(k=k)
#     y0 = [ yi[0] for yi in y ]
# 
#     r = 0.1 # Doesn't matter?
#     err1 = evaluate_mean_absolute_error(f=f, k=k, y=y0, r=r, n_burn=250)
#     err2 = evaluate_mean_absolute_error(f=f, k=k, y=y, r=r, n_burn=250)
#     err3 = evaluate_mean_absolute_error(f=f, k=k, y=y, r=r, a=a, n_burn=250)
#     errlv = evaluate_mean_absolute_error(f=empirical_last_value, k=k, y=y, r=r, a=a, n_burn=250)
# 
# 
#     print('----------------')
#     print("Error w/o exogenous   = "+str(err1))
#     print("Error w   exogenous   = "+str(err2))
#     print("Error w   exo + known = "+str(err3))
#     print("Error last val cache  = " + str(errlv))
# 
# 
# if __name__ == '__main__':
#     f = nproph_exogenous
#     if True:
#         prior_plot_exogenous(f=f, k=1, n=200)
#     if True:
#         prior_plot(f=f,k=1,n=200)
# 
示例#5
0
    def is_opinonated(y, forecast: pd.DataFrame, k: int, n_recent: int,
                      multiple: float) -> bool:
        """ Check if the forecast is far from any recent values, and thus "opinionated"

        :param y:          data used to fit: a list of floats, or a list of
                           vectors of which only the first element is used
        :param forecast:   dataframe with a 'yhat' column; its last k entries
                           are read as the 1..k step ahead predictions
        :param k:          number of steps ahead
        :param n_recent:   controls how far back the window of recent values
                           reaches (the window is y0[-(k + n_recent):-(k + 1)])
        :param multiple:   width of the tolerance band, in multiples of the
                           scaled difference std
        :return:           True as soon as one prediction falls outside
                           [min(recent) - band, max(recent) + band], else False.
                           The size of the breach is printed before returning.
        """
        if isinstance(y[0], float):
            y = [wrap(yj) for yj in y]
        y0 = [yj[0] for yj in y]
        yhat = forecast['yhat'].values

        for j in range(1, k + 1):
            # NOTE(review): the second positional argument of np.diff is the
            # difference ORDER n, not a lag of j steps - confirm this is intended.
            j_std = np.nanstd(np.diff(y0[-k - 50:-k], j))
            recent_ys = y0[-(k + n_recent):-(k + 1)]
            upper = np.max(recent_ys) + multiple * j_std * math.sqrt(j) + 0.1
            lower = np.min(recent_ys) - multiple * j_std * math.sqrt(j) - 0.1
            j_x = yhat[-(1 + k - j)]
            if j_x > upper or j_x < lower:
                # Report the distance to the bound that was actually breached
                deviation = j_x - upper if j_x > upper else lower - j_x
                print(deviation)
                return True

        return False
示例#6
0
def tsa_factory(y: Y_TYPE,
                s: dict,
                k: int,
                a: A_TYPE = None,
                t: T_TYPE = None,
                e: E_TYPE = None,
                p: int = TSA_P_DEFAULT,
                d: int = TSA_D_DEFAULT,
                q: int = TSA_D_DEFAULT) -> ([float], Any, Any):
    """ Extremely simple univariate, fixed p,d,q ARIMA model that is re-fit each time

        y        observation; only y[0] is modelled
        s        state (history of y and a, k, and the prediction parade)
        k        number of steps ahead; must not change between calls
        a        optional known-in-advance variables; stored but not used in the fit
        t, e     accepted for signature compatibility, not used here
        p, d, q  ARIMA order

        :returns: x, x_std, s   or   None, None, s  when y is None

        Until more than max(2k+5, TSA_META['n_warm']) observations have been
        stored, and whenever fitting fails, the last value is repeated k times.
    """

    # TODO: FIX THIS TO USE EMPIRICAL STD, OTHERWISE ENSEMBLES ARE DREADFUL
    # NOTE(review): q defaults to TSA_D_DEFAULT - possibly a dedicated default
    # for q was intended; confirm against the module constants.

    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k, 'p': {}}
    elif y is not None:
        # Assert immutability of k, dimensions. Skipped when y is None so that
        # a missing observation reaches the early return below instead of
        # failing on len(None).
        assert len(y) == len(s['y'][0])
        assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        # Same (x, x_std, s) ordering as the normal return
        return None, None, s

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)

    y0 = wrap(y)[0]
    if len(s['y']) > max(2 * k + 5, TSA_META['n_warm']):
        y0s = [y_[0] for y_ in s['y']]
        model = ARIMA(y0s, order=(p, d, q))
        try:
            x = list(model.fit().forecast(steps=k))
        except Exception:
            # Deliberate best-effort: any fitting failure falls back to the
            # last value, but KeyboardInterrupt/SystemExit now propagate.
            x = [y0] * k
    else:
        x = [y[0]] * k

    _we_ignore_bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
    x_std_fallback = nonecast(x_std, fill_value=1.0)
    return x, x_std_fallback, s
示例#7
0
def regress_level_on_first_known(y:Y_TYPE, s:dict, k, a:A_TYPE=None, t:T_TYPE =None, e:E_TYPE =None)->([float] , Any , Any):
    """ Very basic online regression skater, mostly for testing
           - Only one known in advance variable is utilized
           - Last value is ignored, unless a is None in which case we return 0.0
           - Empirical std is returned

        y     observation; only the first element is used
        s     state; supply an empty dict on the first call
        k     number of steps ahead; must not change between calls
        a     known-in-advance variable(s); only the first is used.
              A value of zero is a legitimate input.
        t, e  accepted for signature compatibility, not used here

        :returns: x, x_std, s
    """
    y0 = wrap(y)[0]  # Ignore contemporaneous, exogenous variables

    if not s.get('k'):
        # First invocation
        s = {'p': {},   # Prediction parade
             'r': {},   # Regression state, not to be confused with hyper-param r
             'k': k,
             'o': {}}   # The "observance" will quarantine 'a' until it can be matched
    else:
        assert s['k']==k  # Immutability

    if a is None:
        return [0]*k, [1.0]*k, s

    # Test against None rather than truthiness: a == 0.0 must still be regressed on.
    a0 = wrap(a)[0]  # Ignore all but the first known-in-advance variable

    a_t, s['o'] = observance( y=[y0],o=s['o'], k=k, a= [a0])  # Update the observance
    if a_t is not None: # This is the contemporaneous 'a', which was supplied k calls ago.
        if not s['r']:
            # When first calling the online regression algorithm we avoid the degenerate case
            # by sending it two observations.
            y_noise = 0.1*(1e-6+abs(y0))*np.random.randn()
            x_noise = 0.1*(1e-6+abs(a0))*np.random.randn()
            x_pair = [ a_t[0]-x_noise, a_t[0]+x_noise  ]
            y_pair = [ y0-y_noise,  y0+y_noise  ]
            s['r'] = regress_one_helper(x=x_pair, y=y_pair, r=s['r'])
        else:
            s['r'] = regress_one_helper(x=a_t, y=[y0], r=s['r'])

        # Predict using the queued known-in-advance values
        x = [ s['r']['alpha'] + s['r']['beta']*ak[0] for ak in s['o']['a'] ]

        # Push prediction into the parade and get the current bias/stderr
        bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
        return x, x_std, s    # TODO: Use the std implied by regression instead
    else:
        # Nothing to regress on yet: fall back to the last value
        x = [y0]*k
        bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
        return x , x_std, s
示例#8
0
def pmd_known(y: Y_TYPE,
              s: dict,
              k: int = 1,
              a: A_TYPE = None,
              t: T_TYPE = None,
              e: E_TYPE = None):
    """ Skater using the first element of y together with the known-in-advance 'a'

        Anything in y[1:] is dropped before delegating to pmd_skater_factory
        with method='auto'.
    """
    leading = wrap(y)[0]
    factory_kwargs = dict(y=[leading], s=s, k=k, a=a, t=t, e=e, method='auto')
    return pmd_skater_factory(**factory_kwargs)
 def fbprophet_univariate(y: Y_TYPE,
                          s: dict,
                          k: int,
                          a: A_TYPE = None,
                          t: T_TYPE = None,
                          e: E_TYPE = None):
     """ Univariate prophet skater

         Only the first element of y is forwarded to fbprophet_skater_factory;
         y[1:] is dropped and 'a' is replaced by None.
     """
     leading = wrap(y)[0]
     factory_kwargs = dict(y=[leading], s=s, k=k, a=None, t=t, e=e)
     return fbprophet_skater_factory(**factory_kwargs)
 def fbprophet_known(y: Y_TYPE,
                     s: dict,
                     k: int,
                     a: A_TYPE = None,
                     t: T_TYPE = None,
                     e: E_TYPE = None):
     """ Prophet skater using the first element of y and the known-in-advance 'a'

         y[1:] is dropped before delegating to fbprophet_skater_factory.
     """
     leading = wrap(y)[0]
     factory_kwargs = dict(y=[leading], s=s, k=k, a=a, t=t, e=e)
     return fbprophet_skater_factory(**factory_kwargs)
示例#11
0
def pmd_univariate(y: Y_TYPE,
                   s: dict,
                   k: int = 1,
                   a: A_TYPE = None,
                   t: T_TYPE = None,
                   e: E_TYPE = None):
    """ Univariate skater: forwards only the first element of y

        y[1:] is dropped and 'a' is replaced by None before delegating to
        pmd_skater_factory with method='auto'.
    """
    leading = wrap(y)[0]
    factory_kwargs = dict(y=[leading], s=s, k=k, a=None, t=t, e=e, method='auto')
    return pmd_skater_factory(**factory_kwargs)
示例#12
0
def trivial_last_value(y: Y_TYPE,
                       s: dict,
                       k: int = 1,
                       a: A_TYPE = None,
                       t: T_TYPE = None,
                       e: E_TYPE = None) -> ([float], [float], Any):
    """ Last value cache

        y        observation; only the first element is used
        s        state; not needed by this skater
        k        number of steps ahead
        a, t, e  accepted for signature compatibility, not used here

        :returns: x, x_std, s
                  x is the last value repeated k times and x_std is a list of
                  k ones. The returned state is an empty dict, except when y is
                  None, in which case None, None and the incoming s are returned.
    """

    if y is None:
        return None, None, s

    y0 = wrap(y)[0]  # Ignore the rest
    x = [y0] * k  # What a great prediction !
    # One scale per horizon, as promised by the [float] return annotation, so
    # that callers can index or zip x_std alongside x.
    return x, [1.0] * k, {}
示例#13
0
def fbprophet_skater_testor(y :Y_TYPE, s:dict=None, k:int =1, a:A_TYPE =None,
                     t:T_TYPE=None, e:E_TYPE =None, r:R_TYPE =None, freq=None, n_max=None):
    """ A default facebook prophet usage, with no hyper-parameters and no prediction parade

        y       observation (scalar or vector)
        s       state; None or an empty dict on the first call
        k       number of steps ahead; must not change between calls
        a       optional known-in-advance variables
        t, e, r accepted for signature compatibility, not used here
        freq    defaults to PROPHET_META['freq']
        n_max   defaults to PROPHET_META['n_max']

        :returns: x, x_std, s   or   None, None, s  when y is None
    """
    # For testing

    if freq is None:
        freq = PROPHET_META['freq']
    if n_max is None:
        n_max = PROPHET_META['n_max']

    y = wrap(y)
    a = wrap(a)

    if s is None:
        # The signature advertises s=None, so treat it like an empty state
        s = dict()

    if not s.get('y'):
        s = {'y': list(),
             'a': list(),
             'k': k}
    elif y is not None:
        # Assert immutability of k, dimensions. Skipped when y is None so the
        # early return below is reached instead of failing on len(None).
        assert len(y) == len(s['y'][0])
        assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        # Same (x, x_std, s) ordering as the normal return
        return None, None, s

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)
    if len(s['y']) > max(2*k+5,PROPHET_META['n_warm']):
        x, x_std, _, _ = prophet_iskater_factory(y=s['y'], k=k, a=s['a'], freq=freq, n_max=n_max)
    else:
        # Still warming up: repeat the last value
        x = [y[0]] * k
        x_std = [1.0] * k
    return x, x_std, s
 def nprophet_iskater_factory(y: [[float]],
                              k: int,
                              a: List = None,
                              t: List = None,
                              e=None,
                              freq: str = None,
                              n_max=1000,
                              recursive: bool = False,
                              model_params: dict = None,
                              return_forecast=True):
     """ Fit-and-predict on the first coordinate of every observation in y

         a, t, e, n_max and recursive are accepted but not used for now.

         :returns: (x, x_std, forecast, model) when return_forecast is truthy,
                   otherwise (x, x_std)
     """
     # For now we keep it simple. Will add to this over time
     first_coords = list()
     for yi in y:
         first_coords.append(wrap(yi)[0])
     fitted = nprophet_fit_and_predict_simple(
         y=first_coords, k=k, freq=freq, model_params=model_params)
     x, x_std, forecast, m = fitted
     if return_forecast:
         return x, x_std, forecast, m
     return x, x_std
示例#15
0
def dlm_exogenous_r3(y, s, k, a, t, e, r):
    """ One way to use dlm

        y   observation, or None to request an offline fit of the stored model
        s   state; a falsy value triggers construction of a new model
        k   number of steps ahead, forwarded to the prediction helper
        a, t, e   only forwarded on the recursive call; not otherwise used here
        r   hyper-parameter handed to dlm_set_exog_hyperparams on first call

        :returns: whatever _dlm_exog_prediction_helper returns when y is
                  supplied; None, None, s when y is None.
    """
    if not s:
        # First call: build the model from its components
        s = dict()
        s['dim'] = dimension(y)
        # presumably fills in trend_degree, discount, period, auto_degree and
        # n_fit, all of which are read below - verify against the helper
        s = dlm_set_exog_hyperparams(s=s, r=r)
        y0, exog = split_exogenous(y=y)
        s['n_obs'] = 0
        s['model'] = quietDlm([], printInfo=False) + trend(
            s['trend_degree'], s['discount']) + seasonality(
                s['period'], s['discount'])
        s['model'] = s['model'] + fixedAutoReg(
            degree=s['auto_degree'], name='ar', w=1.0)
        if exog:
            # NaN entries are replaced by None before being handed to the model
            exog_wrapped = [[None if np.isnan(ex0) else ex0 for ex0 in exog]]
            s['model'] = s['model'] + dynamic(features=exog_wrapped,
                                              discount=0.99,
                                              name='exog')  # Sets first exog

    if y is not None:
        y = wrap(y)
        assert dimension(y) == s['dim'], 'Cannot change dimension of data sent'
        s['n_obs'] += 1
        y0, exog = split_exogenous(y=y)
        y0_passed_in = None if np.isnan(
            y0) else y0  # pydlm uses None for missing values
        s['model'].append([y0_passed_in])
        if exog:
            exog_wrapped = [[None if np.isnan(ex0) else ex0 for ex0 in exog]]
            if s['n_obs'] > 1:
                # The first exogenous row was already supplied when the
                # 'exog' component was created above
                s['model'].append(
                    data=exog_wrapped,
                    component='exog')  # Don't get first exog twice
        num_obs = len(s['model'].data) if s.get('model') else 0
        # Once every n_fit observations, call ourselves with y=None to run the
        # offline fit branch at the bottom of this function
        if num_obs % s['n_fit'] == s['n_fit'] - 1:
            _, _, s = dlm_exogenous_r3(y=None, s=s, k=k, a=a, t=t, e=10, r=r)
        s['model'].fitForwardFilter()
        return _dlm_exog_prediction_helper(s=s, k=k, y=y)

    if y is None:
        # NOTE(review): dimension() is applied to y, which is None on this
        # branch; s['dim'] may have been intended - confirm what dimension(None) returns.
        if dimension(y) == 1:
            s['model'].tune(maxit=20)
            # Don't tune if exogenous ... haven't got this to work
        s['model'].fit()
        return None, None, s
示例#16
0
def regress_change_on_first_known(y:Y_TYPE, s:dict, k, a:A_TYPE=None, t:T_TYPE =None, e:E_TYPE =None )->([float] , Any , Any):
    """ Very basic modification of the last value cache.
        This looks at the contemporaneous influence of a single known in advance variable.
        Assumes independent increments when estimating the standard deviation.
        This is also intended to illustrate combination of skaters

        y     observation; only the first element is used
        s     state; supply an empty dict on the first call
        k     number of steps ahead
        a     known-in-advance variable(s), forwarded to the difference skater
        t, e  forwarded to the difference skater

        :returns: x, x_std, s
                  On the first call the last value is repeated k times with a
                  scale of 1.0. Afterwards x is the last value plus the
                  cumulative predicted changes, and x_std is the square root
                  of the cumulative sum of the squared per-step scales.
    """
    y0 = wrap(y)[0]  # Ignore contemporaneous, exogenous variables
    if s.get('prev_y0') is None:
        # Test against None rather than truthiness, so that a previous value
        # of exactly 0.0 does not wipe the state.
        s = {'prev_y0':y0,
             'd':{}   # state for difference predicting skater
             }
        return [y0]*k, [1.0]*k, s

    dy0 = y0 - s['prev_y0']
    s['prev_y0'] = y0  # the next change is measured from this observation
    # The inner skater returns (x, x_std, state); keep its state for next time
    dy_hat, dy_hat_std, s['d'] = regress_level_on_first_known(y=[dy0], s=s['d'], k=k, a=a, t=t, e=e)
    x = [y0 + sum_dy for sum_dy in np.cumsum(dy_hat)]
    # Independent increments: variances add, so accumulate squared scales.
    # NOTE(review): assumes every entry of dy_hat_std is a number - confirm
    # that the inner skater never reports None here.
    x_std = [ math.sqrt(v) for v in np.cumsum([ sd**2 for sd in dy_hat_std])]
    return x, x_std, s
def divinity_univariate_factory(y: Y_TYPE,
                                s,
                                k: K_TYPE,
                                a=None,
                                t=None,
                                e=None,
                                max_buffer_len=1000,
                                n_warm=101,
                                model_params: dict = None):
    """ Thin, limited wrapper around the divinity library

         - A fresh model is fitted on every warm invocation
         - Only the first element of y is used; a, t and e are ignored
         - The state holds nothing but the buffer of past values

        :returns: x, x_std, s   or   None, None, s  when the observation is None
    """
    y0 = wrap(y)[0]
    assert n_warm >= 101, ' You must use n_warm'

    s = s or dict(y=[])

    if y0 is None:
        return None, None, s  # Ignore suggestion to fit offline

    # Grow the buffer, trimming back to max_buffer_len only once it has
    # overshot by more than 1000 entries
    history = s['y']
    history.append(y0)
    if len(history) > max_buffer_len + 1000:
        s['y'] = history[-max_buffer_len:]

    is_warm = len(s['y']) >= max(n_warm, MIN_N_WARM)
    if not is_warm:
        # Last value, with its magnitude as the scale
        return [y0] * k, [abs(y0)] * k, s

    with no_stdout_stderr():
        kwargs = deepcopy(DIVINE_MODEL)
        if model_params:
            kwargs.update(**model_params)
        model = dv.divinity(forecast_length=k, **kwargs)
        model.fit(np.array(s['y']))
    x = list(model.predict())
    x_std = [1.0] * k  # TODO: fixme
    return x, x_std, s
def empirical_last_value(y: Y_TYPE,
                         s: dict,
                         k: int = 1,
                         a: A_TYPE = None,
                         t: T_TYPE = None,
                         e: E_TYPE = None) -> ([float], Any, Any):
    """ Last value cache whose scale comes from the prediction parade

        Only the first element of y is used; a, t and e are ignored.

        :returns: x, x_std, s   or   None, None, s  when y is None
    """
    if not s.get('p'):
        s = {'p': {}}  # start with an empty prediction parade

    if y is None:
        return None, None, s

    latest = wrap(y)[0]  # everything after the first element is dropped
    x = [latest for _ in range(k)]
    # Feeding the parade updates the residual queue and yields the scale
    _bias, x_std, s['p'] = parade(p=s['p'], x=x, y=latest)
    return x, x_std, s
 def fbprophet_known_r2(y: Y_TYPE,
                        s: dict,
                        k: int,
                        a: A_TYPE = None,
                        t: T_TYPE = None,
                        e: E_TYPE = None,
                        r: R_TYPE = None):
     """ Prophet skater with hyper-parameter r, using y[0] and the known-in-advance 'a'

         y[1:] is dropped. r is handed to fbprophet_hyperparam_skater_factory
         together with the names of the two prior-scale parameters.
     """
     assert r is not None
     tuned = ['changepoint_prior_scale', 'seasonality_prior_scale']
     leading = wrap(y)[0]
     factory_kwargs = dict(y=[leading], s=s, k=k, a=a, t=t, e=e, r=r,
                           param_names=tuned, recursive=False)
     return fbprophet_hyperparam_skater_factory(**factory_kwargs)
 def fbprophet_univariate_r2(y: Y_TYPE,
                             s: dict,
                             k: int,
                             a: A_TYPE = None,
                             t: T_TYPE = None,
                             e: E_TYPE = None,
                             r: R_TYPE = None):
     """ Univariate prophet skater with hyper-parameter r

         Only y[0] is forwarded; y[1:] is dropped and 'a' is replaced by None.
         r is handed to fbprophet_hyperparam_skater_factory together with the
         names of the two prior-scale parameters.
     """
     assert r is not None
     tuned = ['changepoint_prior_scale', 'seasonality_prior_scale']
     leading = wrap(y)[0]
     factory_kwargs = dict(y=[leading], s=s, k=k, a=None, t=t, e=e, r=r,
                           param_names=tuned, recursive=False)
     return fbprophet_hyperparam_skater_factory(**factory_kwargs)
示例#21
0
def prior_plot(f,
               y=None,
               k=None,
               t=None,
               e=None,
               r=None,
               x0=np.nan,
               n=150,
               n_plot=25):
    """
         Run skater f over a univariate series and plot the observations
         against the furthest-ahead prediction made at each step.

         When y is omitted, brownian_with_noise(n=n) supplies the data; when t
         is omitted, 0.0, 1.0, 2.0, ... is used. Note that t is passed to
         prior() both as 'a' and as 't'.
    """
    if y is None:
        y = brownian_with_noise(n=n)

    if t is None:
        t = list(map(float, range(len(y))))

    x, x_std = prior(f=f, y=y, k=k, a=t, t=t, e=e, r=r, x0=x0)

    # Keep the first coordinate of each observation, wrapped in a list
    ysf = []
    for y_ in y:
        ysf.append([wrap(y_)[0]])
    # Keep the last entry of every prediction vector
    xk = []
    for xt in x:
        xk.append(xt[-1])
    plot_with_last_value(t=t, x=xk, y=ysf, k=k, n_plot=n_plot)
示例#22
0
def hypocratic_ema_r1(y: Y_TYPE,
                      s,
                      k: int,
                      a: A_TYPE = None,
                      t: T_TYPE = None,
                      e: E_TYPE = None,
                      r: R_TYPE = None):
    """ Exponential moving average whose predictions are shrunk towards zero

         y :  observation; only the first element is used
         s :  state, handed to and returned by empirical_ema_r1
         k :  number of steps ahead
         r :  moving average parameter  (e.g. 0.75 is fast, 0.95 is slow)

         :returns: x, x_std, s  where each x has been multiplied by
                   tanh(0.5 * |x| / (3 * x_std)), or set to 0.0 when the scale
                   is negligible or x is tiny relative to it.
                   If the inner skater reports no prediction (x is None) its
                   result is returned unchanged.
    """
    y0 = wrap(y)[0]
    assert r is not None
    x, x_std, s = empirical_ema_r1(y=y0, s=s, k=k, a=a, t=t, e=e, r=r)

    if x is None:
        # Nothing to shrink (e.g. a missing observation): pass the inner
        # result straight through instead of failing when zipping over None.
        return x, x_std, s

    def hypocratic(xi: float, xi_std: float, confidence=0.5):
        """ Shrink residual prediction towards zero """
        import math
        if abs(xi_std) < 1e-6 or abs(xi) < 1e-3 * xi_std:
            return 0.0
        else:
            return xi * math.tanh(confidence * abs(xi) / (3 * xi_std))

    x_resid = [hypocratic(xi, xi_std) for xi, xi_std in zip(x, x_std)]
    return x_resid, x_std, s
示例#23
0
def trivial_ema_r1(y: Y_TYPE,
                   s,
                   k: int = 1,
                   a: A_TYPE = None,
                   t: T_TYPE = None,
                   e: E_TYPE = None,
                   r: R_TYPE = None):
    """ Exponential moving average

          y      observation; only the first element is used
          s      state; supply an empty dict on the first call
          k      number of steps ahead
          a,t,e  accepted for signature compatibility, not used here
          r      weight to place on existing anchor point, must lie in [0, 1]

          :returns: x, x_std, s  where x is the moving average repeated k
                    times and x_std is a list of k ones.
                    If the observation is None the result is None, None, s.
    """
    assert r is not None
    y0 = wrap(y)[0]
    if not s.get('rho'):
        s = {'x': y0, 'rho': r}
        assert 0 <= s['rho'] <= 1, 'Expecting rho=r to be between 0 and 1'
    else:
        assert abs(r - s['rho']) < 1e-6, 'rho=r is immutable'

    if y0 is None:
        # Same (x, x_std, s) ordering as the normal return
        return None, None, s

    s['x'] = s['rho'] * s['x'] + (1 - s['rho']) * y0  # Make me better !
    # Repeat the average k times; do not multiply the value itself by k
    x = [s['x']] * k
    return x, [1.0] * k, s
示例#24
0
def observance(y: [float], o: dict, k: int, a: [float] = None):
    """
    This marshals the k-step ahead vector a and the contemporaneous y[1:] and
    returns a combined vector of all exogenous variables.

    It tracks a list of x and corresponding y, by putting a's in a FIFO queue and
    by caching the previous value of y[1:]

    :param o:  state; a falsy value is replaced by a fresh state holding a
               queue 'a' of k None placeholders, the cached exogenous 'z', and
               the matched histories 'x' and 'y'
    :param k:  Number of steps ahead that a is provided
    :param y:  observation; split_exogenous separates y[0] from the rest
    :param a:  variables known k steps in advance (optional)
    :returns:  x_t, o   where x_t:[float] is the vector combining y[1:] with
               previously supplied a's, or None when nothing is available yet,
               and o is the updated state
    """
    yw = wrap(y)
    aw = wrap(a)

    if not o:
        o = {
            'a': [None for _ in range(k)],
            'z': None,  # Stores the previous value of y[1:]
            'x': list(),
            'y': list()
        }

    y_t, z = split_exogenous(yw)

    # Get the contemporaneous variables from last observation
    if z:
        z_t = o.get('z')  # The previously revealed exogenous variables
        o['z'] = z  # Store for next time
    else:
        z = None
        z_t = None

    # Determine the known in advance variable pertaining to the present
    if aw:
        # The queue starts with k placeholders, so the value popped here is
        # None until k values of 'a' have been supplied
        a_t = o['a'].pop(
            0)  # The known in advance variable pertaining to this time step
        o['a'].append(aw)  # Put the k-ahead received a value(s) on the queue
    else:
        a = None
        a_t = None

    # Combine into exogenous variables ... but only if both arrived
    if aw and z:
        x_t = z_t + a_t if (z_t and a_t) else None
    elif aw and not z:
        x_t = a_t if a_t else None
    elif (not aw) and z:
        x_t = z_t if z_t else None
    elif (not aw) and not z:
        x_t = None

    if (not z) and (not aw):
        o['y'].append([y_t])  # Special case, no need to wait
    else:
        # Record the pair only once a matching x_t exists, so that 'x' and 'y'
        # stay the same length
        if x_t:
            o['x'].append(x_t)
            o['y'].append([y_t])
        assert len(o['x']) == len(o['y']), "post-condition"
    return x_t, o
示例#25
0
def fbprophet_skater_factory(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None,
                             t: T_TYPE = None, e: E_TYPE = None,
                             emp_mass: float = 0.0, emp_std_mass: float = 0.0,
                             freq=None, recursive: bool = False,
                             model_params: dict = None,
                             n_max: int = None) -> ([float], Any, Any):
    """ Prophet skater with running prediction error moments
        Hyper-parameters are explicit here, whereas they are determined from r in actual skaters.
        Params of note:

             a: value of known-in-advance vars k step in advance (not contemporaneous with y)
             t: epoch time of the observation, as a float (optional)
             emp_mass:     weight in [0, 1] given to the bias-corrected prediction
             emp_std_mass: weight in [0, 1] given to the empirical std of residuals
             freq, n_max:  default to PROPHET_META['freq'] and PROPHET_META['n_max']

        :returns: x, x_std, s   or   None, None, s  when y is None

    """

    assert 0 <= emp_mass <= 1
    assert 0 <= emp_std_mass <= 1

    if freq is None:
        freq = PROPHET_META['freq']
    if n_max is None:
        n_max = PROPHET_META['n_max']

    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'p': {},     # parade
             'y': list(), # historical y
             'a': list(), # list of a known k steps in advance
             't': list(),
             'k': k}
    elif y is not None:
        # Assert immutability of k, dimensions of y,a. Skipped when y is None
        # so the early return below is reached instead of failing on len(None).
        assert len(y) == len(s['y'][0])
        assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        # Same (x, x_std, s) ordering as the normal return
        return None, None, s

    s['y'].append(y)
    if a is not None:
        s['a'].append(a)
    if t is not None:
        assert isinstance(t,float), 'epoch time please'
        s['t'].append(t)

    if len(s['y']) > max(2 * k + 5, PROPHET_META['n_warm']):
        # Offset y, t, a are supplied to prophet interface: y and t drop their
        # first k entries while a is passed whole, since a is k steps ahead
        t_arg = s['t'][k:] if t is not None else None
        a_arg = s['a']
        y_arg = s['y'][k:]
        x, x_std, forecast, model = prophet_iskater_factory(y=y_arg, k=k, a=a_arg, t=t_arg,
                                                            freq=freq, n_max=n_max,
                                                            recursive=recursive, model_params=model_params)
        s['m'] = True # Flag indicating a model has been fit (there is no point keeping the model itself, however)
    else:
        # Still warming up: repeat the last value and report no model std
        x = [y[0]] * k
        x_std = None

    # Get running mean prediction errors from the prediction parade
    x_resid, x_resid_std, s['p'] = parade(p=s['p'], x=x, y=y[0])
    # NOTE(review): missing residuals are filled with y[0] rather than zero - confirm intent
    x_resid = nonecast(x_resid,y[0])
    x_resid_std = nonecast(x_resid_std,1.0)

    # Compute center of mass between bias-corrected and uncorrected predictions
    x_corrected = np.array(x_resid) + np.array(x)
    x_center = nonecenter(m=[emp_mass, 1 - emp_mass], x=[x_corrected, x])
    x_std_center = nonecenter(m=[emp_std_mass, 1 - emp_std_mass], x=[x_resid_std, x_std])

    return x_center, x_std_center, s
示例#26
0
def ensemble_factory(y: Y_TYPE,
                     s: dict,
                     k: int,
                     a: A_TYPE = None,
                     t: T_TYPE = None,
                     e: E_TYPE = None,
                     fs: List = None,
                     rs: List = None,
                     g=None,
                     r=None,
                     include_std=True) -> ([float], Any, Any):
    """ Ensembles *only* the k-step ahead

          fs  - list of skaters
          rs  - list of hyper-params, if any (one per skater; None means the
                skater is called without r)
          g   - exogenous skater
          r   - hyper-param for g, if any
          include_std - bool. If True, will add x_std to the exogenous variables sent to g

          :returns: x, x_std, s   or   None, None, s  when y is None

          For the first nine observations the sub-skaters are run to warm them
          up, but g is not called and the last value is returned instead.
    """
    if not s.get('s_fs'):
        s = {'s_fs': [{} for _ in fs], 's_g': {}, 'n_obs': 0}

    if y is None:
        return None, None, s

    # Apply models, keeping only the furthest-ahead point estimate (and its
    # std, if requested) from each
    xjs = list()
    rs = rs or [None for _ in fs]
    # The loop variables are deliberately NOT called f and r: the parameter r
    # belongs to g and must survive this loop untouched.
    for j, (fj, rj) in enumerate(zip(fs, rs)):
        skater_kwargs = dict(y=y, s=s['s_fs'][j], k=k, a=a, t=t, e=e)
        if rj is not None:
            skater_kwargs['r'] = rj
        xj, xj_std, s['s_fs'][j] = fj(**skater_kwargs)
        xjs.append(xj[-1])
        if include_std:
            xjs.append(xj_std[-1])

    s['n_obs'] += 1
    y0 = wrap(y)[0]
    if s['n_obs'] < 10:
        # NOTE(review): the last value is also returned as the scale during
        # warm-up - confirm this is intended.
        return [y0] * k, [y0] * k, s

    # The sub-skater outputs become contemporaneous exogenous variables for g
    y_extend = [y0] + xjs
    g_kwargs = dict(y=y_extend, s=s['s_g'], k=k, a=a, t=t, e=e)
    if r is not None:
        g_kwargs['r'] = r
    x, x_std, s['s_g'] = g(**g_kwargs)
    return x, x_std, s
示例#27
0
def nproph_skater_factory(
    y: Y_TYPE,
    s: dict,
    k: int = 1,
    a: A_TYPE = None,
    t: T_TYPE = None,
    e: E_TYPE = None,
    method: str = 'default',
    n_warm=50,
    model_params: dict = None
) -> (Union[List[float], None], Union[List[float], None], Any):
    """ Predict using both simultaneously observed and known in advance variables

        y: Y_TYPE    scalar or list where y[1:] are interpreted as contemporaneously observed exogenous variables
        s:           state dict. It is mutated in place and also returned; pass an empty dict on the first call.
        k:           Number of steps ahead to predict
        a:           (optional) scalar or list of variables known k-steps in advance.
                     When calling, provide the known variable k steps ahead, not the contemporaneous one.
        t:           (optional) Time of observation. Accepted but not used by this function.
        e:           (optional) Maximum computation time (supply e>60 to give hint to do fitting).
                     Only forwarded to nproph_it_is_time_to_fit(), which decides whether to (re)fit.
        method:      forwarded to nproph_params() to select the initial parameter set
        n_warm:      forwarded to nproph_set_immutable() during initialization
        model_params: (optional) dict merged over the parameters returned by nproph_params()

        :returns: x [float], x_std [float], s'
                  (None, None, s') when the wrapped y is None

        Remarks:
           - model_params is only applied while the state is being initialized,
             so it cannot be changed once observations have been received.
           - Allows y=None to be used
           - NOTE(review): after fitting, s['model'] is a NeuralProphet instance, but the update/predict
             calls below look like the pmdarima ARIMA interface (cf. the commented-out pm.auto_arima
             call in the fit section). Confirm these calls are valid for NeuralProphet.
    """
    y = wrap(y)
    a = wrap(a)

    if not s.get('n_obs'):
        # Initialize. Note n_obs remains 0 until the first non-None y arrives,
        # so this branch is taken again on every call up to that point.
        s['n_obs'] = 0
        s['model'] = None
        s['immutable'] = nproph_set_immutable(k=k, y=y, a=a, n_warm=n_warm)
        s['params'] = nproph_params(method=method)
        if model_params:
            s['params'].update(model_params)
        s['o'] = dict()  # Observance state, maintained by observance() below
    else:
        nproph_check_consistent_usage(y=y, s=s, a=a, k=k)

    tick(s)  # presumably starts a timer kept in s; it is closed by tock(s) before the fit decision
    if t is not None:
        pass  # Other models might perform an evolution step here. Not applicable to this model

    if y is not None:
        # Receive observation y[0], possibly exogenous y[1:] and possibly k-in-advance a[:]
        # Collect from queues the contemporaneous variables
        s['n_obs'] += 1
        y_t, z = split_exogenous(y)
        x_t, s['o'] = observance(y=y, o=s['o'], k=k, a=a)

        # Update the fitted model, if there is one, with the newest observation
        if x_t is not None:
            if s['model'] is not None:
                if x_t:
                    # Non-empty x_t: pass the exogenous values along with the target
                    s['model'].update([y_t], [x_t])
                else:
                    s['model'].update([y_t])

        # Predict
        if s['model'] is None:
            # Fall back to last value if there is no model calibrated as yet
            x = [y_t] * k
            if len(s['o']['x']) > 5 + 2 * k:
                # Enough history: scale is the nan-aware std of differences between the
                # first component of the stored x rows and the stored y rows offset by j
                Y = s['o']['y'][k + 1:]
                X = s['o']['x'][k + 1:]
                x_std = [
                    np.nanstd([xi[0] - yk[0] for xi, yk in zip(X, Y[j:])])
                    for j in range(1, k + 1)
                ]
            else:
                x_std = [1.0] * k  # Fallback to dreadful estimate
        else:
            # Predict forward, supplying known data if it exists
            if not a and not z:
                z_forward = None
            else:
                if not a:
                    # No known-in-advance variables: hold the latest exogenous values flat
                    z_forward = [z] * k
                else:
                    z_forward = [list(z) + list(ai) for ai in s['o']['a']
                                 ]  # Add known k-steps ahead
                    # This estimate could be improved by predicting z's and attenuating
                    # It is only really a good idea for k=1
            # NOTE(review): keyword arguments here match a pmdarima-style predict(); verify for NeuralProphet
            x, ntvls = s['model'].predict(n_periods=k,
                                          X=z_forward,
                                          return_conf_int=True,
                                          alpha=s['immutable']['alpha'])
            # The width of each returned interval is used as the scale; it is not literally a standard deviation
            x_std = list([ntvl[1] - ntvl[0] for ntvl in ntvls])

    # Fit
    tock(s)
    if nproph_it_is_time_to_fit(s=s, e=e):
        tick(s)
        # X is only referenced by the commented-out auto_arima call below;
        # no exogenous data is passed to the NeuralProphet fit.
        X = s['o'].get('x') or None
        Y = s['o']['y']
        # s['model'] = pm.auto_arima(y=Y, X=X, **s['params'])

        # A fresh model is built from the stored parameters on every fit
        s['model'] = NeuralProphet(
            n_lags=s['params']['n_lags'],
            changepoints_range=s['params']['changepoints_range'],
            n_changepoints=s['params']['n_changepoints'],
            weekly_seasonality=s['params']['weekly_seasonality'],
            batch_size=s['params']['batch_size'],
            epochs=s['params']['epochs'],
            learning_rate=s['params']['learning_rate'],
        )
        # Observation times are not used: a synthetic, evenly spaced timestamp column is fabricated
        dummy_freq = '5min'
        dummy_start = '2021-01-01'
        DF = pd.DataFrame(columns=['y'], data=Y)
        DF['ds'] = pd.date_range(start=dummy_start,
                                 periods=len(Y),
                                 freq=dummy_freq)
        s['model'].fit(DF, freq=dummy_freq)
        # Debug output written to stdout on every fit
        print(s['model'].data_params)
        pprint(tocks(s))
        tock(s, 'fit')
        pprint(tocks(s))

    if y is not None:
        return list(x), list(x_std), s
    else:
        return None, None, s
示例#28
0
    def prophet_iskater_factory(y: [[float]],
                                k: int,
                                a: List = None,
                                t: List = None,
                                e=None,
                                freq: str = None,
                                n_max=1000,
                                recursive: bool = False,
                                model_params: dict = None,
                                return_forecast=True):
        """ Fit Prophet to the supplied history and predict k steps ahead.

        The model is re-fitted from scratch on every invocation.

        :param y:           A list of observations, each a vector. A list of scalars (int or float)
                            is also accepted and treated as a univariate series.
        :param k:           Number of steps ahead to predict
        :param a:           Known in advance observations - should be k more of these than y's
        :param t:           Epoch times of observations y. If len(t)=len(y)+k the last k are interpreted as future times.
        :param e:           Unused; accepted for signature compatibility.
        :param freq:        'D', '5T' etc, see https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/frequencies.py
                            Ignored when t is supplied (the frequency is then inferred from t).
        :param n_max:       Maximum number of observations to use, should you wish to prevent prophet from slowing down
        :param recursive    If True, exogenous variables y[1], y[2],... will be predicted forward in time
                                 (obviously this adds to computation time). Otherwise their last value is held flat.
        :param model_params: Optional overrides merged over a copy of PROPHET_MODEL
        :param return_forecast: If True also return the forecast dataframe and the fitted model
        :returns: x         k-vector of predictions
                  x_std     k-vector of scales (width of Prophet's yhat_upper - yhat_lower band)
                  forecast  full forecast dataframe, familiar to users of fbprophet   (only if return_forecast)
                  m         the fitted Prophet model                                  (only if return_forecast)
        """
        if a:
            assert len(a) == len(y) + k

        # Accept a plain list of scalars. ints are included so that integer valued series
        # (e.g. an exogenous column handed to the recursive call below) do not fail later
        # when the length of each observation vector is taken.
        if isinstance(y[0], (int, float)):
            y = [wrap(yj) for yj in y]

        # Conversion of epoch times to UTC datetime
        # User must supply times, len(y) or len(y)+k, or a valid frequency str
        if t is None:
            if not freq:
                freq = PROPHET_META['freq']  # Just assume away ...
            else:
                assert is_valid_freq(
                    freq), 'Freq ' + str(freq) + ' is not a valid frequency'
            dt = pd.date_range(start=EPOCH, periods=len(y), freq=freq)  # UTC
        else:
            freq = infer_freq_from_epoch(t)
            dt = epoch_to_naive_datetime(t)

        if len(dt) == len(y) + k:
            # The caller also told us the k future times
            ta = dt
            dt = dt[:len(y)]
        else:
            assert len(dt) == len(
                y), 'Time vector t should be len(y) or len(y)+k'
            ta = None

        # Truncate history so that prophet doesn't take forever to fit
        y_shorter = y[-n_max:]
        a_shorter = a[-(n_max + k):] if a is not None else []  # may be empty
        dt_shorter = dt[-n_max:]

        # Massage data into Prophet friendly dataframe with columns y, y1, ..., yk, a0,...aj
        y_cols = [
            'y' + str(i) if i > 0 else 'y' for i in range(len(y_shorter[-1]))
        ]
        if a:
            a_cols = ['a' + str(i) for i in range(len(a_shorter[-1]))]
            data = [
                list(yi) + list(ai)
                for yi, ai in zip(y_shorter, a_shorter[:-k])
            ]
            df = pd.DataFrame(columns=y_cols + a_cols, data=data)
        else:
            data = [list(yi) for yi in y_shorter]
            df = pd.DataFrame(columns=y_cols, data=data)
        df['ds'] = dt_shorter

        # Instantiate Prophet model from a copy of the defaults, so the shared dict is never mutated
        kwargs_used = dict(PROPHET_MODEL)
        if model_params:
            kwargs_used.update(model_params)
        m = Prophet(**kwargs_used)

        # Add regressors
        for y_col in y_cols[1:]:
            m.add_regressor(name=y_col)
        if a:
            for a_col in a_cols:
                m.add_regressor(name=a_col)

        # Fit the model every invocation ... there isn't any other way
        with no_stdout_stderr():
            m.fit(df)

        # Make future dataframe, adding known-in-advance variables
        future = m.make_future_dataframe(periods=k, freq=freq)
        if a:
            for j, a_col in enumerate(a_cols):
                future[a_col] = [ai[j] for ai in a_shorter]  # Known in advance
        if ta is not None:
            # Override with user supplied times. ta spans the full, untruncated history plus
            # the k future times, whereas future only covers the truncated history plus k,
            # so keep just the matching tail to avoid a length mismatch when len(y) > n_max.
            future['ds'] = list(ta)[-len(future):]

        # Next, we wish to add contemporaneously observed variables
        #
        # This is somewhat problematic, for how should we bring exogenously observed variables forward?
        # The simplest answer is, don't use them - only supply 1-vector y observations
        # prophet implicitly assumes all exogenous are known, which is a pretty big shortcoming.
        #
        # However, if we are trying to support y[1:], ...
        #   - It seems consistent to use prophet to predict these forward,
        #   - It also seems likely that this will lead to over-fitting.
        # I'm open to ideas here. Perhaps some hackery could effect attenuation of the coefficients
        # assigned to y[1],... such as jiggling past observations. For now we use prophet on each
        # one individually, feeding them the known in advance 'a' variables.

        n_exog = len(y[0]) - 1
        if n_exog > 0:
            for j, y_col in enumerate(y_cols[1:], start=1):
                yj = [yi[j] for yi in y_shorter]
                if recursive:
                    # One level of recursion only: the sub-model sees a univariate series
                    yj_hat, _, _, _ = prophet_iskater_factory(
                        y=yj,
                        k=k,
                        a=a_shorter,
                        freq=freq,
                        n_max=n_max,
                        recursive=False)
                else:
                    yj_hat = [yj[-1]] * k
                future[y_col] = yj + list(yj_hat)

        # Call the prediction function
        forecast = m.predict(future)
        x = list(forecast['yhat'].values[-k:])  # Use m.plot(forecast) to take a peek

        # Interpret confidence level difference as scale to be returned. TODO: set alpha properly so this really is 1-std
        x_std = [
            upper - lower
            for upper, lower in zip(forecast['yhat_upper'].values[-k:],
                                    forecast['yhat_lower'].values[-k:])
        ]

        if return_forecast:
            return x, x_std, forecast, m
        else:
            return x, x_std
def residual_chaser_factory(y :Y_TYPE, s:dict, k:int, a:A_TYPE =None, t:T_TYPE =None, e:E_TYPE =None,
                            f1=None, f2=None, r1=None, r2=None)->([float] , Any , Any):
    """ Second model predicts k=1, k=k residuals of the first, and interpolates

          y   - Observation; only the first element of wrap(y) is used as the target
          s   - State. A fresh state is created whenever s has no (truthy) 's1' entry
          k   - Number of steps ahead to predict
          a, t, e - Passed through unchanged to f1 and f2
          f1  - A skater making the primary prediction
          f2  - A skater designed to predict residuals ... both 1 step ahead and k-steps ahead
          r1  - hyper-params for f1, if any (passed as r= only when not None)
          r2  - hyper-params for f2, if any (passed as r= only when not None)

       Returns (x, x_std, s): k-vectors of corrected predictions and residual scales, as lists,
       plus the updated state. Returns (None, None, s) when the observation is None.

       It *may* make sense to choose an f2 that shrinks towards zero.
    """
    # Horizons over which the residual model is used.
    # We'd rather not call the residual model k-times, so only the end points are modelled.
    J = [1] if k == 1 else [1, k]

    y0 = wrap(y)[0]
    if not s.get('s1'):
        s = {'sres': {},                  # Residual state ... used to determine the residual
             'x': y0,
             's1': {},                    # First model state
             's2': {j: {} for j in J},    # Residual model states, one per horizon
             'n_obs': 0}

    if y0 is None:
        return None, None, s

    # Use the first skater to predict
    f1_kwargs = {} if r1 is None else {'r': r1}
    x1, _x1_std, s['s1'] = f1(y=y, s=s['s1'], k=k, a=a, t=t, e=e, **f1_kwargs)
    resid1, s['sres'] = residual(s['sres'], y=y0, x=x1)

    s['n_obs'] += 1

    # Use the second skater to predict j-step ahead residuals
    # There are two copies of the residual model employed (one when k == 1).
    f2_kwargs = {} if r2 is None else {'r': r2}
    res_j_hat = []
    res_j_std = []
    for j in J:
        j_ahead_residual = resid1[j - 1]
        _x, _std, s['s2'][j] = f2(y=j_ahead_residual, s=s['s2'][j], k=j, a=a, t=t, e=e, **f2_kwargs)
        # f2 was asked for j steps, so the j-step-ahead value is the last entry, _x[j-1].
        # (Indexing by position in J would pick the 2-step-ahead value for j=k when k > 2.)
        res_j_hat.append(_x[j - 1])
        res_j_std.append(_std[j - 1])

    # Interpolate between the modelled horizons
    if k == 1:
        res_interp = res_j_hat
        res_interp_std = res_j_std
    else:
        import numpy as np
        ks = list(range(1, k + 1))
        # tolist() so that both branches return plain lists
        res_interp = np.interp(x=ks, xp=J, fp=res_j_hat).tolist()
        res_interp_std = np.interp(x=ks, xp=J, fp=res_j_std).tolist()

    # Residual   res =  y - x1,   so  x1+res ~ y  .... one hopes
    x_hat = [resj + x1j for resj, x1j in zip(res_interp, x1)]
    return x_hat, res_interp_std, s