def regress_level_on_first_known(y: Y_TYPE, s: dict, k, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None) -> ([float], Any, Any):
    """ Very basic online regression skater, mostly for testing

          - Only the first known-in-advance variable is utilized
          - The last value is ignored, unless a is None, in which case we return 0.0
          - Empirical std is returned
    """
    y0 = wrap(y)[0]          # Ignore contemporaneous, exogenous variables
    if a:
        a0 = wrap(a)[0]      # Ignore all but the first known-in-advance variable

    if not s.get('k'):
        # First invocation
        s = {'p': {}}        # Prediction parade
        s['r'] = {}          # Regression state, not to be confused with hyper-param r
        s['k'] = k
        s['o'] = {}          # The "observance" will quarantine 'a' until it can be matched
    else:
        assert s['k'] == k   # Immutability

    if a is None:
        return [0] * k, [1.0] * k, s
    else:
        a_t, s['o'] = observance(y=[y0], o=s['o'], k=k, a=[a0])   # Update the observance
        if a_t is not None:
            # This is the contemporaneous 'a', which was supplied k calls ago.
            if not s['r']:
                # When first calling the online regression algorithm we avoid the degenerate case
                # by sending it two observations.
                y_noise = 0.1 * (1e-6 + abs(y0)) * np.random.randn()
                x_noise = 0.1 * (1e-6 + abs(a0)) * np.random.randn()
                x = [a_t[0] - x_noise, a_t[0] + x_noise]
                y = [y0 - y_noise, y0 + y_noise]
                s['r'] = regress_one_helper(x=x, y=y, r=s['r'])
            else:
                s['r'] = regress_one_helper(x=a_t, y=[y0], r=s['r'])
            # Predict using contemporaneous alphas
            x = [s['r']['alpha'] + s['r']['beta'] * ak[0] for ak in s['o']['a']]
            # Push prediction into the parade and get the current bias/stderr
            bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
            return x, x_std, s   # TODO: Use the std implied by the regression instead
        else:
            x = [y0] * k
            bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)
            return x, x_std, s

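# Illustrative usage sketch only, assuming numpy is imported as np at module level (as elsewhere
# in this file) and that the helpers used above (wrap, parade, observance, regress_one_helper)
# behave as used. The name _demo_regress_level and the synthetic driver are hypothetical, not
# part of the library API: on each call, a[0] is the value the driver will take k steps from now.
def _demo_regress_level(n: int = 200, k: int = 2):
    driver = np.sin(np.linspace(0, 10, n + k))         # Exogenous driver, known k steps ahead
    ys = 3.0 * driver[:n] + 0.1 * np.random.randn(n)   # Observed series depends on the driver
    s = {}
    for i, y in enumerate(ys):
        x, x_std, s = regress_level_on_first_known(y=y, s=s, k=k, a=[driver[i + k]])
    return x, x_std   # k-vector prediction and empirical std from the final call
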
def fbprophet_cautious(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Similar to fbexogenous, but no crazy nonsense: forecasts are clipped to a band around recent values """
    if not s.get('s'):
        s['s'] = {}        # prophet's state
        s['y'] = list()    # maintain the last five values
    y0 = wrap(y)[0]
    s['y'].append(y0)
    if len(s['y']) > 5:
        s['y'].pop(0)
    import math
    x_upper = [np.max(s['y']) + math.sqrt(j + 1) * np.std(s['y']) for j in range(k)]
    x_lower = [np.min(s['y']) - math.sqrt(j + 1) * np.std(s['y']) for j in range(k)]
    x, x_std, s['s'] = fbprophet_univariate(y=y, s=s['s'], k=k, a=a, t=t, e=e)
    x_careful = np.minimum(np.array(x), np.array(x_upper))
    x_careful = np.maximum(x_careful, np.array(x_lower))
    return list(x_careful), x_std, s

def moving_average_r1(y: Y_TYPE, s, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None, r: R_TYPE = None):
    """ Exponential moving average, with empirical std

          r    weight to place on the existing anchor point
    """
    assert r is not None
    y0 = wrap(y)[0]
    if not s.get('p'):
        s = {'p': {}, 'x': y0, 'rho': r}
        assert 0 <= s['rho'] <= 1, 'Expecting rho=r to be between 0 and 1'
    else:
        assert abs(r - s['rho']) < 1e-6, 'rho=r is immutable'

    if y0 is None:
        return None, None, s
    else:
        s['x'] = s['rho'] * s['x'] + (1 - s['rho']) * y0    # Make me better !
        x = [s['x']] * k
        bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)   # Update prediction queue
        return [s['x']] * k, x_std, s

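# Brief usage sketch, assuming numpy as np at module level and the parade helper as used above.
# The r hyper-parameter is the weight kept on the running anchor, so r close to 1 smooths heavily
# and r=0 reduces to the last value. The helper name _demo_moving_average_r1 is hypothetical.
def _demo_moving_average_r1(n: int = 100, k: int = 3, r: float = 0.9):
    ys = list(np.cumsum(np.random.randn(n)))
    s = {}
    for y in ys:
        x, x_std, s = moving_average_r1(y=y, s=s, k=k, r=r)
    return x, x_std
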
def is_opinonated(y, forecast: pd.DataFrame, k: int, n_recent: int, multiple: float) -> bool:
    """ Check whether the forecast is far from all recent values, and thus "opinionated"

    :param y:         data used to fit
    :param forecast:  dataframe produced by prophet fitting
    :param k:         number of steps ahead
    :param n_recent:  number of recent values to compare against
    :param multiple:  number of (horizon-scaled) standard deviations allowed before the forecast is deemed opinionated
    :return: bool
    """
    if isinstance(y[0], float):
        y = [wrap(yj) for yj in y]
    y0 = [yj[0] for yj in y]
    for j in range(1, k + 1):
        j_std = np.nanstd(np.diff(y0[-k - 50:-k], j))
        recent_ys = y0[-(k + n_recent):-(k + 1)]
        upper = np.max(recent_ys) + multiple * j_std * math.sqrt(j) + 0.1
        lower = np.min(recent_ys) - multiple * j_std * math.sqrt(j) - 0.1
        j_x = forecast['yhat'].values[-(1 + k - j)]
        if j_x > upper or j_x < lower:
            return True
    return False

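# Hedged sketch of how the check above might be exercised, assuming pandas as pd and numpy as np
# at module level. The forecast frames are synthetic and only mimic the 'yhat' column that prophet
# produces; the name _demo_is_opinonated and all parameter choices are illustrative only.
def _demo_is_opinonated(k: int = 3):
    history = list(np.cumsum(np.random.randn(200)))
    calm = pd.DataFrame({'yhat': history + [history[-1]] * k})           # Forecast hugs the last value
    wild = pd.DataFrame({'yhat': history + [history[-1] + 100.0] * k})   # Forecast jumps far away
    return (is_opinonated(y=history, forecast=calm, k=k, n_recent=20, multiple=3.0),
            is_opinonated(y=history, forecast=wild, k=k, n_recent=20, multiple=3.0))
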
def regress_change_on_first_known(y: Y_TYPE, s: dict, k, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None) -> ([float], Any, Any):
    """ Very basic modification of the last value cache.

        This looks at the contemporaneous influence of a single known-in-advance variable on the
        change in y, and assumes independent increments when estimating the standard deviation.
        It is also intended to illustrate the combination of skaters.
    """
    y0 = wrap(y)[0]    # Ignore contemporaneous, exogenous variables
    if not s.get('prev_y0'):
        s = {'prev_y0': y0,
             'd': {}}   # state for the difference-predicting skater
        return [y0] * k, [1.0] * k, s
    else:
        dy0 = y0 - s['prev_y0']
        dy_hat, dy_hat_std, s['d'] = regress_level_on_first_known(y=[dy0], s=s['d'], k=k, a=a, t=t, e=e)
        s['prev_y0'] = y0   # Keep the anchor current so dy0 really is an increment
        x = [y0 + sum_dy for sum_dy in np.cumsum(dy_hat)]
        x_std = [math.sqrt(v) for v in np.cumsum([sd * sd for sd in dy_hat_std])]
        return x, x_std, s

def fbprophet_skater_testor(y: Y_TYPE, s: dict = None, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None, r: R_TYPE = None,
                            freq=None, n_max=None):
    """ A default facebook prophet usage, with no hyper-parameters and no prediction parade (for testing) """
    if freq is None:
        freq = PROPHET_META['freq']
    if n_max is None:
        n_max = PROPHET_META['n_max']
    y = wrap(y)
    a = wrap(a)

    if not s or not s.get('y'):
        s = {'y': list(), 'a': list(), 'k': k}
    else:
        # Assert immutability of k and dimensions
        if s['y']:
            assert len(y) == len(s['y'][0])
            assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        return None, None, s
    else:
        s['y'].append(y)
        if a is not None:
            s['a'].append(a)
        if len(s['y']) > max(2 * k + 5, PROPHET_META['n_warm']):
            x, x_std, _, _ = prophet_iskater_factory(y=s['y'], k=k, a=s['a'], freq=freq, n_max=n_max)
        else:
            x = [y[0]] * k
            x_std = [1.0] * k
        return x, x_std, s

def fbprophet_known(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Uses known-in-advance variables, but not y[1:] """
    y0 = [wrap(y)[0]]
    return fbprophet_skater_factory(y=y0, s=s, k=k, a=a, t=t, e=e)

def fbprophet_univariate(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Simple univariate prediction using only y[0], and not 'a' or y[1:] """
    y0 = [wrap(y)[0]]
    return fbprophet_skater_factory(y=y0, s=s, k=k, a=None, t=t, e=e)

def pmd_univariate(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Uses only y[0] and ignores y[1:] and a[:] """
    y0 = [wrap(y)[0]]
    return pmd_skater_factory(y=y0, s=s, k=k, a=None, t=t, e=e, method='auto')

def pmd_known(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Uses known-in-advance variables, but not y[1:] """
    y0 = [wrap(y)[0]]
    return pmd_skater_factory(y=y0, s=s, k=k, a=a, t=t, e=e, method='auto')

def empirical_last_value(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None) -> ([float], Any, Any):
    """ Last value cache, with empirical std """
    if not s.get('p'):
        s = {'p': {}}                                       # Initialize prediction parade
    if y is None:
        return None, None, s
    else:
        y0 = wrap(y)[0]                                     # Ignore the rest
        x = [y0] * k                                        # What a great prediction !
        bias, x_std, s['p'] = parade(p=s['p'], x=x, y=y0)   # Update residual queue
        return x, x_std, s

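# Minimal usage sketch of the skater calling convention (x, x_std, s in; updated s out), assuming
# numpy as np at module level and the parade helper as used above. The synthetic random walk and
# the name _demo_empirical_last_value are illustrative, not part of the library API.
def _demo_empirical_last_value(n: int = 50, k: int = 3):
    ys = list(np.cumsum(np.random.randn(n)))
    s = {}
    for y in ys:
        x, x_std, s = empirical_last_value(y=y, s=s, k=k)
    return x, x_std   # k-vector of predictions and empirical standard errors
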
def trivial_last_value(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None) -> ([float], [float], Any):
    """ Last value cache """
    if y is None:
        return None, None, s
    else:
        y0 = wrap(y)[0]   # Ignore the rest
        x = [y0] * k      # What a great prediction !
        return x, [1.0] * k, {}

def dlm_exogenous_r3(y, s, k, a, t, e, r):
    """ One way to use dlm

          :returns: x, x_std, s
    """
    if not s:
        s = dict()
        s['dim'] = dimension(y)
        s = dlm_set_exog_hyperparams(s=s, r=r)
        y0, exog = split_exogenous(y=y)
        s['n_obs'] = 0
        s['model'] = quietDlm([], printInfo=False) + trend(s['trend_degree'], s['discount']) + seasonality(s['period'], s['discount'])
        s['model'] = s['model'] + fixedAutoReg(degree=s['auto_degree'], name='ar', w=1.0)
        if exog:
            exog_wrapped = [[None if np.isnan(ex0) else ex0 for ex0 in exog]]
            s['model'] = s['model'] + dynamic(features=exog_wrapped, discount=0.99, name='exog')   # Sets first exog

    if y is not None:
        y = wrap(y)
        assert dimension(y) == s['dim'], 'Cannot change dimension of data sent'
        s['n_obs'] += 1
        y0, exog = split_exogenous(y=y)
        y0_passed_in = None if np.isnan(y0) else y0   # pydlm uses None for missing values
        s['model'].append([y0_passed_in])
        if exog:
            exog_wrapped = [[None if np.isnan(ex0) else ex0 for ex0 in exog]]
            if s['n_obs'] > 1:
                s['model'].append(data=exog_wrapped, component='exog')   # Don't get first exog twice
        num_obs = len(s['model'].data) if s.get('model') else 0
        if num_obs % s['n_fit'] == s['n_fit'] - 1:
            _, _, s = dlm_exogenous_r3(y=None, s=s, k=k, a=a, t=t, e=10, r=r)
        s['model'].fitForwardFilter()
        return _dlm_exog_prediction_helper(s=s, k=k, y=y)

    if y is None:
        if s['dim'] == 1:
            s['model'].tune(maxit=20)
            # Don't tune if exogenous ... haven't got this to work
        s['model'].fit()
        return None, None, s

def fbprophet_univariate_r2(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None):
    """ Simple univariate prediction using only y[0], and not 'a' or y[1:] """
    y0 = [wrap(y)[0]]
    param_names = ['changepoint_prior_scale', 'seasonality_prior_scale']
    return fbprophet_hyperparam_skater_factory(y=y0, s=s, k=k, a=None, t=t, e=e, param_names=param_names, recursive=False)

def divinity_univariate_factory(y: Y_TYPE, s, k: K_TYPE, a=None, t=None, e=None, max_buffer_len=1000, n_warm=101, model_params: dict = None):
    """ A partial wrapping of the divinity library, with notable limitations:

          - Fits on every invocation
          - Ignores exogenous variables
          - State is merely a buffer of past values
    """
    y0 = wrap(y)[0]
    assert n_warm >= 101, 'You must use n_warm >= 101'
    if not s:
        s = dict(y=[])

    if y0 is None:
        return None, None, s   # Ignore the suggestion to fit offline

    # Update buffer
    s['y'].append(y0)
    if len(s['y']) > max_buffer_len + 1000:
        s['y'] = s['y'][-max_buffer_len:]

    # Fit and predict if warm, otherwise just return the last value
    if len(s['y']) < max(n_warm, MIN_N_WARM):
        return [y0] * k, [abs(y0)] * k, s
    else:
        with no_stdout_stderr():
            kwargs = deepcopy(DIVINE_MODEL)
            if model_params:
                kwargs.update(**model_params)
            model = dv.divinity(forecast_length=k, **kwargs)
            model.fit(np.array(s['y']))
        x = list(model.predict())
        x_std = [1.0] * k   # TODO: fixme
        return x, x_std, s

def fbprophet_known_r2(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None, r: R_TYPE = 0.0):
    """ Uses known-in-advance variables, but not y[1:] """
    y0 = [wrap(y)[0]]
    param_names = ['changepoint_prior_scale', 'seasonality_prior_scale']
    return fbprophet_hyperparam_skater_factory(y=y0, s=s, k=k, a=a, t=t, e=e, r=r, param_names=param_names, recursive=False)

def prior_plot(f, y=None, k=None, t=None, e=None, r=None, x0=np.nan, n=150, n_plot=25):
    """ Apply a skater to a univariate series and show observations alongside out-of-sample predictions """
    if y is None:
        y = brownian_with_noise(n=n)
    if t is None:
        t = [float(ti) for ti in range(len(y))]
    x, x_std = prior(f=f, y=y, k=k, a=t, t=t, e=e, r=r, x0=x0)
    ysf = [[wrap(y_)[0]] for y_ in y]
    xk = [xt[-1] for xt in x]
    plot_with_last_value(t=t, x=xk, y=ysf, k=k, n_plot=n_plot)

def trivial_ema_r1(y: Y_TYPE, s, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None, r: R_TYPE = None):
    """ Exponential moving average

          r    weight to place on the existing anchor point
    """
    assert r is not None
    y0 = wrap(y)[0]
    if not s.get('rho'):
        s = {'x': y0, 'rho': r}
        assert 0 <= s['rho'] <= 1, 'Expecting rho=r to be between 0 and 1'
    else:
        assert abs(r - s['rho']) < 1e-6, 'rho=r is immutable'

    if y0 is None:
        return None, None, s
    else:
        s['x'] = s['rho'] * s['x'] + (1 - s['rho']) * y0   # Make me better !
        x = [s['x']] * k
        return x, [1.0] * k, s

def residual_chaser_factory(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                            f1=None, f2=None, chase=1.0, threshold=1.0, r1=None, r2=None) -> ([float], Any, Any):
    """ Last value cache, with empirical std and self-correction

          f1        - A skater making the primary prediction
          f2        - A skater designed to predict residuals
          chase     - Fraction of f2's residual prediction to use
          threshold - Number of standard deviations the residual prediction must exceed before we chase it
          r1        - hyper-params for f1, if any
          r2        - hyper-params for f2, if any
    """
    y0 = wrap(y)[0]
    if not s.get('p1'):
        s = {'p1': {}, 'x': y0, 's1': {}, 's2': {}, 'n_obs': 0}

    if y0 is None:
        return None, None, s
    else:
        # Use the first skater to predict
        if r1 is None:
            x1, x1_std, s['s1'] = f1(y=y, s=s['s1'], k=k, a=a, t=t, e=e)
        else:
            x1, x1_std, s['s1'] = f1(y=y, s=s['s1'], k=k, a=a, t=t, e=e, r=r1)
        x1_error_mean, x1_error_std, s['p1'] = parade(p=s['p1'], x=x1, y=y0)   # Update prediction queue
        s['n_obs'] += 1

        # Use the second skater to predict the mean residual k steps ahead
        xke = x1_error_mean[-1]
        if r2 is None:
            xke_hat_mean, xke_hat_std, s['s2'] = f2(y=[xke], s=s['s2'], k=k, a=a, t=t, e=e)
        else:
            xke_hat_mean, xke_hat_std, s['s2'] = f2(y=[xke], s=s['s2'], k=k, a=a, t=t, e=e, r=r2)

        # If the bias prediction is confident, adjust x1 by chasing it towards the bias-corrected value
        if s['n_obs'] > 10:
            for j in range(len(x1)):
                if abs(xke_hat_mean[j]) > threshold * xke_hat_std[j]:
                    x1[j] = x1[j] + chase * xke_hat_mean[j]

        return x1, x1_std, s

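# Sketch of composing two skaters with the residual chaser, assuming numpy as np at module level
# and the skaters defined in this module. Here empirical_last_value makes the primary prediction
# and trivial_last_value chases its k-step residuals; the name _demo_residual_chaser and the
# drifting synthetic series are illustrative only, not part of the library API.
def _demo_residual_chaser(n: int = 300, k: int = 3):
    ys = list(np.cumsum(np.random.randn(n)) + 0.5 * np.arange(n))   # Drifting walk, so last-value is biased
    s = {}
    for y in ys:
        x, x_std, s = residual_chaser_factory(y=y, s=s, k=k,
                                              f1=empirical_last_value,
                                              f2=trivial_last_value,
                                              chase=1.0, threshold=1.0)
    return x, x_std
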
def fbprophet_skater_factory(y: Y_TYPE, s: dict, k: int, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                             emp_mass: float = 0.0, emp_std_mass: float = 0.0,
                             freq=None, recursive: bool = False, model_params: dict = None, n_max: int = None) -> ([float], Any, Any):
    """ Prophet skater with running prediction error moments

        Hyper-parameters are explicit here, whereas they are determined from r in actual skaters.

        Params of note:

            a: value of known-in-advance vars k steps in advance (not contemporaneous with y)
    """
    assert 0 <= emp_mass <= 1
    assert 0 <= emp_std_mass <= 1

    if freq is None:
        freq = PROPHET_META['freq']
    if n_max is None:
        n_max = PROPHET_META['n_max']
    y = wrap(y)
    a = wrap(a)

    if not s.get('y'):
        s = {'p': {},        # parade
             'y': list(),    # historical y
             'a': list(),    # list of a known k steps in advance
             't': list(),
             'k': k}
    else:
        # Assert immutability of k and the dimensions of y, a
        if s['y']:
            assert len(y) == len(s['y'][0])
            assert k == s['k']
        if s['a']:
            assert len(a) == len(s['a'][0])

    if y is None:
        return None, None, s
    else:
        s['y'].append(y)
        if a is not None:
            s['a'].append(a)
        if t is not None:
            assert isinstance(t, float), 'epoch time please'
            s['t'].append(t)

        if len(s['y']) > max(2 * k + 5, PROPHET_META['n_warm']):
            # Offset y, t, a are supplied to the prophet interface
            t_arg = s['t'][k:] if t is not None else None
            a_arg = s['a']
            y_arg = s['y'][k:]
            x, x_std, forecast, model = prophet_iskater_factory(y=y_arg, k=k, a=a_arg, t=t_arg, freq=freq, n_max=n_max,
                                                                recursive=recursive, model_params=model_params)
            s['m'] = True   # Flag indicating a model has been fit (there is no point keeping the model itself, however)
        else:
            x = [y[0]] * k
            x_std = None

        # Get running mean prediction errors from the prediction parade
        x_resid, x_resid_std, s['p'] = parade(p=s['p'], x=x, y=y[0])
        x_resid = nonecast(x_resid, y[0])
        x_resid_std = nonecast(x_resid_std, 1.0)

        # Compute the center of mass between bias-corrected and uncorrected predictions
        x_corrected = np.array(x_resid) + np.array(x)
        x_center = nonecenter(m=[emp_mass, 1 - emp_mass], x=[x_corrected, x])
        x_std_center = nonecenter(m=[emp_std_mass, 1 - emp_std_mass], x=[x_resid_std, x_std])

        return x_center, x_std_center, s

def observance(y: [float], o: dict, k: int, a: [float] = None):
    """ Marshal the k-step-ahead vector 'a' and the contemporaneous y[1:], returning a combined
        vector of all exogenous variables. Tracks a list of x and corresponding y by putting a's
        in a FIFO queue and by caching the previous value of y[1:].

          :param y:
          :param o:   state
          :param k:   number of steps ahead that 'a' is provided
          :param a:
          :returns: x_t [float], o    vector combining y[1:] with previously supplied a's, and updated state
    """
    yw = wrap(y)
    aw = wrap(a)
    if not o:
        o = {'a': [None for _ in range(k)],   # FIFO queue of known-in-advance values
             'z': None,                       # Stores the previous value of y[1:]
             'x': list(),
             'y': list()}

    y_t, z = split_exogenous(yw)      # Get the contemporaneous variables from the last observation
    if z:
        z_t = o.get('z')              # The previously revealed exogenous variables
        o['z'] = z                    # Store for next time
    else:
        z = None
        z_t = None

    # Determine the known-in-advance variable pertaining to the present
    if aw:
        a_t = o['a'].pop(0)           # The known-in-advance variable pertaining to this time step
        o['a'].append(aw)             # Put the k-ahead received a value(s) on the queue
    else:
        a = None
        a_t = None

    # Combine into exogenous variables ... but only if both have arrived
    if aw and z:
        x_t = z_t + a_t if (z_t and a_t) else None
    elif aw and not z:
        x_t = a_t if a_t else None
    elif (not aw) and z:
        x_t = z_t if z_t else None
    elif (not aw) and not z:
        x_t = None

    if (not z) and (not aw):
        o['y'].append([y_t])          # Special case, no need to wait
    else:
        if x_t:
            o['x'].append(x_t)
            o['y'].append([y_t])
            assert len(o['x']) == len(o['y']), "post-condition"

    return x_t, o

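# Tiny illustration of the observance quarantine, assuming only the function above: a value
# supplied as known k steps in advance is returned as x_t exactly k calls later, once it has
# become contemporaneous. The loop and the name _demo_observance are illustrative only.
def _demo_observance(k: int = 2):
    o = {}
    outputs = []
    for i in range(5):
        x_t, o = observance(y=[float(i)], o=o, k=k, a=[float(100 + i)])
        outputs.append(x_t)
    return outputs   # First k entries are None; thereafter x_t lags the supplied 'a' by k calls
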
def pmd_skater_factory(y: Y_TYPE, s: dict, k: int = 1, a: A_TYPE = None, t: T_TYPE = None, e: E_TYPE = None,
                       method: str = 'default', n_warm=50, model_params: dict = None) -> (Union[List[float], None], Union[List[float], None], Any):
    """ Predict using both simultaneously observed and known-in-advance variables

          y: Y_TYPE   scalar or list, where y[1:] are interpreted as contemporaneously observed exogenous variables
          s:          state
          k:          number of steps ahead to predict
          a:          (optional) scalar or list of variables known k steps in advance.
                      When calling, provide the known variable k steps ahead, not the contemporaneous one.
          t:          (optional) time of observation
          e:          (optional) maximum computation time (supply e > 60 to hint that fitting should be done)

          :returns: x [float], x_std [float], s'

        Remarks:
          - Model params cannot be changed after the first invocation.
          - Allows y=None to be used.
    """
    y = wrap(y)
    a = wrap(a)

    if not s.get('n_obs'):
        # Initialize
        s['n_obs'] = 0
        s['model'] = None
        s['immutable'] = pmd_set_immutable(k=k, y=y, a=a, n_warm=n_warm)
        s['params'] = pmd_params(method=method)
        if model_params:
            s['params'].update(model_params)
        s['o'] = dict()   # Observance
    else:
        pmd_check_consistent_usage(y=y, s=s, a=a, k=k)

    tick(s)
    if t is not None:
        pass   # Other models might perform an evolution step here. Not applicable to PMDARIMA.

    if y is not None:
        # Receive observation y[0], possibly exogenous y[1:] and possibly k-in-advance a[:]
        # Collect the contemporaneous variables from the queues
        s['n_obs'] += 1
        y_t, z = split_exogenous(y)
        x_t, s['o'] = observance(y=y, o=s['o'], k=k, a=a)

        # Update the pmdarima model itself
        if x_t is not None:
            if s['model'] is not None:
                if x_t:
                    s['model'].update([y_t], [x_t])
                else:
                    s['model'].update([y_t])

        # Predict
        if s['model'] is None:
            # Fall back to the last value if no model has been calibrated yet
            x = [y_t] * k
            if len(s['o']['x']) > 5 + 2 * k:
                Y = s['o']['y'][k + 1:]
                X = s['o']['x'][k + 1:]
                x_std = [np.nanstd([xi[0] - yk[0] for xi, yk in zip(X, Y[j:])]) for j in range(1, k + 1)]
            else:
                x_std = [1.0] * k   # Fall back to a dreadful estimate
        else:
            # Predict forward, supplying known data if it exists
            if not a and not z:
                z_forward = None
            else:
                if not a:
                    z_forward = [z] * k
                else:
                    z_forward = [list(z) + list(ai) for ai in s['o']['a']]   # Add what is known k steps ahead
                # This estimate could be improved by predicting the z's and attenuating.
                # It is only really a good idea for k=1.
            x, ntvls = s['model'].predict(n_periods=k, X=z_forward, return_conf_int=True, alpha=s['immutable']['alpha'])
            x_std = list([ntvl[1] - ntvl[0] for ntvl in ntvls])

    # Fit
    tock(s)
    if pmd_it_is_time_to_fit(s=s, e=e):
        tick(s)
        X = s['o'].get('x') or None
        Y = s['o']['y']
        s['model'] = pm.auto_arima(y=Y, X=X, **s['params'])
        tock(s, 'fit')

    if y is not None:
        return list(x), list(x_std), s
    else:
        return None, None, s
