def seasonal_mean(x, freq):
    """Return the mean of each seasonal position in ``x``.

    Position ``i`` of the result averages ``x[i]``, ``x[i + freq]``,
    ``x[i + 2 * freq]`` and so on.

    Parameters
    ----------
    x : array-like
        Observations ordered along the first axis. For 2-D input every
        column is averaged separately.
    freq : int
        Number of periods per cycle, e.g. 12 for monthly data.

    Returns
    -------
    ndarray
        ``freq`` means, one per position in the cycle. NaNs are ignored in
        the mean.
    """
    # Reduce along axis 0 only: without an explicit axis a 2-D input would be
    # collapsed across its columns into a single scalar per position.
    return np.array([pd_nanmean(x[i::freq], axis=0) for i in range(freq)])
def seasonal_mean(x, freq):
    """Average ``x`` separately for every position within a cycle.

    ``freq`` is the integer number of periods per cycle (12 for monthly
    data, for example). Entry ``k`` of the result is the mean, taken along
    axis 0, of ``x[k::freq]``. NaNs are ignored in the mean.
    """
    phase_means = []
    for offset in range(freq):
        same_phase = x[offset::freq]
        phase_means.append(pd_nanmean(same_phase, axis=0))
    return np.array(phase_means)
def decompose(df, period=365, lo_frac=0.6, lo_delta=0.01):
    """Create a seasonal-trend (with Loess, aka "STL") decomposition of
    observed time series data.

    This implementation is modeled after the
    ``statsmodels.tsa.seasonal_decompose`` method but substitutes a Lowess
    regression for a convolution in its trend estimation.

    This is an additive model, Y[t] = T[t] + S[t] + e[t]

    For more details on lo_frac and lo_delta, see:
    `statsmodels.nonparametric.smoothers_lowess.lowess()`

    Args:
        df (pandas.DataFrame): Time series of observed counts. This DataFrame
            must be continuous (no gaps or missing data), and include a
            ``pandas.DatetimeIndex``.
        period (int, optional): Most significant periodicity in the observed
            time series, in units of 1 observation. Ex: to accommodate strong
            annual periodicity within years of daily observations,
            ``period=365``. Values larger than the number of observations
            are clamped to that number.
        lo_frac (float, optional): Fraction of data to use in fitting Lowess
            regression.
        lo_delta (float, optional): Fractional distance within which to use
            linear-interpolation instead of weighted regression. It is
            multiplied by the number of observations before being handed to
            Lowess. Using non-zero ``lo_delta`` significantly decreases
            computation time.

    Returns:
        `statsmodels.tsa.seasonal.DecomposeResult`: An object with DataFrame
        attributes for the seasonal, trend, and residual components, as well
        as the average seasonal cycle.

    Raises:
        ValueError: If ``df`` holds no observations or ``period`` is smaller
            than 1.
    """
    # get plain np array
    observed = np.asanyarray(df).squeeze()
    n_obs = len(observed)

    # Fail early with a clear message; otherwise these inputs only surface
    # later as a ZeroDivisionError or a broadcasting error.
    if n_obs == 0:
        raise ValueError("df must contain at least one observation")
    if period < 1:
        raise ValueError("period must be a positive integer")

    # use some existing pieces of statsmodels
    lowess = sm.nonparametric.lowess
    _pandas_wrapper, _ = _maybe_get_pandas_wrapper_freq(df)

    # calc trend, remove from observation
    trend = lowess(observed, np.arange(n_obs),
                   frac=lo_frac,
                   delta=lo_delta * n_obs,
                   return_sorted=False)
    detrended = observed - trend

    # period must not be larger than size of series to avoid introducing NaNs
    period = min(period, n_obs)

    # calc one-period seasonality, remove tiled array from detrended
    period_averages = np.array(
        [pd_nanmean(detrended[i::period]) for i in range(period)])
    # 0-center the period avgs
    period_averages -= np.mean(period_averages)
    # tile one cycle more than needed, then trim to the series length
    seasonal = np.tile(period_averages, n_obs // period + 1)[:n_obs]
    resid = detrended - seasonal

    # convert the arrays back to appropriate dataframes, stuff them back into
    # the statsmodel object
    results = list(map(_pandas_wrapper, [seasonal, trend, resid, observed]))
    dr = DecomposeResult(seasonal=results[0],
                         trend=results[1],
                         resid=results[2],
                         observed=results[3],
                         period_averages=period_averages)
    return dr
def _seaonal_cyle_ufunc(data, period=None):
    '''
    _seaonal_cyle_ufunc(data, period=None)
    calculates a repeating, zero-centered seasonal cycle for every column

    Parameters
    ----------
    data: 1-D or 2-D array-like
        The y-values of the observed points, ordered along axis 0.
        A 1-D input is treated as a single column. Inputs that are not
        ndarray subclasses (for example an xr.DataArray) are converted to
        a numpy array.
    period: int
        the number of samples in one seasonal cycle.
        This depends on the sampling frequency of your data
        if monthly, then it is 12
        if daily then 365
        It must be a positive integer; the default of None is only a
        placeholder and cannot be used with data that contains values.

    Returns
    -------
    out: ndarray, float
        array of shape (data.shape[0], number of columns) holding the
        repeating seasonal cycle of each column. Columns that contain
        only NaN stay NaN.
    '''
    ### Turn array-likes such as xr.DataArray into a numpy array
    data = np.asanyarray(data)

    ### This adds an extra dimension if 1D
    if data.ndim == 1:
        data = np.expand_dims(data, axis=1)

    ### Get dimensions
    n_time = data.shape[0]
    n_cols = data.shape[1]

    ### Allocate space to store data (np.NaN no longer exists in NumPy 2)
    seasonal = np.full((n_time, n_cols), np.nan)

    ### Loop over the stacked dimension
    for col in range(n_cols):
        column = data[:, col]

        ### A column without a single valid value is skipped and stays NaN
        if np.isnan(column).all():
            continue

        ### Mean of every position in the cycle, then remove the overall mean
        period_averages = np.array(
            [pd_nanmean(column[i::period]) for i in range(period)])
        period_averages = period_averages - np.mean(period_averages)

        ### Tile one cycle more than needed and trim to the series length
        seasonal[:, col] = np.tile(period_averages,
                                   n_time // period + 1)[:n_time]

    return seasonal
def series_seasonal(df, window):
    """Return one axis-0 mean per position inside a cycle of ``window`` steps.

    Entry ``k`` of the returned array is ``pd_nanmean(df[k::window], axis=0)``,
    i.e. the mean over every ``window``-th element starting at offset ``k``.
    """
    per_offset = []
    for offset in range(window):
        per_offset.append(pd_nanmean(df[offset::window], axis=0))
    return np.array(per_offset)
def seasonalMean(s, freq):
    """Return ``freq`` means, one for each position within a cycle.

    Entry ``k`` of the result is ``pd_nanmean(s[k::freq])``, the mean of
    every ``freq``-th element of ``s`` starting at offset ``k``.
    """
    means = []
    for start in range(freq):
        every_freq_th = s[start::freq]
        means.append(pd_nanmean(every_freq_th))
    return np.array(means)
def seasonal_mean(x, freq):
    """Return the axis-0 mean of ``x`` for each of the ``freq`` cycle positions.

    Position ``k`` covers ``x[k]``, ``x[k + freq]``, ``x[k + 2 * freq]``, ...
    """
    # Lazily slice out the elements sharing a cycle position, then average
    # each slice along the first axis.
    phase_slices = (x[start::freq] for start in range(freq))
    return np.array([pd_nanmean(chunk, axis=0) for chunk in phase_slices])