Example #1
def get_resid(df):
    stl = STL(df['avg_time'], period=8, robust=True)
    res_avgt = stl.fit()

    stl = STL(df['num'], period=8, robust=True)
    res_num = stl.fit()

    return res_avgt.resid, res_num.resid
def test_period_detection(default_kwargs):
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    mod = STL(**class_kwargs)
    res = mod.fit()

    del class_kwargs['period']
    endog = class_kwargs['endog']
    index = pd.date_range('1-1-1959', periods=348, freq='M')
    class_kwargs['endog'] = pd.Series(endog, index=index)
    mod = STL(**class_kwargs)

    res_implicit_period = mod.fit()
    assert_allclose(res.seasonal, res_implicit_period.seasonal)
Example #3
    def fit(self, *, inner_iter=None, outer_iter=None, fit_kwargs=None):
        """
        Estimate STL and forecasting model parameters.

        Parameters
        ----------\n%(fit_params)s
        fit_kwargs : Dict[str, Any]
            Any additional keyword arguments to pass to ``model``'s ``fit``
            method when estimating the model on the decomposed residuals.

        Returns
        -------
        STLForecastResults
            Results with forecasting methods.
        """
        fit_kwargs = {} if fit_kwargs is None else fit_kwargs
        stl = STL(self._endog, **self._stl_kwargs)
        stl_fit: DecomposeResult = stl.fit(inner_iter=inner_iter,
                                           outer_iter=outer_iter)
        model_endog = stl_fit.trend + stl_fit.resid
        mod = self._model(model_endog, **self._model_kwargs)
        res = mod.fit(**fit_kwargs)
        if not hasattr(res, "forecast"):
            raise AttributeError(
                "The model's result must expose a ``forecast`` method.")
        return STLForecastResults(stl, stl_fit, mod, res, self._endog)
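# The docstring above describes the STLForecast flow: deseasonalise with STL,
# fit ``model`` on trend + resid, then re-add the seasonal component when
# forecasting. A minimal usage sketch (synthetic monthly data; ARIMA is chosen
# here only for illustration):
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.forecasting.stl import STLForecast

rng = np.random.default_rng(0)
index = pd.date_range("2000-01-31", periods=120, freq="M")
y = pd.Series(10 + 0.05 * np.arange(120)
              + 2 * np.sin(2 * np.pi * np.arange(120) / 12)
              + rng.normal(scale=0.5, size=120), index=index)

stlf = STLForecast(y, ARIMA, model_kwargs={"order": (1, 1, 0)}, period=12)
res = stlf.fit()
print(res.forecast(12))  # seasonal component is re-added to the forecasts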
Example #4
 def predict_past(self, df, freq_period, steps):
     scalerfile = self.directory + '/scaler_pred.sav'
     if not os.path.isfile(scalerfile):  # no saved scaler yet: fit and persist one
         if (df["y"].max() - df["y"].min()) > 100:
             if self.verbose == 1:
                 print("PowerTransformation scaler used")
             scaler = PowerTransformer()
         else:
             if self.verbose == 1:
                 print("Identity scaler used")
             scaler = IdentityTransformer()
         self.scaler2 = scaler.fit(np.reshape(np.array(df["y"]), (-1, 1)))
         Y = self.scaler2.transform(np.reshape(np.array(df["y"]), (-1, 1)))
         pickle.dump(self.scaler2, open(scalerfile, 'wb'))
     elif os.path.isfile(scalerfile):
         self.scaler2 = pickle.load(open(scalerfile, "rb"))
         Y = self.scaler2.transform(np.reshape(np.array(df["y"]), (-1, 1)))
     if freq_period % 2 == 0:
         freq_period = freq_period + 1
     decomposition = STL(Y, period=freq_period + 1)
     decomposition = decomposition.fit()
     decomposition.plot()
     plt.show()
     df.loc[:, 'trend'] = decomposition.trend
     df.loc[:, 'seasonal'] = decomposition.seasonal
     df.loc[:, 'residual'] = decomposition.resid
     df = df.fillna(method="bfill")
     self.trend = np.asarray(df.loc[:, 'trend'])
     self.seasonal = np.asarray(df.loc[:, 'seasonal'])
     self.residual = np.asarray(df.loc[:, 'residual'])
     prediction, _, _ = self.make_prediction(steps)
     return prediction[0]
Example #5
def smad(ts,
         m=3.0,
         period=None,
         stl_seasonal=25,
         only_low_values=False,
         score=False):
    '''
        Seasonal-MAD

        Input:
            ts: pd.Series with DatetimeIndex
            m: threshold in standard deviations for the MAD test
            period: seasonal period of the time series
            stl_seasonal: STL seasonal smoother parameter
            only_low_values: return anomalies only for low values
            score: if True, returns the decision function
        Output:
            the anomalous observations of ``ts``, or the MAD decision
            scores if ``score=True``
    '''
    # Seasonal component, following the paper
    if period is not None:
        stl = STL(ts, period=period, seasonal=stl_seasonal)
    else:
        stl = STL(ts, seasonal=stl_seasonal)
    res = stl.fit()  # fit
    # compute the residual
    residuo = ts - np.nanmedian(ts) - res.seasonal
    # search for outliers with MAD
    mad = MAD(only_low_values=only_low_values)
    mad.fit(residuo)
    # return
    if score:
        return mad.decision_function(residuo)
    else:
        index = mad.predict(residuo, m=m).index
        return ts.loc[index]
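# Usage sketch for smad: it expects a pd.Series with a DatetimeIndex and relies
# on the MAD detector imported elsewhere in this module. Synthetic hourly data:
import numpy as np
import pandas as pd

idx = pd.date_range("2021-01-01", periods=24 * 30, freq="H")
hours = np.arange(len(idx))
values = 10 + 3 * np.sin(2 * np.pi * (hours % 24) / 24) + np.random.normal(0, 0.3, len(idx))
values[500] += 15  # injected spike
ts = pd.Series(values, index=idx)

# period=24 captures the daily cycle; stl_seasonal must be odd, as STL requires.
print(smad(ts, m=3.0, period=24, stl_seasonal=25))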
Example #6
  def extract_climate_trend(self, df, trend='STL'):
    '''
      input_params: 
        df: input the dataframe of which the trends are to be extracted from
            requirements for the dataframe:
            - dataframe index need to be datetime,
            - datetime index should be sorted
            - should be a monthly resampling
    '''
    climate_trend_df = pd.DataFrame()

    if trend == 'STL':
      yr_list = df.index.year
      #print(yr_list[-1])
      #print(yr_list[0])
      seasons = yr_list[-1] - yr_list[0]

      if seasons % 2 == 0:
        seasons += 1
      

      for col in df:
        stl = STL(df[col], period=12, seasonal=seasons, robust=True)
        res = stl.fit()
        #print(res.trend)
        climate_trend_df[col] = res.trend
        
    return climate_trend_df
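# Usage sketch for extract_climate_trend, assuming ``extractor`` is an instance
# of the surrounding class. The dataframe meets the documented requirements:
# a sorted DatetimeIndex at monthly frequency.
import numpy as np
import pandas as pd

idx = pd.date_range("1980-01-31", "2019-12-31", freq="M")
df = pd.DataFrame(
    {"temperature": 15 + 0.002 * np.arange(len(idx))
                    + 5 * np.sin(2 * np.pi * (idx.month.to_numpy() - 1) / 12)
                    + np.random.normal(0, 0.5, len(idx))},
    index=idx,
).sort_index()

trend_df = extractor.extract_climate_trend(df, trend='STL')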
def decompostion_STL(series, period=None, title=''):
    from statsmodels.tsa.seasonal import STL

    stl = STL(series, period=period, robust=True)
    res_robust = stl.fit()
    fig = res_robust.plot()
    fig.text(0.1, 0.95, title, size=15, color='purple')
    plt.show()
def test_short_class(default_kwargs_short):
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs_short)
    mod = STL(**class_kwargs)
    res = mod.fit(outer_iter=outer, inner_iter=inner)

    expected = results.loc['short'].sort_index()
    assert_allclose(res.seasonal, expected.season)
    assert_allclose(res.trend, expected.trend)
    assert_allclose(res.weights, expected.rw)
 def _decompose(self, ts):
     if self.dku_config.model == "multiplicative":
         self.parameters["endog"] = np.log(ts)
         stl = STL(**self.parameters)
         statsmodel_results = stl.fit()
         trend = np.exp(statsmodel_results.trend.values)
         seasonal = np.exp(statsmodel_results.seasonal.values)
         residuals = np.exp(statsmodel_results.resid.values)
         decomposition = _DecompositionResults(trend=trend,
                                               seasonal=seasonal,
                                               residuals=residuals)
     elif self.dku_config.model == "additive":
         self.parameters["endog"] = ts
         stl = STL(**self.parameters)
         statsmodel_results = stl.fit()
         decomposition = _DecompositionResults()
         decomposition.load(statsmodel_results)
     return decomposition
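# The multiplicative branch above decomposes log(ts) additively and then
# exponentiates the components, so trend * seasonal * resid reconstructs ts.
# A standalone sketch of the same idea with plain statsmodels (synthetic data):
import numpy as np
from statsmodels.tsa.seasonal import STL

x = (100 + np.arange(120)) * (1 + 0.1 * np.sin(2 * np.pi * np.arange(120) / 12))
res = STL(np.log(x), period=12).fit()
trend, seasonal, resid = np.exp(res.trend), np.exp(res.seasonal), np.exp(res.resid)
assert np.allclose(trend * seasonal * resid, x)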
Example #10
def test_pickle(default_kwargs):
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    mod = STL(**class_kwargs)
    res = mod.fit()
    pkl = pickle.dumps(mod)
    reloaded = pickle.loads(pkl)
    res2 = reloaded.fit()
    assert_allclose(res.trend, res2.trend)
    assert_allclose(res.seasonal, res2.seasonal)
    assert mod.config == reloaded.config
def test_ntjump_1_class(default_kwargs):
    default_kwargs['ntjump'] = 1
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    mod = STL(**class_kwargs)
    res = mod.fit(outer_iter=outer, inner_iter=inner)

    expected = results.loc['ntjump-1'].sort_index()
    assert_allclose(res.seasonal, expected.season)
    assert_allclose(res.trend, expected.trend)
    assert_allclose(res.weights, expected.rw)
def test_pandas(default_kwargs, robust):
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs, robust)
    endog = pd.Series(class_kwargs['endog'], name='y')
    period = class_kwargs['period']
    mod = STL(endog=endog, period=period)
    res = mod.fit()
    assert isinstance(res.trend, pd.Series)
    assert isinstance(res.seasonal, pd.Series)
    assert isinstance(res.resid, pd.Series)
    assert isinstance(res.weights, pd.Series)
Example #13
def plot_time_trend(df, name):
    if name == "VN-INDEX":
        marker_color = HOSE_COLOR
    else:
        marker_color = HNX_COLOR

    stl = STL(df[df.index.year >= 2006]["Close"],
              period=250, seasonal=21,
              robust=True)
    res = stl.fit()
    fig = make_subplots(shared_xaxes=True,
                        rows=4, cols=1)
    fig.add_trace(go.Scatter(
        y=res.observed,
        x=res.observed.index,
        name="Original Index",
        showlegend=False,
        marker_color=marker_color

    ),
        row=1, col=1)
    fig.add_trace(go.Scatter(y=res.trend,
                             x=res.trend.index,
                             name="Trend",
                             showlegend=False,
                             marker_color=marker_color,
                             ),
                  row=2, col=1)
    fig.add_trace(go.Scatter(
        y=res.seasonal,
        x=res.seasonal.index,
        name="Season",
        showlegend=False,
        marker_color=marker_color
    ),
        row=3, col=1)
    fig.add_trace(go.Scatter(
        y=res.resid,
        x=res.resid.index,
        showlegend=False,
        marker_color=marker_color,
        name="Resid",
    ),
        row=4, col=1)
    # Update yaxis properties
    fig.update_yaxes(title_text="Original", row=1, col=1)
    fig.update_yaxes(title_text="Trend", row=2, col=1)
    fig.update_yaxes(title_text="Seasonal", row=3, col=1)
    fig.update_yaxes(title_text="Residuals", row=4, col=1)
    fig.update_layout(title=f"Seasonal-Trend Decomposition of {name}",
                      height=500
                      )

    return fig
def test_baseline_class(default_kwargs):
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    mod = STL(**class_kwargs)
    res = mod.fit(outer_iter=outer, inner_iter=inner)

    expected = results.loc['baseline'].sort_index()
    assert_allclose(res.trend, expected.trend)
    assert_allclose(res.seasonal, expected.season)
    assert_allclose(res.weights, expected.rw)
    resid = class_kwargs['endog'] - expected.trend - expected.season
    assert_allclose(res.resid, resid)
Example #15
def testStationarity(df, keywords):

    for keyword in keywords:

        product = readData(df, keyword)

        stl = STL(product, seasonal=13)
        res = stl.fit()
        season = res.seasonal
        result = adfuller(season)

        if result[1] > 0.05:
            print(keyword, result[1])
def STL_decomposition(df, column, year):
    df = df[(df.date_c.dt.year == year)]
    df = df.sort_values(by="date_c")
    df = df[["date_c", column]]
    df = df.resample("1D", on="date_c").mean()[[column]]
    df = df.interpolate(method="time")
    series = df[column]

    stl = STL(series, period=29, robust=True)
    res = stl.fit()

    print("Trend mean = {}".format(res.trend.mean()), flush=True)
    return res
def predict(data, hyperparams):
    if hyperparams['seasonality']:
        stl_data = pd.Series(data=list(data.iloc[:, 1]),
                             index=list(data.iloc[:, 0]))
        stl = STL(stl_data, period=hyperparams['period'])
        resids = stl.fit().resid.values
        residual_df = pd.DataFrame(data={'residuals': resids})
        anomalies = anom_detect().evaluate(residual_df, col_name='residuals')
        anomalies_indices = list(anomalies.index)
    else:
        db = DBSCAN(eps=hyperparams['eps'],
                    min_samples=hyperparams['min_pts']).fit(data)
        anomalies_indices = np.argwhere(db.labels_ == -1).flatten().tolist()
    return anomalies_indices
Example #18
def stl_decomposition(series, period=12):
    """
    Run STL decomposition on a pandas Series object.
    Parameters
    ----------
    series : Series object
        The observations to be deseasonalised.
    period : int (optional)
        Length of the seasonal period in observations.
    """

    stl = STL(series, period, robust=True)
    res = stl.fit()
    return res
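# Usage sketch for stl_decomposition above (synthetic monthly series):
import numpy as np
import pandas as pd

idx = pd.date_range("2015-01-31", periods=60, freq="M")
sales = pd.Series(100 + np.arange(60)
                  + 10 * np.sin(2 * np.pi * np.arange(60) / 12), index=idx)
res = stl_decomposition(sales, period=12)
deseasonalised = sales - res.seasonal  # remove the estimated seasonal component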
Example #19
 def fit_predict_ES(self, df, freq_period, steps=1):
     from statsmodels.tsa.holtwinters import ExponentialSmoothing
     self.freq_period = freq_period
     decomposition = STL(df["y"], period=freq_period + 1)
     decomposition = decomposition.fit()
     df.loc[:, 'trend'] = decomposition.trend
     df.loc[:, 'seasonal'] = decomposition.seasonal
     df.loc[:, 'residual'] = decomposition.resid
     fit_tres = ExponentialSmoothing(df["trend"], seasonal_periods=freq_period, seasonal='add').fit()
     prediction_trend = fit_tres.forecast(steps)
     fit_res = ExponentialSmoothing(df["residual"], seasonal_periods=freq_period, seasonal='add').fit()
     prediction_res = fit_res.forecast(steps)
     fit_sea = ExponentialSmoothing(df["seasonal"], seasonal_periods=freq_period, seasonal='add').fit()
     prediction_sea = fit_sea.forecast(steps)
     prediction = prediction_trend + prediction_res + prediction_sea
     return prediction
Example #20
def twitter_score(x, period=None, seasonal=45):
    '''
        Returns the MAD decision scores used to flag anomalous values;
        the input must be a pd.Series with a temporal index.'''
    # estimate the seasonal component
    if period is not None:
        stl = STL(x, period=period, seasonal=seasonal)
    else:
        stl = STL(x, seasonal=seasonal)
    res = stl.fit()
    # compute the residual
    residuo = x - np.nanmedian(x) - res.seasonal
    # search for outliers with MAD
    mad = MAD()
    mad.fit(residuo)
    return mad.decision_function(residuo)
Example #21
def plot_stl(data: np.ndarray) -> DecomposeResult:
    stl = STL(data, period=12)
    res = stl.fit()
    fig, axs = plt.subplots(3, sharex=True, figsize=(10, 10))
    axs[0].set_title("Trend")
    axs[0].plot(res.trend, color="blue")
    axs[0].grid()
    axs[1].set_title("Seasonal")
    axs[1].plot(res.seasonal, color="royalblue")
    axs[1].grid()
    axs[2].set_title("Residual")
    axs[2].plot(res.resid, color="darkblue")
    axs[2].grid()
    plt.xticks(np.arange(0, len(data), 12))
    plt.show()

    return res
def seasonal_esd(ts,
                 periodicity=None,
                 hybrid=False,
                 max_anomalies=10,
                 alpha=0.05):
    """
    Compute the Seasonal Extreme Studentized Deviate of a time series.
    The steps taken are first to decompose the time series with STL into
    trend, seasonality and residual components. Then, calculate the Median
    Absolute Deviation (MAD) if hybrid (otherwise the median) and perform a
    regular ESD test on the residual, which we calculate as:
                    R = ts - seasonality - MAD or median

    Note: The statsmodels library requires a seasonal period to compute the
    STL decomposition, hence the ``periodicity`` parameter. If none is given,
    it is automatically set to 20% of the total time series length.

    Args:
    ts (list or np.array): The timeseries to compute the ESD.
    periodicity (int): Number of time points for a season.
    hybrid (bool): See Twitter's research paper for difference.
    max_anomalies (int): The number of times the Grubbs' Test will be applied to the ts.
    alpha (float): The significance level.

    Returns:
    list int: The indices of the anomalies in the timeseries.

    """
    if max_anomalies >= len(ts) / 2:
        raise ValueError(
            'The maximum number of anomalies must be less than half the size of the time series.'
        )

    ts = np.array(ts)
    period = periodicity or int(
        0.2 * len(ts))  # Seasonality is 20% of the ts if not given.
    stl = STL(ts, period=period, robust=True)
    decomposition = stl.fit()
    residual = ts - decomposition.seasonal - np.median(ts)
    outliers = generalized_esd(residual,
                               max_anomalies=max_anomalies,
                               alpha=alpha,
                               hybrid=hybrid)
    return outliers
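# Usage sketch for seasonal_esd; ``generalized_esd`` is assumed to be defined
# elsewhere in this module, and the series below is synthetic.
import numpy as np

rng = np.random.default_rng(42)
n, period = 500, 50
ts = 10 * np.sin(2 * np.pi * np.arange(n) / period) + rng.normal(0, 1, n)
ts[100] += 25  # injected anomaly
print(sorted(seasonal_esd(ts, periodicity=period, hybrid=True, max_anomalies=5)))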
Example #23
def decompose(df, period=52):
    d = pd.DataFrame()
    stl = STL(df["Weekly_Sales"], period=period)
    tsd = stl.fit()

    resid = tsd.resid.to_frame().reset_index()
    resid["DateTime"] = df["DateTime"]

    trend = tsd.trend.to_frame().reset_index()
    trend["DateTime"] = df["DateTime"]

    season = tsd.seasonal.to_frame().reset_index()
    season["DateTime"] = df["DateTime"]
    d = df
    d.loc[:, "season"] = season["season"]
    d.loc[:, "trend"] = trend["trend"]
    d.loc[:, "resid"] = resid["resid"]
    return d
Example #24
def singleDecomp(df, keyword):

    dataFrame = df[df['keyword'].str.contains(keyword)]
    apfelstrudel = dataFrame['interest'].tolist()

    apfelstrudel = pd.Series(apfelstrudel,
                             index=pd.date_range('1-1-2017',
                                                 periods=len(apfelstrudel),
                                                 freq='W'),
                             name=keyword)

    stl = STL(apfelstrudel, seasonal=13)
    decomposition = stl.fit()

    #decomposition.plot()
    #plt.show()

    return decomposition
Example #25
def twitter(x, period=None, seasonal=45):
    '''
        Returns the observations of ``x`` flagged as anomalies;
        the input must be a pd.Series with a DatetimeIndex.'''
    # estimate the seasonal component
    if period is not None:
        stl = STL(x, period=period, seasonal=seasonal)
    else:
        stl = STL(x, seasonal=seasonal)

    res = stl.fit()
    # compute the residual
    residuo = x - np.nanmedian(x) - res.seasonal
    # search for outliers with MAD
    mad = MAD()
    mad.fit(residuo)
    index = mad.predict(residuo).index
    return x.loc[index]
def clean(series):
    n_series = len(series)
    if n_series % 2 == 0:
        n_series = n_series - 1
    stl = STL(series, period=7, robust=True, seasonal=n_series)
    res = stl.fit()

    detrend = series - res.trend
    strength = 1 - np.var(res.resid) / np.var(detrend)
    if strength >= 0.6:
        series = res.trend + res.resid  # deseasonlized series

    tt = np.arange(len(series))
    model = SuperSmoother()
    model.fit(tt, series)
    yfit = model.predict(tt)
    resid = series - yfit

    resid_q = np.quantile(resid, [0.25, 0.75])
    iqr = np.diff(resid_q)
    #limits = resid.q + 3 * iqr * [-1, 1]
    limits = resid_q + 5 * iqr * [-1, 1]

    # Find residuals outside limits
    series_cleaned = series.copy()
    outliers = None
    if (limits[1] - limits[0]) > 1e-14:
        outliers = [
            a or b for a, b in zip((resid < limits[0]).to_numpy(), (
                resid > limits[1]).to_numpy())
        ]
        if any(outliers):
            series_cleaned.loc[outliers] = np.nan
            # Replace outliers
            id_outliers = [i for i, x in enumerate(outliers) if x]
            for ii in id_outliers:
                xx = [ii - 2, ii - 1, ii + 1, ii + 2]
                xx = [x for x in xx if x < series_cleaned.shape[0] and x >= 0]
                assert (len(xx) > 0)
                assert (not np.isnan(series_cleaned.iloc[xx]).to_numpy().all())
                series_cleaned.iloc[ii] = np.nanmedian(
                    series_cleaned.iloc[xx].to_numpy().flatten())

    return series_cleaned, outliers
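# Usage sketch for clean (a daily pd.Series with one injected spike;
# SuperSmoother comes from the ``supersmoother`` package imported elsewhere):
import numpy as np
import pandas as pd

idx = pd.date_range("2022-01-01", periods=200, freq="D")
y = pd.Series(20 + 5 * np.sin(2 * np.pi * np.arange(200) / 7)
              + np.random.normal(0, 0.5, 200), index=idx)
y.iloc[60] += 30  # injected outlier
cleaned, outliers = clean(y)
print(sum(outliers) if outliers is not None else 0)  # number of flagged points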
Example #27
def nonStatKeywords(df, keywords):

    for keyword in keywords:

        dataFrame = df[df['keyword'].str.contains(keyword)]
        product = dataFrame['interest'].tolist()
        product = pd.Series(product,
                            index=pd.date_range('1-1-2017',
                                                periods=len(product),
                                                freq='W'),
                            name=keyword)

        stl = STL(product, seasonal=13)
        res = stl.fit()
        season = res.seasonal
        result = adfuller(season)

        if result[1] > 0.05:
            print(keyword, result[1])
Example #28
def stl_and_plot(file: str, seconds: int = 1_500_000):
    df = pd.read_csv(os.path.join('data', file))
    timestamps, values, interval = linear_interpolation(df.timestamp, df.value)
    values = (values - values.mean()) / values.std()
    timestamps = timestamps[:seconds // interval]
    values = values[:seconds // interval]

    start_time = datetime.datetime.fromtimestamp(timestamps[0])
    end_time = datetime.datetime.fromtimestamp(timestamps[-1])
    series = pd.Series(values,
                       index=pd.date_range(start=start_time,
                                           end=end_time,
                                           freq=pd.offsets.Second(interval)),
                       name=file)

    stl = STL(series, period=24 * 3600 // interval, seasonal=21)
    res = stl.fit()
    res.plot()
    plt.savefig(os.path.join('out', f'{file}.png'))
Example #29
    def compute_measures(self, var, window=None):
        """
        Compute several measures from the wind series.
        ``window`` is a dictionary mapping a window name to a window length.

        :return: dictionary of computed measures
        """
        if self.raw_data is None:
            raise NameError("Raw data is not loaded")

        if var > self.raw_data.shape[1]:
            raise NameError("Invalid variable number")

        dvals = {}
        dvals['SpecEnt'] = spectral_entropy(self.raw_data[:, var], sf=1)
        dvals['SampEnt'] = sample_entropy(self.raw_data[:, var], order=2)

        data = self.raw_data[:, var]
        for w in window:
            print(w)
            lw = window[w]
            print(lw)
            length = int(data.shape[0] / lw)
            size = lw * length
            datac = data[:size]
            datac = datac.reshape(-1, lw)
            means = np.mean(datac, axis=1)
            stds = np.std(datac, axis=1)
            dvals[f'Stab{w}'] = np.std(means)
            dvals[f'Lump{w}'] = np.std(stds)
            # decompositions
            stl = STL(self.raw_data[:, var], period=lw)
            res = stl.fit()
            strength_seasonality = 1 - (np.var(
                res.resid)) / (np.var(res.observed - res.trend))
            strength_trend = 1 - (np.var(
                res.resid)) / (np.var(res.observed - res.seasonal))
            dvals[f'strength_seasonality{w}'] = strength_seasonality
            dvals[f'strength_trend{w}'] = strength_trend
            fig = res.plot()

        return dvals
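# The strength-of-seasonality and strength-of-trend measures above match the
# definitions popularised by Hyndman & Athanasopoulos: F_S = 1 - Var(R)/Var(S + R)
# and F_T = 1 - Var(R)/Var(T + R). A standalone sketch on synthetic data:
import numpy as np
from statsmodels.tsa.seasonal import STL

rng = np.random.default_rng(0)
x = (5 * np.sin(2 * np.pi * np.arange(600) / 24)
     + 0.01 * np.arange(600) + rng.normal(0, 1, 600))
res = STL(x, period=24).fit()
f_seasonal = 1 - np.var(res.resid) / np.var(res.observed - res.trend)
f_trend = 1 - np.var(res.resid) / np.var(res.observed - res.seasonal)
print(round(f_seasonal, 3), round(f_trend, 3))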
Example #30
    def decompose(self, col=feature):
        # decomposition = sm.tsa.seasonal_decompose(pd.DataFrame(self.df[col]), model='Additive')
        # fig = decomposition.plot()
        values = self.df[col]
        idx = values.index
        freq = pd.infer_freq(idx)

        # Rebuild the series on an explicit DatetimeIndex with the inferred frequency
        series = pd.Series(values,
                           index=pd.date_range(idx.date[0],
                                               idx.date[len(values) - 1],
                                               freq=freq),
                           name=col)

        print(series)
        stl = STL(series, seasonal=7)
        res = stl.fit()
        fig = res.plot()
        plt.show()