示例#1
0
 def _tune(self,
           y,
           period,
           x=None,
           metric="mse",
           val_size=None,
           verbose=False):
     """
     Tune hyperparameters of the model.

     Runs a stepwise ``auto_arima`` search with ``seasonal=False`` on ``y``, scored out-of-sample on the
     last ``val_size`` observations, then copies the parameters of the selected model into ``self.params``
     and marks them as tuned. Also stores the resolved period in ``self.period``.
     :param y: pd.Series or 1-D np.array, time series to predict.
     :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
     for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
     data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
     "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
     :param x: pd.DataFrame or 2-D np.array, exogenous predictors handed to the search, optional.
     :param metric: Str, the metric used for model selection. One of "mse" (mean squared error), "mae" (mean absolute
     error).
     :param val_size: Int, the number of most recent observations to use as validation set for tuning. Defaults to
     10% of the length of ``y`` when None.
     :param verbose: Boolean, True for printing additional info while tuning.
     :return: None
     """
     # isinstance also accepts str subclasses, unlike an exact type comparison
     self.period = (data_utils.period_to_int(period)
                    if isinstance(period, str) else period)
     if val_size is None:
         # Default hold-out: the most recent 10% of the series
         val_size = int(len(y) * .1)
     # NOTE(review): auto_arima is called directly on y and this pipeline is never fit in this method,
     # so the Fourier step does not take part in the order search -- confirm this is intended.
     pipe = pipeline.Pipeline([
         ("fourier", FourierFeaturizer(
             self.period,
             # The number of Fourier terms must be an integer, hence floor division
             self.period // 2)),  # TODO: Tune no. of Fourier terms as well?
         ("arima",
          auto_arima(y,
                     m=self.period,
                     seasonal=False,
                     d=None,
                     information_criterion='oob',
                     maxiter=100,
                     error_action='ignore',
                     suppress_warnings=True,
                     stepwise=True,
                     max_order=None,
                     out_of_sample_size=val_size,
                     scoring=metric,
                     trace=verbose,
                     exogenous=x))
     ])
     # The second pipeline step holds the model chosen by the search
     self.params.update(pipe.steps[1][1].get_params())
     self.params["tuned"] = True
示例#2
0
 def fit(self,
         y,
         period,
         x=None,
         metric="mse",
         val_size=None,
         verbose=False):
     """
     Build the model using best-tuned hyperparameter values.

     Stores ``y`` and the model's name/key on the instance, runs ``self._tune`` with the same arguments, then
     fits a Fourier featurizer + ARIMA pipeline that uses the tuned ``order`` and ``seasonal_order`` from
     ``self.params``. The fitted pipeline is kept in ``self.model``.
     :param y: pd.Series or 1-D np.array, time series to predict.
     :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
     for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
     data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
     "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
     :param x: pd.DataFrame or 2-D np.array, exogenous predictors, optional
     :param metric: Str, the metric used for model selection. One of "mse" (mean squared error), "mae" (mean absolute
     error).
     :param val_size: Int, the number of most recent observations to use as validation set for tuning.
     :param verbose: Boolean, True for printing additional info while tuning.
     :return: None
     """
     self.y = y
     self.name = "Fourier ARIMA"
     self.key = "fourier_sarima"
     # Tuning sets self.period and fills self.params with the selected orders
     self._tune(y=y,
                period=period,
                x=x,
                metric=metric,
                val_size=val_size,
                verbose=verbose)
     # The number of Fourier terms must be an integer, hence floor division
     n_terms = self.period // 2
     tuned_arima = arima.ARIMA(maxiter=100,
                               order=self.params["order"],
                               seasonal_order=self.params["seasonal_order"],
                               suppress_warnings=True)
     pipe = pipeline.Pipeline([
         ("fourier", FourierFeaturizer(self.period, n_terms)),
         ("arima", tuned_arima)
     ])
     self.model = pipe.fit(y, exogenous=x)
示例#3
0
    vector_serie.append(vector[s])

    ##########################################################
    ########## AUTOARIMA MODEL

    errores_proceso_ar = []
    try:
        # Let's create a pipeline with multiple stages... the Wineind dataset is
        # seasonal, so we'll include a FourierFeaturizer so we can fit it without
        # seasonality
        pipe = pipeline.Pipeline([
            ("fourier", ppc.FourierFeaturizer(
                m=5)),  #modela la estacionalidad con periodicidad 5
            (
                "arima",
                arima.AutoARIMA(
                    stepwise=True,
                    trace=1,
                    error_action="ignore",
                    seasonal=False,  # because we use Fourier
                    suppress_warnings=True))
        ])

        pipe.fit(train_arima)
        print("Model fit:")
        print(pipe)

        # We can compute predictions the same way we would on a normal ARIMA object:
        forecast_arima = pipe.predict(n_periods=int(len(test)))

        rmse_test_arima = mean_squared_error(test_arima,
                                             forecast_arima,
示例#4
0
import numpy as np
import pmdarima as pm
from pmdarima import pipeline, preprocessing as ppc, arima
from matplotlib import pyplot as plt

# Fetch the wineind series: the first 150 observations go to `train`, the
# rest to `test`
data = pm.datasets.load_wineind()
train = data[:150]
test = data[150:]

# Two-stage pipeline: a FourierFeaturizer (m=12, k=4) followed by an AutoARIMA
# search. The search is run with seasonal=False because the Fourier stage is
# there to account for the seasonality of the series.
fourier_step = ("fourier", ppc.FourierFeaturizer(m=12, k=4))
arima_step = (
    "arima",
    arima.AutoARIMA(
        stepwise=True,
        trace=1,
        error_action="ignore",
        seasonal=False,
        transparams=False,
        suppress_warnings=True,
    ),
)
pipe = pipeline.Pipeline([fourier_step, arima_step])

pipe.fit(train)
print("Model fit:", pipe, sep="\n")

# Forecast 10 periods ahead and ask for the confidence intervals as well
preds, conf_int = pipe.predict(n_periods=10, return_conf_int=True)
print("\nForecasts:", preds, sep="\n")

# Two stacked axes for comparing actual and predicted values
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
def Arima(df, dataset, months, var):
    """Fit a Fourier + AutoARIMA pipeline on monthly means of ``var`` and plot it.

    The ``var`` column of ``df`` is indexed by the parsed ``obstime`` column,
    filtered by a variable-specific threshold, and averaged per month. The
    pipeline is fit on the months that have a value, ``months`` periods are
    forecast, and two lines are drawn with ``plt.plot``: the monthly means
    followed by the forecast, and the series the model was fit on.

    :param df: DataFrame with an ``obstime`` column accepted by
        ``pd.to_datetime`` and a column named ``var``. It is not modified.
    :param dataset: Unused; kept so existing callers keep working.
    :param months: Int, number of periods to forecast.
    :param var: Str, name of the column to model. ``'rh'``, ``'ws'`` and
        ``'ap'`` get an extra value filter (> 10, >= 0 and > -10).
    :return: None
    """
    # Parse timestamps on a copy so the caller's DataFrame is left untouched
    indexed = df.assign(obstime=pd.to_datetime(df['obstime'])).set_index('obstime')
    data2 = indexed[var]

    # Keep only readings above the threshold defined for this variable
    if var == 'rh':
        data2 = data2[data2 > 10]
    elif var == 'ws':
        data2 = data2[data2 >= 0]
    elif var == 'ap':
        data2 = data2[data2 > -10]

    # Monthly means, computed once: data3 keeps any NaN entries for plotting,
    # data drops them and is what the model is fit on
    data3 = data2.resample('M').mean()
    data = data3.dropna()

    # FourierFeaturizer with m=12 precedes the ARIMA search, which is run with
    # seasonal=False because the Fourier stage is there for the seasonality
    pipe = pipeline.Pipeline([
        ("fourier", ppc.FourierFeaturizer(m=12)),
        (
            "arima",
            arima.AutoARIMA(
                stepwise=True,
                trace=1,
                error_action="ignore",
                seasonal=False,
                transparams=False,
                suppress_warnings=True))
    ])
    pipe.fit(data)

    # One x-axis date per month (the 28th), starting January 2012.
    # NOTE(review): the start year is hard-coded -- presumably the series
    # begins in 2012; confirm against the callers' data.
    dates = [
        dt.datetime(year=year, month=month, day=28)
        for year in range(2012, 2016 + int(months / 6))
        for month in range(1, 13)
    ]

    preds, conf_int = pipe.predict(n_periods=months, return_conf_int=True)

    # Monthly means (NaN entries included) followed by the forecast values
    temp = np.append(data3, preds)
    plt.plot(dates[:temp.size], temp)
    plt.plot(data)
示例#6
0
data3 = pm.datasets.load_wineind()
# Both halves of the split are taken from the same series
train, test = data3[:150], data3[150:]
lenSeq = 10000
subSamp = 40
f0SamplesSS = 10
f0Samples = 400

# NOTE(review): `data` is not assigned anywhere in this snippet; it has to be
# defined before this point -- confirm where it comes from.
data = data[:lenSeq]  # cap the series at lenSeq samples
dataSS = data[::subSamp]  # every subSamp-th sample

# Limit the stepwise search to at most 2 steps
with StepwiseContext(max_steps=2):
    pipe = pipeline.Pipeline([
        ("fourier", ppc.FourierFeaturizer(m=f0Samples)),
        ("arima", arima.AutoARIMA(stepwise=True,
                                  maxiter=20,
                                  with_intercept=False,
                                  start_p=5,
                                  start_q=4,
                                  max_p=6,
                                  max_q=6,
                                  trace=1,
                                  error_action="ignore",
                                  seasonal=False,  # because we use Fourier
                                  suppress_warnings=True))
    ])

    pipe.fit(data)
    yhat = pipe.predict(n_periods=1000)

#from pyramid.arima import auto_arima
#f0Samples = 10 # fs = 44100, f0 = 110 (A2), therefore f0 in samples is approx 400
#thissa = pm.auto_arima(train, error_action='ignore', seasonal=True, m=12)
#thissarimaSS =  pm.auto_arima(dataSS, with_intercept = False, d = 0, D = 0, start_p=0, start_q=0,test='adf',  max_p= 3, max_q= 3,     m=f0SamplesSS,start_P=0, start_Q= 0, max_Q=3, max_P=3, trace=True,error_action='ignore', suppress_warnings=True)

#thissarimaS1 =  pm.auto_arima(data, with_intercept = False, d = 0, D = 0, start_p=0, start_q=0,test='adf',  max_p= 3, max_q= 3,     m=f0Samples,start_P=0, start_Q= 0, max_Q=3, max_P=3, trace=True,error_action='ignore', suppress_warnings=True)
#thissarima =  pm.auto_arima(data, with_intercept = False, d = 0, start_p=0, start_q=0,test='adf',  max_p= 5, max_q= 5,  seasonal=False, trace=True,error_action='ignore', suppress_warnings=True)
#paramsSS = thissarimaSS.get_params([0])
#sos = paramsSS.get('seasonal_order')
# Estimate the differencing order (capped at 5); it is passed to AutoARIMA
# as `d` further down
n_diffs = arima.ndiffs(y_train, max_d=5)

# Featurizer that reads the "date" column of the exog matrix, with the
# day-of-week and day-of-month options switched on
date_feat = preprocessing.DateFeaturizer(
    column_name="date",
    with_day_of_week=True,
    with_day_of_month=True,
)

# Preview the exog features the featurizer generates for the training data
_, X_train_feats = date_feat.fit_transform(y_train, X_train)
head_repr = repr(X_train_feats.head())
print("Head of generated exog features:\n%s" % head_repr)

# The same featurizer becomes the first stage of a pipeline, followed by an
# AutoARIMA search
arima_step = arima.AutoARIMA(
    d=n_diffs,
    trace=3,
    stepwise=True,
    suppress_warnings=True,
    seasonal=False,
)
pipe = pipeline.Pipeline([("date", date_feat), ("arima", arima_step)])
pipe.fit(y_train, X_train)

# Forecast over the test-period exogenous data
forecasts = pipe.predict(exogenous=X_test)

# Single-axes figure for the plot
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(111)

# x positions covering the training samples followed by the forecasts
n_train = y_train.shape[0]
n_forecast = forecasts.shape[0]
x = np.arange(n_train + n_forecast)