def test_oob_sarimax():
    xreg = rs.rand(wineind.shape[0], 2)
    fit = ARIMA(order=(1, 1, 1),
                seasonal_order=(0, 1, 1, 12),
                out_of_sample_size=15).fit(y=wineind, exogenous=xreg)

    fit_no_oob = ARIMA(order=(1, 1, 1),
                       seasonal_order=(0, 1, 1, 12),
                       out_of_sample_size=0,
                       suppress_warnings=True)\
        .fit(y=wineind[:-15], exogenous=xreg[:-15, :])

    # now assert some of the same things here that we did in the former test
    oob = fit.oob()

    # compare scores:
    scoring = get_callable(fit_no_oob.scoring, VALID_SCORING)
    no_oob_preds = fit_no_oob.predict(n_periods=15, exogenous=xreg[-15:, :])
    assert np.allclose(oob, scoring(wineind[-15:], no_oob_preds), rtol=1e-2)

    # show params are still the same
    assert np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # show we can add the new samples and get the exact same forecasts
    xreg_test = rs.rand(5, 2)
    fit_no_oob.add_new_observations(wineind[-15:], xreg[-15:, :])
    assert np.allclose(fit.predict(5, xreg_test),
                       fit_no_oob.predict(5, xreg_test),
                       rtol=1e-2)

    # Show we can get a confidence interval out here
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    assert all(isinstance(a, np.ndarray) for a in (preds, conf))
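# The tests in this section reference shared module-level fixtures. A
# minimal sketch of what they are assumed to be, based on the
# pyramid/pmdarima test-suite conventions (an assumption, not the original
# module header; `y`, `_try_get_attrs`, `get_callable`, and VALID_SCORING
# are defined elsewhere in the package):
import numpy as np
from pyramid.arima import ARIMA
from pyramid.datasets import load_wineind, load_heartrate

rs = np.random.RandomState(42)  # RNG used to build made-up exogenous arrays
wineind = load_wineind()        # monthly Australian wine sales series
hr = load_heartrate()           # heart-rate series used by several tests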
def get_forecast(org_ts, forecast_periods, orders=(2, 1, 2),
                 seasonal_orders=(0, 1, 1, 48), freq='30min'):
    '''
    Produce a forecast for a time series.

    :param org_ts: the original series (a pandas Series with a datetime
        index)
    :param forecast_periods: how many points to forecast
    :param orders: the (p, d, q) values. p and q relate to the ACF and
        PACF respectively, and d is the order of differencing; it is
        recommended to first use auto_arima (get_suitable_orders) to find
        suitable values
    :param seasonal_orders: as above, except the last element is the
        seasonal period of the series
    :param freq: the interval between consecutive points
    :return: the forecast values
    '''
    order, seasonal_order = orders, seasonal_orders
    stepwise_fit = ARIMA(order=order,
                         seasonal_order=seasonal_order).fit(y=org_ts)
    forecast_ts = stepwise_fit.predict(n_periods=forecast_periods)

    # Start the forecast index one step past the end of the original series
    forecasts_date_start = org_ts.index[-1] + (org_ts.index[-1] -
                                               org_ts.index[-2])
    forecast_ts = pd.Series(forecast_ts,
                            index=pd.date_range(forecasts_date_start,
                                                periods=forecast_periods,
                                                freq=freq))
    return forecast_ts
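# A minimal usage sketch for get_forecast, assuming a half-hourly series
# with a DatetimeIndex; the demo values below are random and purely
# illustrative.
import numpy as np
import pandas as pd

_demo_idx = pd.date_range('2021-01-01', periods=4 * 48, freq='30min')
_demo_ts = pd.Series(np.random.RandomState(0).rand(4 * 48), index=_demo_idx)
print(get_forecast(_demo_ts, forecast_periods=6).head())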
def test_basic_arima():
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    preds = arima.fit_predict(y)  # fit/predict for coverage

    # test some of the attrs
    assert_almost_equal(arima.aic(), 11.201308403566909, decimal=5)
    assert_almost_equal(arima.aicc(), 11.74676, decimal=5)
    assert_almost_equal(arima.bic(), 13.639060053303311, decimal=5)

    # get predictions
    expected_preds = np.array([0.44079876, 0.44079876, 0.44079876,
                               0.44079876, 0.44079876, 0.44079876,
                               0.44079876, 0.44079876, 0.44079876,
                               0.44079876])

    # generate predictions
    assert_array_almost_equal(preds, expected_preds)

    # Make sure we can get confidence intervals
    expected_intervals = np.array([
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139]
    ])

    _, intervals = arima.predict(n_periods=10, return_conf_int=True,
                                 alpha=0.05)
    assert_array_almost_equal(intervals, expected_intervals)
def test_with_seasonality1():
    fit = ARIMA(order=(1, 1, 1),
                seasonal_order=(0, 1, 1, 12),
                suppress_warnings=True).fit(y=wineind)
    _try_get_attrs(fit)

    # R code AIC result is ~3004
    assert abs(fit.aic() - 3004) < 100  # show equal within 100 or so

    # R code AICc result is ~3005
    assert abs(fit.aicc() - 3005) < 100  # show equal within 100 or so

    # R code BIC result is ~3017
    assert abs(fit.bic() - 3017) < 100  # show equal within 100 or so

    # show we can predict in-sample
    fit.predict_in_sample()

    # test with SARIMAX confidence intervals
    fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)
def test_more_elaborate():
    # show we can fit this with a non-zero order
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr)
    _try_get_attrs(arima)

    # can we fit this same arima with a made-up exogenous array?
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True)\
        .fit(y=hr, exogenous=xreg)
    _try_get_attrs(arima)

    # pickle this for the __get/setattr__ coverage.
    # since the only time this is tested is in parallel in auto.py,
    # this doesn't actually get any coverage proof...
    fl = 'some_temp_file.pkl'
    with open(fl, 'wb') as p:
        pickle.dump(arima, p)

    # show we can predict with this even though it's been pickled
    new_xreg = rs.rand(5, 4)
    _preds = arima.predict(n_periods=5, exogenous=new_xreg)

    # now unpickle
    with open(fl, 'rb') as p:
        other = pickle.load(p)

    # show we can still predict, compare
    _other_preds = other.predict(n_periods=5, exogenous=new_xreg)
    assert_array_almost_equal(_preds, _other_preds)

    # now clear the cache and remove the pickle file
    arima._clear_cached_state()
    os.unlink(fl)

    # now show that since we fit the ARIMA with an exogenous array,
    # we need to provide one for predictions otherwise it breaks.
    assert_raises(ValueError, arima.predict, n_periods=5, exogenous=None)

    # show that if we DO provide an exogenous and it's the wrong dims, we
    # also break things down.
    assert_raises(ValueError, arima.predict, n_periods=5,
                  exogenous=rs.rand(4, 4))
def test_oob_for_issue_29():
    dta = sm.datasets.sunspots.load_pandas().data
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    del dta["YEAR"]

    xreg = np.random.RandomState(1).rand(dta.shape[0], 3)

    # Try for cv on/off, various D levels, and various Xregs
    for d in (0, 1):
        for cv in (0, 3):
            for exog in (xreg, None):

                # surround with try/except so we can log the failing combo
                try:
                    model = ARIMA(order=(2, d, 0), out_of_sample_size=cv)\
                        .fit(dta, exogenous=exog)

                    # If exogenous is defined, we need to pass n_periods of
                    # exogenous rows to the predict function. Otherwise we'll
                    # just leave it at None
                    if exog is not None:
                        xr = exog[:3, :]
                    else:
                        xr = None

                    _, _ = model.predict(n_periods=3, return_conf_int=True,
                                         exogenous=xr)

                except Exception as ex:
                    print("Failing combo: d=%i, cv=%i, exog=%r"
                          % (d, cv, exog))

                    # Statsmodels can be fragile with ARMA coefficient
                    # computation. If we encounter that, pass:
                    #   ValueError: The computed initial MA coefficients are
                    #   not invertible. You should induce invertibility,
                    #   choose a different model order, or ...
                    if "invertibility" in str(ex):
                        pass
                    else:
                        raise
def test_with_oob():
    # show we can fit with CV (kinda)
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True,
                  out_of_sample_size=10).fit(y=hr)
    assert not np.isnan(arima.oob())  # show this works

    # show we can fit if ooss < 0 and oob will be nan
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True,
                  out_of_sample_size=-1).fit(y=hr)
    assert np.isnan(arima.oob())

    # This will raise since n_steps is not an int
    assert_raises(TypeError, arima.predict, n_periods="5")

    # But that we CAN forecast with an int...
    _ = arima.predict(n_periods=5)

    # Show we fail if cv > n_samples
    assert_raises(ValueError,
                  ARIMA(order=(2, 1, 2), out_of_sample_size=1000).fit,
                  hr)
# Run ARIMA with found parameters
stepwise = ARIMA(callback=None, disp=0, maxiter=50, method=None,
                 order=(10, 1, 12), seasonal_order=(4, 1, 2, 52),
                 solver="lbfgs", suppress_warnings=True, transparams=True,
                 trend="c")

# Fit and predict
print("Fitting and Predicting...")
stepwise.fit(train.drop("WeekEnding", axis=1))
future = stepwise.predict(n_periods=len(test.index))

# Merge predictions with raw data
future = pd.DataFrame(future, index=test["WeekEnding"],
                      columns=["Forecast"])
df = df.set_index("WeekEnding").join(future, how="outer")
forecast = df.dropna()

# Plot vs actual data
plt.plot(df)
plt.xlabel("Date")
plt.ylabel("Lower 48 Inventory (Bcf)")
plt.show()

plt.plot(forecast)
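# The script above assumes `df`, `train`, and `test` already exist, with a
# "WeekEnding" date column plus one value column. A hedged, self-contained
# sketch of that setup using synthetic data (the real data is weekly Lower
# 48 natural-gas inventory; these numbers are made up for illustration):
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "WeekEnding": pd.date_range("2015-01-02", periods=260, freq="W-FRI"),
    "Inventory": np.random.RandomState(7).rand(260) * 1000 + 2000,
})
cutoff = int(len(df) * 0.8)                     # 80/20 chronological split
train, test = df.iloc[:cutoff], df.iloc[cutoff:]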
def test_oob_for_issue_28():
    # Continuation of above: can we do one with an exogenous array, too?
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True,
                  out_of_sample_size=10).fit(y=hr, exogenous=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # Assert that the endog shapes match. First is equal to the original,
    # and the second is the differenced array, with original shape - d.
    assert np.allclose(arima.arima_res_.data.endog, hr, rtol=1e-2)
    assert arima.arima_res_.model.endog.shape[0] == hr.shape[0] - 1

    # Now assert the same for exog
    assert np.allclose(arima.arima_res_.data.exog, xreg, rtol=1e-2)
    assert arima.arima_res_.model.exog.shape[0] == xreg.shape[0] - 1

    # Compare the OOB score to an equivalent fit on data - 10 obs, but
    # without any OOB scoring, and we'll show that the OOB scoring in the
    # first IS in fact only applied to the first (train - n_out_of_bag)
    # samples
    arima_no_oob = ARIMA(order=(2, 1, 2), suppress_warnings=True,
                         out_of_sample_size=0)\
        .fit(y=hr[:-10], exogenous=xreg[:-10, :])

    scoring = get_callable(arima_no_oob.scoring, VALID_SCORING)
    preds = arima_no_oob.predict(n_periods=10, exogenous=xreg[-10:, :])
    assert np.allclose(oob, scoring(hr[-10:], preds), rtol=1e-2)

    # Show that the model parameters are exactly the same
    xreg_test = rs.rand(5, 4)
    assert np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # Now assert on the forecast differences.
    with_oob_forecasts = arima.predict(n_periods=5, exogenous=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5,
                                            exogenous=xreg_test)
    assert_raises(AssertionError, assert_array_almost_equal,
                  with_oob_forecasts, no_oob_forecasts)

    # But after we update the no_oob model with the latest data, we should
    # be producing the same exact forecasts

    # First, show we'll fail if we try to add observations with no exogenous
    assert_raises(ValueError, arima_no_oob.add_new_observations,
                  hr[-10:], None)

    # Also show we'll fail if we try to add mis-matched shapes of data
    assert_raises(ValueError, arima_no_oob.add_new_observations,
                  hr[-10:], xreg_test)

    # Show we fail if we try to add observations with a different dim exog
    assert_raises(ValueError, arima_no_oob.add_new_observations,
                  hr[-10:], xreg_test[:, 2])

    # Actually add them now, and compare the forecasts (should be the same)
    arima_no_oob.add_new_observations(hr[-10:], xreg[-10:, :])
    assert np.allclose(with_oob_forecasts,
                       arima_no_oob.predict(n_periods=5,
                                            exogenous=xreg_test),
                       rtol=1e-2)
def forecasting_sales():
    try:
        period = request.args.get('period')
        data = pd.read_csv(
            'http://robsonfernandes.net/mestrado/data/food-sp.csv')
        print('Passed 00')

        variavel = 'VENDA'
        data.index = data['DATA']

        interval = 96 - int(period)
        df_train = data.iloc[1:interval, ]
        df_test = data.iloc[interval:96, ]

        # Box-Cox transform the training series; keep lambda so the
        # forecasts can be back-transformed later.
        df_train[variavel + '_box'], lmbda = stats.boxcox(df_train[variavel])
        print('Passed 01')

        # model = auto_arima(df_train[variavel + '_box'],
        #                    n_fits=10,
        #                    start_p=0,
        #                    start_q=0,
        #                    max_p=5,
        #                    max_q=5,
        #                    m=20,
        #                    start_P=0,
        #                    d=1,
        #                    D=1,
        #                    trace=True,
        #                    stationary=False,
        #                    error_action='ignore',
        #                    suppress_warnings=True,
        #                    stepwise=True)

        model = ARIMA(callback=None, disp=0, maxiter=50, method=None,
                      order=(1, 1, 1), out_of_sample_size=0, scoring='mse',
                      scoring_args={}, seasonal_order=(2, 1, 1, 20),
                      solver='lbfgs', start_params=None,
                      suppress_warnings=True, transparams=True, trend='c')

        model.fit(df_train[variavel + '_box'])
        # model.summary()

        forecast = model.predict(n_periods=int(period))
        y_pred = invboxcox(forecast, lmbda)
        y_true = df_test[variavel].values
        print('Passed 02')

        acuracia = round(
            100 - mean_absolute_percentage_error(y_true, y_pred), 0)

        retorno = {
            'acuracia': acuracia,
            'real': y_true.tolist(),
            'previsto': y_pred.tolist()
        }
        return jsonify(retorno)
    except Exception:
        raise
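# invboxcox and mean_absolute_percentage_error are used above but not
# defined in this snippet. A minimal sketch of the conventional
# definitions (an assumption -- the original helpers may differ):
import numpy as np

def invboxcox(y, lmbda):
    # Invert scipy.stats.boxcox: exp(y) when lambda == 0, otherwise
    # (lambda * y + 1) ** (1 / lambda).
    y = np.asarray(y)
    if lmbda == 0:
        return np.exp(y)
    return np.exp(np.log(lmbda * y + 1) / lmbda)

def mean_absolute_percentage_error(y_true, y_pred):
    # MAPE in percent; assumes y_true contains no zeros.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100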
class AutoArima(SupervisedLearnerPrimitiveBase[Inputs, Outputs,
                                               ArimaParams,
                                               ArimaHyperparams]):
    __author__ = 'USC ISI'
    metadata = hyperparams.base.PrimitiveMetadata({
        # Required
        "id": 'b2e4e8ea-76dc-439e-8e46-b377bf616a35',
        "version": config.VERSION,
        "name": "DSBox Arima Primitive",
        "description": "Arima primitive for timeseries data "
                       "regression/forecasting problems, transferred "
                       "from pyramid/Arima",
        "python_path": "d3m.primitives.time_series_forecasting.Arima.DSBOX",
        "primitive_family": "TIME_SERIES_FORECASTING",
        "algorithm_types": ["AUTOREGRESSIVE_INTEGRATED_MOVING_AVERAGE"],
        "source": {
            "name": config.D3M_PERFORMER_TEAM,
            "contact": config.D3M_CONTACT,
            "uris": [config.REPOSITORY]
        },
        "keywords": ["Transform", "Timeseries", "Aggregate"],
        "installation": [config.INSTALLATION],
        "precondition": ["NO_MISSING_VALUES", "NO_CATEGORICAL_VALUES"],
    })

    def __init__(self, *,
                 hyperparams: ArimaHyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, DockerContainer] = None,
                 _verbose: int = 0) -> None:
        super().__init__(hyperparams=hyperparams,
                         random_seed=random_seed,
                         docker_containers=docker_containers)
        if self.hyperparams["is_seasonal"]:
            seasonal_order = self.hyperparams["seasonal_order"]
        else:
            seasonal_order = None
        self._clf = ARIMA(
            order=(self.hyperparams["P"],
                   self.hyperparams["D"],
                   self.hyperparams["Q"]),
            seasonal_order=seasonal_order,
            # seasonal_order=self.hyperparams["seasonal_order"],
            # seasonal_order=(0, 1, 1, 12),
            # start_params=self.hyperparams["start_params"],
            # start_params=None,
            transparams=self.hyperparams["transparams"],
            method=self.hyperparams["method"],
            trend=self.hyperparams["trend"],
            solver=self.hyperparams["solver"],
            maxiter=self.hyperparams["maxiter"],
            disp=self.hyperparams["disp"],
            # callback=self.hyperparams["callback"],
            callback=None,
            suppress_warnings=self.hyperparams["suppress_warnings"],
            out_of_sample_size=0,
            scoring="mse",
            scoring_args=None
        )
        self._training_inputs = None
        self._training_outputs = None
        self._target_names = None
        self._training_indices = None
        self._fitted = False
        self._length_for_produce = 0

    def set_training_data(self, *, inputs: Inputs) -> None:
        inputs_timeseries = d3m_dataframe(inputs.iloc[:, -1])
        inputs_d3mIndex = d3m_dataframe(inputs.iloc[:, 0])
        if len(inputs_timeseries) == 0:
            print("Warning: the timeseries input to this primitive has "
                  "length 0.")
            return
        column_name = inputs_timeseries.columns[0]
        self._training_inputs, self._target_names = \
            inputs_timeseries, column_name
        self._training_outputs = inputs_timeseries

    def fit(self, *, timeout: float = None,
            iterations: int = None) -> CallResult[None]:
        if self._fitted:
            return CallResult(None)
        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")
        # Flatten a single-column 2-d array to 1-d before fitting; leave
        # anything else as-is so sk_training_output is always bound.
        sk_training_output = d3m_ndarray(self._training_outputs)
        if sk_training_output.ndim == 2 and sk_training_output.shape[1] == 1:
            sk_training_output = np.ravel(sk_training_output)
        self._clf.fit(sk_training_output)
        self._fitted = True
        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        arima_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            sk_inputs = inputs.iloc[:, self._training_indices]
        sk_output = self._clf.predict(n_periods=len(arima_inputs))
        output = d3m_dataframe(sk_output, generate_metadata=False,
                               source=self)
        output.metadata = inputs.metadata.clear(
            source=self, for_value=output, generate_metadata=True)
        output.metadata = self._add_target_semantic_types(
            metadata=output.metadata,
            target_names=self._target_names, source=self)
        if not self.hyperparams['use_semantic_types']:
            return CallResult(output)
        # outputs = common_utils.combine_columns(
        #     return_result=self.hyperparams['return_result'],
        #     add_index_columns=self.hyperparams['add_index_columns'],
        #     inputs=inputs, column_indices=self._training_indices,
        #     columns_list=[output], source=self)
        return CallResult(output)

    def get_params(self) -> ArimaParams:
        return Params(arima=self._clf)

    def set_params(self, *, params: ArimaParams) -> None:
        self._clf = params["arima"]

    @classmethod
    def _get_columns_to_fit(cls, inputs: Inputs,
                            hyperparams: ArimaHyperparams):
        if not hyperparams['use_semantic_types']:
            return inputs, list(range(len(inputs.columns)))

        inputs_metadata = inputs.metadata

        def can_produce_column(column_index: int) -> bool:
            return cls._can_produce_column(inputs_metadata, column_index,
                                           hyperparams)

        columns_to_produce, columns_not_to_produce = \
            common_utils.get_columns_to_use(
                inputs_metadata,
                use_columns=hyperparams['use_columns'],
                exclude_columns=hyperparams['exclude_columns'],
                can_use_column=can_produce_column)
        return inputs.iloc[:, columns_to_produce], columns_to_produce

    @classmethod
    def _can_produce_column(cls,
                            inputs_metadata: metadata_base.DataMetadata,
                            column_index: int,
                            hyperparams: ArimaHyperparams) -> bool:
        column_metadata = inputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, column_index))

        semantic_types = column_metadata.get('semantic_types', [])
        if len(semantic_types) == 0:
            cls.logger.warning("No semantic types found in column metadata")
            return False
        if "https://metadata.datadrivendiscovery.org/types/Attribute" in semantic_types:
            return True
        return False

    @classmethod
    def _get_targets(cls, data: d3m_dataframe,
                     hyperparams: ArimaHyperparams):
        if not hyperparams['use_semantic_types']:
            return data, []
        target_names = []
        target_column_indices = []
        metadata = data.metadata
        target_column_indices.extend(
            metadata.get_columns_with_semantic_type(
                'https://metadata.datadrivendiscovery.org/types/TrueTarget'))

        for column_index in target_column_indices:
            if column_index is metadata_base.ALL_ELEMENTS:
                continue
            column_index = typing.cast(
                metadata_base.SimpleSelectorSegment, column_index)
            column_metadata = metadata.query(
                (metadata_base.ALL_ELEMENTS, column_index))
            target_names.append(
                column_metadata.get('name', str(column_index)))

        targets = data.iloc[:, target_column_indices]
        return targets, target_names

    @classmethod
    def _add_target_semantic_types(
            cls, metadata: metadata_base.DataMetadata,
            source: typing.Any,
            target_names: List = None) -> metadata_base.DataMetadata:
        for column_index in range(
                metadata.query(
                    (metadata_base.ALL_ELEMENTS,))['dimension']['length']):
            metadata = metadata.add_semantic_type(
                (metadata_base.ALL_ELEMENTS, column_index),
                'https://metadata.datadrivendiscovery.org/types/Target',
                source=source)
            metadata = metadata.add_semantic_type(
                (metadata_base.ALL_ELEMENTS, column_index),
                'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
                source=source)
            if target_names:
                metadata = metadata.update(
                    (metadata_base.ALL_ELEMENTS, column_index),
                    {'name': target_names[column_index]},
                    source=source)
        return metadata

    # functions to fit in devel branch of d3m (2019-1-17)
    def fit_multi_produce(self, *,
                          produce_methods: typing.Sequence[str],
                          inputs: Inputs,
                          timeout: float = None,
                          iterations: int = None) -> MultiCallResult:
        """
        A method calling ``fit`` and after that multiple produce methods
        at once.
        This method allows a primitive author to implement an optimized
        version of both fitting and producing a primitive on the same data.

        If any additional method arguments are added to the primitive's
        ``set_training_data`` method or produce method(s), or removed from
        them, they have to be added to or removed from this method as well.
        This method should accept a union of all arguments accepted by the
        primitive's ``set_training_data`` method and produce method(s) and
        then use them accordingly when computing results.

        The default implementation of this method just calls
        ``set_training_data`` first, then ``fit``, then all produce methods
        listed in ``produce_methods`` in order, and is potentially
        inefficient.

        Parameters
        ----------
        produce_methods : Sequence[str]
            A list of names of produce methods to call.
        inputs : Inputs
            The inputs given to ``set_training_data`` and all produce
            methods.
        timeout : float
            A maximum time this primitive should take to both fit the
            primitive and produce outputs for all produce methods listed
            in the ``produce_methods`` argument, in seconds.
        iterations : int
            How many internal iterations the primitive should do for both
            fitting and producing outputs of all produce methods.

        Returns
        -------
        MultiCallResult
            A dict of values for each produce method wrapped inside
            ``MultiCallResult``.
        """
        return self._fit_multi_produce(produce_methods=produce_methods,
                                       timeout=timeout,
                                       iterations=iterations,
                                       inputs=inputs)
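# A hedged usage sketch for AutoArima under d3m runtime conventions
# (Hyperparams classes expose .defaults()). `train_df` and `future_df` are
# hypothetical d3m dataframes laid out as set_training_data above expects:
# d3mIndex in the first column and the target series in the last.
hp = ArimaHyperparams.defaults()
primitive = AutoArima(hyperparams=hp)
primitive.set_training_data(inputs=train_df)
primitive.fit()
predictions = primitive.produce(inputs=future_df).value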