def _make_auto_arima(d, D=None, seasonal=False, m=1):
    """Build the AutoARIMA searcher with the configuration shared by every branch.

    The original code repeated this exact constructor call in all four
    transform branches; centralizing it keeps the settings consistent.
    """
    return pm.AutoARIMA(d=d, D=D, seasonal=seasonal, m=m, trace=True,
                        error_action='ignore', maxiter=30, max_p=4, max_q=4,
                        suppress_warnings=True, with_intercept=True)


def train_arima_model(data_train, date_init, date_fin, op_red, type_day,
                      transform='decompose-Fourier', type_decompose='additive',
                      n_decompose=1, n_coeff_fourier=4, filter_decompose=None):
    """Fit an AutoARIMA pipeline for one cluster's series and persist it to disk.

    Parameters
    ----------
    data_train : pandas.Series
        Training series; its ``name`` attribute is used as the cluster id.
        NaN values are dropped before fitting.
    date_init, date_fin :
        Date-range identifiers forwarded to ``save_model_dir``.
    op_red, type_day :
        Metadata labels forwarded to ``save_model_dir``.
    transform : str
        One of ``'decompose-Fourier'``, ``'decompose-Fourier-log'``,
        ``'Fourier'``, ``'decompose'`` or ``'normal'``; selects how the series
        is preprocessed by ``get_transform_model`` before ARIMA fitting.
    type_decompose : str
        Decomposition mode (e.g. ``'additive'``) passed to ``get_transform_model``.
    n_decompose : int
        Number of decomposition passes.
    n_coeff_fourier : int
        Number of Fourier coefficients (k) for the Fourier featurizer.
    filter_decompose :
        Unused here; kept for backward compatibility with existing callers.

    Raises
    ------
    ValueError
        If ``transform`` is not one of the supported options.
    """
    num_cluster = data_train.name
    type_model = 'arima'
    # Drop NaNs once (the original converted to ndarray twice).
    values = np.array(data_train)
    data_train = values[~np.isnan(values)]

    if transform in ('decompose-Fourier', 'decompose-Fourier-log'):
        print('n_decompose: ', n_decompose, 'n_coeff_fourier: ', n_coeff_fourier)
        # Seasonal component is forecast separately; ARIMA models trend+residual
        # with Fourier terms as exogenous seasonal regressors.
        forecast_seasonal, trend_residual, n_diffs, periods_decompose, m_f, k_f = \
            get_transform_model(data_train, transform=transform,
                                type_decompose=type_decompose,
                                n_decompose=n_decompose,
                                n_coeff_fourier=n_coeff_fourier)
        pipeline = Pipeline([
            ('fourier', ppc.FourierFeaturizer(m=m_f, k=k_f)),
            ('model', _make_auto_arima(d=n_diffs, seasonal=False)),
        ])
        print('\t\t\t training model...')
        pipeline.fit(trend_residual)
        print(pipeline.summary())
        print('\t\t\t saving model...')
        save_model_dir(pipeline, transform, num_cluster, op_red, type_day,
                       type_model, date_init, date_fin, periods_decompose,
                       str(n_decompose), type_decompose)
        print('\t\t\t finish save model...')

    elif transform == 'Fourier':
        n_diffs, m_f, k_f = get_transform_model(data_train, transform=transform,
                                                n_coeff_fourier=n_coeff_fourier)
        pipeline = Pipeline([
            ('fourier', ppc.FourierFeaturizer(m=m_f, k=k_f)),
            ('model', _make_auto_arima(d=n_diffs, seasonal=False)),
        ])
        pipeline.fit(data_train)
        save_model_dir(pipeline, transform, num_cluster, op_red, type_day,
                       type_model, date_init, date_fin)

    elif transform == 'decompose':
        # Seasonal ARIMA on the trend+residual after classical decomposition.
        forecast_seasonal, trend_residual, n_diffs, ns_diffs, periods_decompose, m_f = \
            get_transform_model(data_train, transform=transform,
                                type_decompose=type_decompose,
                                n_decompose=n_decompose)
        pipeline = Pipeline([
            ('model', _make_auto_arima(d=n_diffs, D=ns_diffs, seasonal=True, m=m_f)),
        ])
        pipeline.fit(trend_residual)
        save_model_dir(pipeline, transform, num_cluster, op_red, type_day,
                       type_model, date_init, date_fin, periods_decompose,
                       str(n_decompose), type_decompose)

    elif transform == 'normal':
        # Plain seasonal ARIMA on the raw (NaN-filtered) series.
        n_diffs, ns_diffs, m_f = get_transform_model(data_train, transform=transform)
        pipeline = Pipeline([
            ('model', _make_auto_arima(d=n_diffs, D=ns_diffs, seasonal=True, m=m_f)),
        ])
        pipeline.fit(data_train)
        save_model_dir(pipeline, transform, num_cluster, op_red, type_day,
                       type_model, date_init, date_fin)

    else:
        raise ValueError('invalid variable transform {}.'.format(transform))
def test_AutoARIMA_class():
    """Fitting stores the training endog; update() extends it to the full series."""
    split = 125
    train_set, holdout = wineind[:split], wineind[split:]

    estimator = pm.AutoARIMA(maxiter=5)
    estimator.fit(train_set)
    assert_array_almost_equal(train_set, estimator.model_.arima_res_.data.endog)

    # After updating with the holdout, the stored endog should cover wineind.
    estimator.update(holdout, maxiter=2)
    assert_array_almost_equal(wineind, estimator.model_.arima_res_.data.endog)
import pmdarima as pm
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import BoxCoxEndogTransformer
import pickle

# Load the sunspots dataset and hold out everything after the first 2700 points.
sunspots = pm.datasets.load_sunspots()
y_train, y_test = train_test_split(sunspots, train_size=2700)

# Box-Cox transform (lmbda2 keeps inputs strictly positive) feeding an
# AutoARIMA search over seasonal models with period 12.
model = Pipeline([
    ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),  # lmbda2 avoids negative values
    ('arima', pm.AutoARIMA(seasonal=True, m=12,
                           suppress_warnings=True, trace=True)),
])
model.fit(y_train)

# Persist the fitted pipeline exactly as you would a scikit-learn estimator.
with open('model.pkl', 'wb') as pkl:
    pickle.dump(model, pkl)

# Round-trip: reload the pickle and forecast 15 steps ahead.
with open('model.pkl', 'rb') as pkl:
    restored = pickle.load(pkl)

print(restored.predict(15))
# [25.20580375 25.05573898 24.4263037 23.56766793 22.67463049 21.82231043
# 21.04061069 20.33693017 19.70906027 19.1509862 18.6555793 18.21577243
# 17.8250318 17.47750614 17.16803394]
def forecast(self, forecast_horizon: int = 96):
    """Fit a Box-Cox + AutoARIMA pipeline on close prices and store forecasts.

    Fits on ``self.training_data['close']``, pickles the fitted pipeline under
    ``intermediates/``, reloads it, forecasts ``forecast_horizon`` periods with
    confidence intervals, stores the results on ``self`` (``forecasts``,
    ``errors``, ``forecasts_lower``, ``forecasts_upper``, ``forecasts_raw``)
    and writes them to a CSV under ``output/``.

    Parameters
    ----------
    forecast_horizon : int
        Number of future periods to predict (default 96).
    """
    super().forecast(forecast_horizon)
    # BUG FIX: the original passed the "{}" template and the values as separate
    # print() arguments, so the literal placeholders were printed unformatted.
    print("Running ARIMA forecast for Currency-pair: {} using forecast horizon: {}".format(
        self.currency_pair.upper(), forecast_horizon))
    print("Dataset: ", self.currency_pair.upper())
    print(self.training_data.head(5))
    print(".....\t.........\t...")
    print(self.training_data.tail(5))

    # Define and fit the pipeline/model.
    pipeline = Pipeline([
        ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),  # lmbda2 avoids non-positive input
        ('arima', pm.AutoARIMA(start_p=1, start_q=1, max_p=3, max_q=3,
                               d=1, D=1, start_P=0, error_action='ignore',
                               suppress_warnings=True, stepwise=True,
                               seasonal=True, m=12, trace=True)),
    ])
    pipeline.fit(self.training_data['close'])

    # Serialize, then reload, so the prediction path exercises the pickled model.
    model_file = f"intermediates/arima_{self.currency_pair}.pkl"
    with open(model_file, "wb") as file:
        pickle.dump(pipeline, file)
    with open(model_file, "rb") as file:
        model = pickle.load(file)

    # Forecast with confidence intervals; unpack the tuple for readability.
    point_forecasts, conf_intervals = model.predict(
        n_periods=forecast_horizon, return_conf_int=True)
    print("ARIMA forecast ... complete")

    collated_results = DataFrame.from_records([{
        "forecast": value,
        # Half-width of the confidence interval as a symmetric error estimate.
        "error": abs(bounds[0] - bounds[1]) / 2,
        "forecast_lower": bounds[0],
        "forecast_upper": bounds[1]
    } for value, bounds in zip(point_forecasts, conf_intervals)])

    self.forecasts = collated_results["forecast"]
    self.errors = collated_results["error"]
    self.forecasts_lower = collated_results["forecast_lower"]
    self.forecasts_upper = collated_results["forecast_upper"]
    self.forecasts_raw = collated_results

    collated_results.to_csv(
        f"output/{self.currency_pair}__{self.model_name.lower()}__{forecast_horizon}__forecasts.csv"
    )
    print(collated_results)
prefix = valfiles_oi[ind].split( '_')[0] + '-validation-{}d-'.format(pred) #滑动窗口 for i in range(past + pred - 1, len(price)): print( '===========当前训练的是{}数据集,目标节点是{}=================='.format( valfiles_oi[ind].split('_')[0], val_3m.index[(i - (past + pred) + 1)])) sample = price[(i - (past + pred) + 1):(i + 1)] train, test = train_test_split(sample, train_size=past) pipeline = Pipeline([ # ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)), # lmbda2 avoids negative values ('arima', pm.AutoARIMA(seasonal=True, m=1, suppress_warnings=True, trace=True, error_action="ignore")) ]) pipeline.fit(train) pred_result = pipeline.predict(pred) print('pred_result is : ', pred_result) print( '====================一次训练结束=============================\n\n\n' ) val_index = prefix + val_3m.index[(i - (past + pred) + 1)] if use_diff: val_label = 1 if pred_result[-1] > 0 else 0 else: