def div_predict(sp500_div):
    """Grid-search ARIMA(p, 1, q) by BIC, then forecast one step ahead.

    Every (p, q) pair with ``0 <= p, q <= len(sp500_div) // 100`` is fitted
    and its BIC recorded; the pair with the lowest BIC is printed for
    information only. The model that is summarised, plotted and used for the
    forecast is the fixed ARIMA(3, 1, 2).

    Args:
        sp500_div: the series to model; passed unchanged to ``ARIMA``.

    Returns:
        The first element of ``clf.forecast(1)``.
    """
    import matplotlib.pyplot as plt

    max_lag = len(sp500_div) // 100

    bic_rows = []
    for p in range(max_lag + 1):
        row = []
        for q in range(max_lag + 1):
            try:
                row.append(ARIMA(sp500_div, order=(p, 1, q)).fit().bic)
            except Exception:
                # Best-effort search: an order that cannot be fitted is
                # recorded as NaN so it never wins the BIC comparison.
                row.append(float('nan'))
        bic_rows.append(row)

    # Rows were built per p and entries within a row per q, so the index is p
    # and the columns are q (the original labelled them the other way round).
    bic_table = pd.DataFrame(bic_rows)
    bic_table.index.name = 'p'
    bic_table.columns.name = 'q'

    # unstack() puts the column level first, hence the (q, p) order printed.
    bic_by_order = bic_table.unstack().astype('float32')
    print('q,p: ', bic_by_order.idxmin())

    clf = ARIMA(sp500_div, order=(3, 1, 2)).fit()  # (3,2)
    print(clf.summary())

    # Draw into an explicit axes instead of opening an unused empty figure.
    _, ax = plt.subplots()
    clf.plot_predict(ax=ax)
    plt.show()
    return clf.forecast(1)[0]
def predict_stock(stock_close_rtn, n_steps=5, plot=False):
    """Forecast the next ``n_steps`` values of a stock's close returns.

    The AR and MA orders are chosen by ``find_params_arima`` on the first
    difference of the series, and an ARIMA(p, 1, q) model is then fitted to
    the undifferenced values.

    Args:
        stock_close_rtn: close returns of a stock (the original docstring
            describes it as a dataframe); must support ``.shift()`` and
            ``.values``.
        n_steps: number of out-of-sample steps to forecast.
        plot: when true, also draw the last 10 observations together with
            the forecast horizon; the same flag is forwarded as ``disp`` to
            ``fit``.

    Returns:
        The point forecasts for the next ``n_steps`` periods.
    """
    diff_series = (stock_close_rtn - stock_close_rtn.shift()).dropna()
    p, q = find_params_arima(diff_series)

    # ARIMA orders are (p, d, q). The original passed (p, q, 0), which used
    # the MA order as the differencing order and dropped the MA part.
    # NOTE(review): d=1 matches the single differencing applied above;
    # confirm that find_params_arima really returns (p, q).
    model = ARIMA(stock_close_rtn.values, order=(p, 1, q)).fit(disp=plot)

    # predict(end=n_steps) returned in-sample values from the start of the
    # series; forecast() yields the out-of-sample values the contract
    # promises. Element 0 of its result holds the point forecasts.
    predicted = model.forecast(steps=n_steps)[0]

    if plot:
        n_obs = len(stock_close_rtn)
        model.plot_predict(n_obs - 10, n_obs + n_steps)
        plt.axhline(y=0, linestyle='--', color='gray')
    return predicted
# Residual diagnostics for the previously fitted `model`: residual line plot
# and its autocorrelation plot up to lag 50. print() shows whatever object
# each plotting call returns.
print(model.resid.plot())
print(plot_acf(model.resid, lags = 50))

#%% ARIMA
# Fit an ARIMA with order (28, 1, 1) on the full `dftaxi_day` data and run
# the same residual diagnostics as above.
arima_model = ARIMA(dftaxi_day, (28,1, 1)).fit()
# NOTE(review): the summary is not printed or stored here; it is only
# displayed if the environment echoes the last expression of a cell.
arima_model.summary()
print(arima_model.resid.plot())
print(plot_acf(arima_model.resid, lags = 50))

#%% Predict
#arima_model.predict(1,100).plot()
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
# NOTE(review): `train` is only assigned further down in this fragment, so
# this line depends on the split section below having been run first.
ax = train.plot(ax=ax)
# Overlay the model's prediction for 2016-10-01 .. 2016-12-31 on the same
# axes, without re-plotting the in-sample data.
fig = arima_model.plot_predict('2016-10-01', '2016-12-31', ax=ax, plot_insample=False)

###############################################################################
###############################################################################
#%%
print (train)
#train.plot(y='response_variable',kind='line')

#%% Train/Test Split
# First 75% of `response_variable` is the training set, the rest is the test
# set.
# NOTE(review): arima_model above is fitted on all of dftaxi_day, not on
# `train` — confirm that is intended.
n = len(dftaxi_day.response_variable)
train = dftaxi_day.response_variable[:int(.75*n)]
test = dftaxi_day.response_variable[int(.75*n):]

#%%
print(test)

#%% Train autocorrelation
# The trailing numbers are values recorded by the original author.
print (train.autocorr(lag=1)) # 0.61
print (train.autocorr(lag=7)) # 0.78
# fc_series = pd.Series(fc, index=lst_day_validation.index)
# lower_series = pd.Series(conf[:, 0], index=lst_day_validation.index)
# upper_series = pd.Series(conf[:, 1], index=lst_day_validation.index)
#
# plt.figure(figsize=(12, 5), dpi=300)
# plt.plot(lst_day_train, label='training')
# plt.plot(lst_day_validation, label='validation')
# plt.plot(fc_series, label='forecast')
# plt.title('Forecast vs Actuals')
# plt.legend(loc='upper left', fontsize=12)
# plt.show()

# Build Model
print(lst_day_train)
# NOTE(review): the model is fitted on the full 'lst_day' column, not on
# lst_day_train — confirm that is intended.
model = ARIMA(base_temperatures['lst_day'], order=(1, 1, 0)).fit()
model.plot_predict(dynamic=False)

# Compare the observed first differences with the fitted values.
plt.figure(figsize=(12, 5), dpi=300)
plt.plot(base_temperatures['lst_day'].diff())
plt.plot(model.fittedvalues, color='red')
plt.show()

# Rebuild the level series from the fitted differences:
#   level[0] = first observation, level[t] = level[t-1] + fitted_diff[t].
# The original dropped the first fitted difference (.iloc[1:]) and attached
# the result to index [1:], which shifted every reconstructed level one step
# late and omitted the first increment. Using all fitted differences gives
# one value per observation, aligned with the full index.
predictions_ARIMA_diff = pd.Series(model.fittedvalues, copy=True)
x, x_diff = base_temperatures['lst_day'].iloc[0], predictions_ARIMA_diff
predictions_ARIMA = np.r_[x, x_diff].cumsum().astype(float)

# Sanity check: both lengths should now be equal.
print(len(predictions_ARIMA))
print(len(base_temperatures))

predictions = pd.Series(predictions_ARIMA,
                        index=base_temperatures['lst_day'].index)
print(model.fittedvalues.tail())
print(predictions.tail())
plt.show()

# Train the ARIMA model on the first 90 observations; the rest is held out.
y_train = y[0:90]
y_test = y[90:]

# Single source of truth for the model order, so the plot title below
# cannot drift from the model that was actually fitted (the original title
# read "ARMA(6, 1, 2)" although the model is an ARIMA).
arima_order = (6, 1, 2)
arima = ARIMA(y_train, order=arima_order, missing="drop").fit(
    transparams=True, maxiter=500, trend="c")

# NOTE(review): with d=1 and no `typ` argument this may return predicted
# differences rather than levels — confirm before comparing with y_test.
y_pred_arima = arima.predict(start=90, end=99)

# Show the results
sns.set()
fig, ax = plt.subplots(figsize=(18, 8))
ax.plot(y_test, linewidth=1.0, color="r", label="Data")
arima.plot_predict(start=90, end=99, plot_insample=False, dynamic=True,
                   ax=ax)
ax.set_xlabel("Time", fontsize=16)
ax.set_ylabel("Measure", fontsize=16)
ax.set_title("ARIMA{} prediction".format(arima_order), fontsize=16)
ax.legend(fontsize=16)
plt.show()
# predict_dta = arma.predict(start='2016-10-14-00', end='2016-10-14-23', dynamic=True,) # print(predict_dta) # # # arma.plot_predict(start='2016-10-14-00', end='2016-10-14-23', dynamic=True,) # # plt.show() # # pred = pd.Series(np.array(list(predict_dta), dtype=float), index=pd.date_range(start='2016-10-14', periods=24, freq='H')) # # time.plot() # pred.plot() # plt.show() model = ARIMA(time, order=(5, 1, 1), freq='H').fit() predict_dta = model.predict( start='2016-10-14-00', end='2016-10-14-23', dynamic=True, ) print(predict_dta) model.plot_predict( start='2016-10-14-00', end='2016-10-14-23', dynamic=True, ) plt.show() # pred = model.forecast(10) # print(pred)
# Choose the ARMA order by information criterion on the non-null 'ts1'
# values and print the BIC-minimising order.
order_trend = arma_order_select_ic(data['ts1'].dropna())
print(order_trend['bic_min_order'])
# The order selected here matches the one in the book.

# 4. Fit
# NOTE(review): the order search above ran on data['ts1'] while the model is
# fitted on data['index'] with a hard-coded (0, 1, 1) — confirm 'ts1' is the
# differenced form of 'index'.
result_trend = ARIMA(data['index'], (0, 1, 1)).fit()
print(result_trend.params)
# The remaining steps are the same as for an ARMA model.

# 5. Goodness-of-fit checks
# (1) White-noise test on the residuals at lags 6 and 12, with the
#     Box-Pierce variant requested as well.
output3 = acorr_ljungbox(result_trend.resid, boxpierce=True, lags=[6, 12], return_df=True)
print(output3)
# The white-noise test on the fitted residuals looks good: the p-values are
# well above 0.05 (original author's observation).

# (2) Significance test of the model parameters
print(result_trend.pvalues)
# This result does not seem to agree exactly with R's (original author's
# observation).

# Plot the observed series from 1952 onward, then overlay the dynamic
# prediction for 1989-1992 without re-plotting the in-sample data.
fig, ax = plt.subplots()
ax = data['index'].loc['1952':].plot(ax=ax)
result_trend.plot_predict('1989', '1992', dynamic=True, ax=ax, plot_insample=False)
plt.show()
import matplotlib.pyplot as plt
from sklearn import metrics as me

# Load the 'production' column of the milk-production CSV (first column is
# the index) and take its first difference.
data1 = pd.read_csv('e:/work/milkproduction.csv', header=0, index_col=0)
data1 = pd.Series(data1['production'])
data1_diff = data1.diff(1).dropna()

# Differenced and original series side by side, for inspection.
x = pd.concat([data1_diff, data1], axis=1)
print(x)

# Augmented Dickey-Fuller test on the differenced series.
print(adfuller(data1_diff, autolag='AIC'))

# Fit order (1, 0, 2) on the already-differenced series and print the
# residual sum of squares against the fitted values.
model = ARIMA(data1_diff, order=(1, 0, 2)).fit(disp=-1)
print(sum((data1_diff - model.fittedvalues)**2))

# NOTE(review): exit() stops the script here, so everything below is
# currently unreachable — presumably a debugging stop; confirm.
exit()
''' model.plot_predict() model.forecast() plot_acf(data1) plot_pacf(data1) sm.qqplot(model.resid,line='s') plt.show() '''

# Undo the differencing: seed with the first level and the first observed
# difference, append the fitted differences, then cumulatively sum.
# NOTE(review): fittedvalues presumably start at data1.index[1] (the model
# was fitted on data1_diff), in which case that index is seeded twice and
# the first difference is double-counted — verify.
# NOTE(review): Series.append is deprecated in recent pandas.
model_prediction_diff=pd.Series([data1[0],data1[1]-data1[0]],index=[data1.index[0],data1.index[1]])\
    .append(model.fittedvalues)
model_prediction = pd.Series.cumsum(model_prediction_diff)

# Plot the reconstructed series against the original.
model_prediction.plot()
data1.plot()
plt.show()
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)

# NOTE(review): the model is fitted on the whole frame `df`, not on the
# `LocalTransmission` column extracted above — confirm df has one column.
result = ARIMA(df, order=(1, 1, 1)).fit(disp=False)
print(result.summary())
#print(result.params)

# NOTE(review): with d=1 and no `typ` argument these may be predicted
# differences rather than case counts — confirm before reading the error
# printed at the end as an error in cases.
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)

result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for postive cases in Egypt \n Algorithm used: ARIMA', fontsize=12)
plt.show()


def mean_forecast_error(LocalTransmission, predictions):
    """Return the mean of (actual - predicted).

    The original body was ``mean(sum(LocalTransmission, predictions))``,
    which adds the total of the actuals to every prediction and is not a
    forecast error. This restores the formula from the commented-out draft
    that preceded it: ``y.sub(yhat).mean()``.

    Args:
        LocalTransmission: observed values; must provide ``.sub``.
        predictions: predicted values to subtract from the observations.

    Returns:
        The mean of the element-wise differences.
    """
    return LocalTransmission.sub(predictions).mean()


# Print the computed error; the original printed the function object itself.
print(mean_forecast_error(LocalTransmission, predictions))
plt.show()#定阶 p=d=q=range(0,4) pdq=list(itertools.product(p,d,q)) for param in pdq: try: model=ARIMA(data1_diff,order=param).fit(disp=-1) print('ARIMA{} AIC:{} BIC:{}'.format(param,model.aic,model.bic)) except: continue ''' ####模型拟合#### model = ARIMA(data1_diff, order=(3, 0, 3)).fit(disp=-1) print(model.summary()) model.plot_predict() model.forecast() sm.qqplot(model.resid, line='s') plt.show() ####差分还原画图#### model_prediction_diff=pd.Series([data1[0],data1[1]-data1[0]],index=[data1.index[0],data1.index[1]])\ .append(model.fittedvalues) model_prediction = pd.Series.cumsum(model_prediction_diff) model_prediction.plot(label='forcest') data1.plot() plt.legend(loc='upper right') plt.show()
# NOTE: Python 2 syntax (print statements) — this section is not valid
# Python 3 as written.

# Plot the residuals of the fitted model `arima_mod100`.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax = arima_mod100.resid.plot(ax=ax)
ax.set_title("Residual series")
plt.show()

# Normality test on the residuals; the result is printed between banners.
resid = arima_mod100.resid
print "============== Residuals normality test ================"
print st.normaltest(resid)
print "========================================================"

# Q-Q plot of the residuals, drawn on the axes created here.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax.set_title("Residuals test for normality")
fig = qqplot(resid, line='q', ax=ax, fit=True)
plt.show()

# Plot the training series from 2012 onward and overlay the dynamic
# prediction for '2014m1'..'2015m12' without re-plotting in-sample data.
# NOTE(review): `.ix` is deprecated in newer pandas — verify the pandas
# version this runs against.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111)
ax = trainWTI.ix['2012':].plot(ax=ax)
fig = arima_mod100.plot_predict('2014m1', '2015m12', dynamic=True, ax=ax, plot_insample=False)
ax.set_title("Prediction of spot prices")
ax.set_xlabel("Dates")
ax.set_ylabel("Price [USD]")
plt.show()
for model in ["additive", "multiplicative"]: ts_decompose(y, model, True) ################################################## # MODEL ################################################## arima_model = ARIMA(train, order=(1, 1, 1)).fit(disp=0) # order(p, d, q) arima_model.summary() y_pred = arima_model.forecast(48)[0] mean_absolute_error(test, y_pred) # 2.7193 arima_model.plot_predict(dynamic=False) plt.show() train["1985":].plot(legend=True, label="TRAIN") test.plot(legend=True, label="TEST", figsize=(6, 4)) pd.Series(y_pred, index=test.index).plot(legend=True, label="PREDICTION") plt.title("Train, Test and Predicted Test") plt.show() ################################################## # MODEL TUNING ################################################## ################################################## # Statistical Consideration of Model Degree Selection ##################################################
class autoARIMA(object):
    '''
    A wrapper around statsmodels' ARIMA for easier model fitting and
    forecasting.

    The order is chosen by brute force: every (p, d, q) combination in the
    search ranges is fitted and the one with the lowest BIC (Bayesian
    Information Criterion) wins. Not the best way, but the easiest.
    A possible alternative to look at is pyramid-arima.

    Parameters
    ----------
    endog : the series to model; passed unchanged to ``ARIMA``.
    max_p, max_d, max_q : exclusive upper bounds of the search ranges, i.e.
        p is searched over ``range(max_p)`` and so on.
    helpText : when truthy, print the selected order.
    '''

    def __init__(self, endog, max_p=5, max_d=5, max_q=5, helpText=True):
        self.endog = endog
        self.max_p = max_p
        self.max_d = max_d
        self.max_q = max_q
        self.helpText = helpText
        self.fitted_model = None

    def getOrder(self):
        '''Return the (p, d, q) order with the lowest BIC.

        Raises ValueError when no candidate order could be fitted.
        '''
        fittedOrder = {'order': [], 'bic': []}
        # iterate through (p,d,q) values
        for p in range(self.max_p):
            for d in range(self.max_d):
                for q in range(self.max_q):
                    try:
                        bic = ARIMA(self.endog,
                                    order=(p, d, q)).fit(disp=0).bic
                    except Exception:
                        # Best-effort search: skip orders that cannot be
                        # fitted, but let KeyboardInterrupt/SystemExit
                        # through (a bare `except:` swallowed those too).
                        continue
                    fittedOrder['bic'].append(bic)
                    fittedOrder['order'].append((p, d, q))

        if not fittedOrder['bic']:
            # Same exception type min() raised on the empty list, but with a
            # message that names the actual problem.
            raise ValueError('No ARIMA order could be fitted to the data')

        # find order with lowest bic value (first one wins on ties)
        bestOrder = fittedOrder['order'][fittedOrder['bic'].index(
            min(fittedOrder['bic']))]
        if self.helpText:
            print('Lowest BIC value with order ', bestOrder)
        return bestOrder

    def fit(self):
        '''Fit ARIMA with the lowest-BIC order and return ``self``.'''
        self.fitted_model = ARIMA(self.endog,
                                  order=self.getOrder()).fit(disp=0)
        return self

    def forecast(self, num_step):
        '''Return ``(forecast, confLimit)`` for the next ``num_step`` steps.

        Prints an error and returns None when the model is not fitted yet.
        '''
        # `is None` avoids an element-wise comparison if the fitted object
        # overloads `==`.
        if self.fitted_model is None:
            print('ERROR: Fit the model first')
            return None
        forecast, _, confLimit = self.fitted_model.forecast(steps=num_step)
        return (forecast, confLimit)

    def inSamplePlot(self):
        '''Plot the fitted model's prediction and return ``self``.'''
        self.fitted_model.plot_predict()
        return self

    def saveModel(self, fileName):
        '''Pickle this wrapper, including any fitted model, to ``fileName``.'''
        with open(fileName, 'wb') as outModelFile:
            pickle.dump(self, outModelFile, pickle.HIGHEST_PROTOCOL)

    def loadModel(self, fileName):
        '''Restore the state saved by ``saveModel`` into this instance.

        The original assigned the unpickled object to the local name
        ``self``, which left the instance unchanged. The loaded state is now
        copied into this instance, which is also returned.

        SECURITY: unpickling can execute arbitrary code; only load files
        that come from a trusted source.
        '''
        with open(fileName, 'rb') as inModelFile:
            loaded = pickle.load(inModelFile)
        if not isinstance(loaded, autoARIMA):
            raise TypeError('%r does not contain an autoARIMA object'
                            % (fileName,))
        self.__dict__.update(loaded.__dict__)
        return self
# A value close to 0 indicates strong positive correlation, while a value of 4 indicates strong negative correlation. print "==================== Durbin-Watson =====================" print sm.stats.durbin_watson(arima_mod100.resid.values) print "========================================================" fig = plt.figure(figsize=(10,5)) ax = fig.add_subplot(111) ax = arima_mod100.resid.plot(ax=ax) ax.set_title("Residual series") plt.show() resid = arima_mod100.resid print "============== Residuals normality test ================" print st.normaltest(resid) print "========================================================" fig = plt.figure(figsize=(10,5)) ax = fig.add_subplot(111) ax.set_title("Residuals test for normality") fig = qqplot(resid, line='q', ax=ax, fit=True) plt.show() fig = plt.figure(figsize=(10,5)) ax = fig.add_subplot(111) ax = trainWTI.ix['2012':].plot(ax=ax) fig = arima_mod100.plot_predict('2014m1', '2015m12', dynamic=True, ax=ax, plot_insample=False) ax.set_title("Prediction of spot prices") ax.set_xlabel("Dates") ax.set_ylabel("Price [USD]") plt.show()
# First model: ARIMA with the orders held in p, d and q; fitted with the
# default method, then the fit is plotted.
model = pf.ARIMA(data=dts, ar=p, integ=d, ma=q)
x = model.fit()
model.plot_fit(figsize=(15, 4))
# The string below is disabled code kept as a bare string literal.
''' mu, Y = model._model(model.latent_variables.get_z_values()) fitted_values = pd.Series(model.link(mu),index=dts.ix[-len(mu):].index) dts.subtract(fitted_values).plot() '''

# MLE: Maximum Likelihood Estimation
# Second model: fixed AR(4)/MA(4) with no differencing on the
# 'Current_value' column. This rebinds `model` and `x` from above.
model = pf.ARIMA(data=dts, ar=4, ma=4, integ=0, target='Current_value') # family is pf.Normal() [by default, per the original author]
x = model.fit("MLE")
x.summary()
model.plot_z(figsize=(15, 7)) # latent variable plot
model.plot_z(indices=range(1, 9)) # latent variable plot, indices 1..8 only
model.plot_fit(figsize=(15, 5)) # ARIMA model fit
model.plot_predict(h=30, figsize=(
    15, 5)) # plots predictions for h=30 steps ahead (the original comment said 5 steps, 95% confidence interval — TODO confirm the interval)
model.plot_predict_is(
    h=30
) # plots rolling in-sample prediction with h=30 (the original comment said past 5 steps): gives an idea of performance
#model.plot_predict(h=20,past_values=20,figsize=(15,5))
#predictions = model.predict(h=5, intervals=True) # outputs dataframe of predictions
# The string below is disabled code kept as a bare string literal.
''' samples = model.sample(nsims=10) # returns 10 samples from the data ppc_pvalue = model.ppc(T=np.mean) # p-value for mean posterior predictive test model.plot_sample(nsims=10) # draws samples from the model model.plot_ppc(T=np.mean) # plots histogram of posterior predictive check for mean '''
print(predict_dta)
print("##########使用forecast预测数据###################")
# ------------------ end: predicting data with forecast ------------------

# ---------------- begin: predicting data with plot_predict ----------------
print("##########使用plot_predict预测数据###################")
# `typ` is only passed when d is non-zero.
if d == 0:
    predict_dta2 = arima.predict(start = forecast_start_date, end = forecast_end_date,dynamic = False)
else:
    predict_dta2 = arima.predict(start = forecast_start_date, end = forecast_end_date,dynamic = False,typ = forecast_typ)
print(predict_dta2)

# Plot the analysed data from the second row onward, then overlay the
# prediction for the forecast window without re-plotting in-sample data.
# `xdata_pred2` first holds the figure from plt.subplots and is then rebound
# to the return value of plot_predict.
# NOTE(review): `.ix` is deprecated in newer pandas — verify the pandas
# version this runs against.
xdata_pred2,ax = plt.subplots(figsize = fig_size )
ax = data_analysis.ix[1:].plot(ax=ax)
xdata_pred2 = arima.plot_predict(start = forecast_start_date,end = forecast_end_date,dynamic = False, ax = ax, plot_insample = False)
plt.show()
#print(xdata_pred2)
print("##########使用plot_predict预测数据###################")
# ----------------- end: predicting data with plot_predict -----------------

# ------------------- begin: predicting data with predict -------------------
# dynamic=False ensures one-step-ahead forecasts are produced, meaning the
# forecast for each point is generated using the full history up to that
# point (original author's explanation).
# Syntax reference: http://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_model.ARIMAResults.predict.html#statsmodels.tsa.arima_model.ARIMAResults.predict
print("##########使用predict预测数据###################")
# No `start` is given here, only `end`.
if d == 0:
    xdata_pred = arima.predict(end = forecast_end_date, dynamic = False) # predict
else:
    xdata_pred = arima.predict(end = forecast_end_date, dynamic = False,typ = forecast_typ) # predict
print(xdata_pred)
#len(xTest) xTest arima = ARIMA(xTrain, order=(10, 2, 1)) arima = arima.fit() arima.summary() pred = arima.forecast(steps=len(xTest)) print(mean_squared_error(xTest, pred[0])) print(np.sqrt(mean_squared_error(xTest, pred[0]))) #pred ax = arima.plot_predict(start='2019-05-12', end='2019-06-10') ax.set_figheight(9) ax.set_figwidth(19) import itertools """Auto Arima""" auto = auto_arima(xTrain, start_p=0, start_q=0, d=0, max_d=9, max_p=30, end_q=30, start_P=0, start_Q=0,
# First difference of the 'AnnualMeansBA' column, with the leading NaN
# dropped, then plotted.
diff = data['AnnualMeansBA'].diff(1)
diff = diff.dropna()
diff.plot()
plt.show()

import statsmodels
from statsmodels.tsa.arima_model import ARIMA

# Convert the differenced series to a matrix for fitting.
Diff = diff.to_frame().dropna().as_matrix()
# NOTE(review): the size is computed but not printed or stored.
numpy.size(Diff)

# NOTE(review): the input is already differenced once above and the order
# requests d=1 again — confirm that double differencing is intended.
r = ARIMA(Diff, order=(2, 1, 1))
r = r.fit(disp=-1)
r.plot_predict(1, 45)
pred = r.predict(1, 45)
#dates = pd.date_range('1961-01','1970-01',freq='M')

# Attempt to turn predicted differences back into levels: cumulative sum of
# the predictions added to a base value.
predictions_ARIMA_diff = pandas.Series(pred, copy=True)
predictions_ARIMA_diff_cumsum = predictions_ARIMA_diff.cumsum()
# NOTE(review): `Diff` comes from as_matrix(), presumably a NumPy array,
# which would have no `.ix` accessor — verify that this line runs.
# NOTE(review): the base value is taken from the differenced data, not from
# the original series — confirm.
predictions_ARIMA_log = pandas.Series(Diff.ix[0])
predictions_ARIMA_log = predictions_ARIMA_log.add(predictions_ARIMA_diff_cumsum,fill_value=0)
# NOTE(review): no log transform is visible in this fragment, so
# exponentiating here looks unintended — confirm against the code that
# builds `data`.
predictions_ARIMA = numpy.exp(predictions_ARIMA_log)

# NOTE(review): `res` is not defined in this fragment.
plt.plot(res)
plt.plot(predictions_ARIMA)
# Global plot defaults: figure size and font size.
plt.rcParams["figure.figsize"] = [15, 10]
plt.rcParams["font.size"] = 14

# Plot the 'Monthly Mean Total Sunspot Number' column of `data`.
data.plot(y='Monthly Mean Total Sunspot Number')
plt.show()

values = data.values
rec_num = len(values)

# Additive seasonal decomposition of the interpolated data.
result = seasonal_decompose(data.interpolate(), model='additive')
result.plot()
plt.show()

# autocorrelation
autocorrelation_plot(data.values)
plt.show()

# split data: first 70% of the records for training, the rest for testing.
# NOTE(review): train_vals/test_vals are not used again in this fragment.
split = int(rec_num * 0.7)
train_vals = values[:split]
test_vals = values[split:len(values)]

# ARIMA
# `data` is rebound here to the sunspots dataset shipped with `sma`, indexed
# by the dates generated for '1700'..'2008', and its "YEAR" column dropped.
data = sma.datasets.sunspots.load_pandas().data
data.index = pd.Index(sma.tsa.datetools.dates_from_range('1700', '2008'))
del data["YEAR"]

model = ARIMA(data, order=(5, 1, 2)).fit(disp=False)
# NOTE(review): the summary is neither printed nor stored here.
model.summary()

# Plot the data from 1945 onward and overlay the dynamic prediction for
# 2000-2020 without re-plotting in-sample data.
fig, ax = plt.subplots(figsize=(15, 10))
ax = data.loc['1945':].plot(ax=ax)
model.plot_predict('2000', '2020', dynamic=True, ax=ax, plot_insample=False)
plt.show()
price_24 = df.set_index( pd.date_range(start='12-31-2011', end='12-26-2020', freq='D')) return price_24 price_24 = get_price_24(price) price_model = ARIMA(price_24, order=(1, 1, 0)).fit() date_in = st.text_input(label='Input Date to Predict to(format YYYY-MM-DD') date_in = str(date_in) if st.button('Arima Prediction'): fig, ax = plt.subplots(1, figsize=(14, 4)) ax.plot(price_24['2017':].index, price_24['2017':]) fig = price_model.plot_predict('2020', f'{date_in}', dynamic=True, ax=ax, plot_insample=False) ax.legend().get_texts()[1].set_text("95% Prediction Interval") ax.legend(loc="lower left") ax.set_title("Price Forecasts from ARIMA Model") st.pyplot(fig) date = df.index fig3 = px.line(df.bpi.values, x=date, y=df.bpi) fig3.show() st.plotly_chart(fig3)
simulated_data = auto_regressive_process( len(metal_diff), np.array(list(metal_model.params)[1:])) simulated_data.index = metal_diff.index ax[i].plot(simulated_data.index, simulated_data, marker='.') ax[i].set_title("Simulated Data from " + name + " Model Fit") plt.tight_layout() plt.show() #make projection and compare to real data #not working year 0 next_day = pd.to_datetime(end_date) + td(days=1) next_year = pd.to_datetime(end_date, ) + td(days=365) next_day = next_day.date() metal.reindex(pd.DatetimeIndex(start=start_date, end=next_year.year, freq='D')) fig, ax = plt.subplots(1, figsize=(14, 4)) ax.plot(metal.index, metal, marker='.') fig = metal_model.plot_predict(end_date, next_year.year, dynamic=True, ax=ax, plot_insample=False) _ = ax.legend().get_texts()[1].set_text("95% Prediction Interval") _ = ax.legend(loc="lower left") _ = ax.set_title(name + " Series Forcasts from ARIMA Model") plt.show()
# NOTE(review): in this fragment `ARIMA` is used as a fitted results object
# (it exposes fittedvalues, plot_predict and forecast), so the name
# presumably shadows the ARIMA class — verify against the earlier code.

# Plot the data against the fitted values; the title shows the residual sum
# of squares against the 'case' column.
# NOTE(review): '%4f' sets a minimum field width of 4, not 4 decimals —
# confirm whether '%.4f' was meant.
plt.plot(indexedDataset,color='blue')
plt.plot(ARIMA.fittedvalues,color='black')
plt.title('Rss:%4f'% sum((ARIMA.fittedvalues-indexedDataset['case'])**2))

#convert fitted values into series
predict_ARIMA_diff=pd.Series(ARIMA.fittedvalues,copy=True)
print(predict_ARIMA_diff.head())

# Running total of the fitted values.
cumsum_predictions=predict_ARIMA_diff.cumsum()
print(cumsum_predictions.head())

# Broadcast the first observed 'case' value over the whole index, then add
# the running total; fill_value=0 covers index entries missing on one side.
predictions_ARIMA=pd.Series(indexedDataset['case'].iloc[0],index=indexedDataset.index)
predictions_ARIMA=pd.Series(predictions_ARIMA.add(cumsum_predictions,fill_value=0))
# NOTE(review): the result of head() is neither printed nor stored here.
predictions_ARIMA.head()
#predictions_ARIMA=np.exp(predictions_ARIMA_log)

# Plot the data against the reconstructed series.
plt.plot(indexedDataset,color='blue')
plt.plot(predictions_ARIMA,color='red')

# NOTE(review): bare expression; it only shows output if the environment
# echoes the last expression of a cell.
indexedDataset

# Plot predictions for positions 1..312, then forecast 120 steps ahead.
ARIMA.plot_predict(1,312)
x=ARIMA.forecast(steps=120)
## change in xlabel