y_hat = model_persistence(x) predictions.append(y_hat) submission_generator.generate(predictions) test_score = mean_absolute_error(test_y, predictions) print('Test MAE: %.3f' % test_score) # plot predictions vs expected plt.plot(test_y, label="real values") plt.plot(predictions, color='red', label="predictions") plt.legend(loc='upper left') plt.show() # Implementing Auto Regression Model # Training Autoregression print(train[:, 1]) model = AR(train_y) model_fit = model.fit() print('Lag: %s' % model_fit.k_ar) print('Coefficients: %s' % model_fit.params) # Making predictions predictions = model_fit.predict(start=len(train_y), end=len(train_y) + len(test_y) - 1, dynamic=False) error = mean_absolute_error(test_y, predictions) print('Test MAE: %.3f' % error) # Plotting results plt.plot(test_y, label="real values") plt.plot(predictions, color='red', label="predictions") plt.legend(loc='upper left') plt.show()
def test_ar_select_order():
    """Check that AIC order selection picks lag 2 for a simulated AR(2) series.

    The sample is generated with AR polynomial ``[1, -0.75, 0.3]`` and wrapped
    in a pandas ``Series`` with a monthly date index before being handed to
    ``AR.select_order``.
    """
    # ``DatetimeIndex(start=..., periods=..., freq=...)`` was deprecated in
    # pandas 0.24 and removed in 1.0; ``date_range`` builds the same index.
    # Imported locally so the fix does not depend on file-level imports.
    from pandas import date_range

    # 2118
    np.random.seed(12345)
    y = sm.tsa.arma_generate_sample([1, -0.75, 0.3], [1], 100)
    ts = Series(y, index=date_range(start="1/1/1990", periods=100, freq="M"))
    ar = AR(ts)
    res = ar.select_order(maxlag=12, ic="aic")
    assert_(res == 2)
def test_ar_select_order():
    """Check that AIC order selection picks lag 2 for a simulated AR(2) series.

    The sample is generated with AR polynomial ``[1, -.75, .3]`` and wrapped
    in a pandas series with a monthly date index before being handed to
    ``AR.select_order``.
    """
    # ``pandas.TimeSeries`` and the ``DatetimeIndex(start=..., periods=...)``
    # constructor form have both been removed from pandas; ``Series`` and
    # ``date_range`` are the supported equivalents. Imported locally so the
    # fix does not depend on file-level imports.
    from pandas import Series, date_range

    # 2118
    np.random.seed(12345)
    y = sm.tsa.arma_generate_sample([1, -.75, .3], [1], 100)
    ts = Series(y, index=date_range(start='1/1/1990', periods=100, freq='M'))
    ar = AR(ts)
    res = ar.select_order(maxlag=12, ic='aic')
    assert_(res == 2)
def test_ar_select_order_tstat():
    """t-stat order selection on 25 standard-normal draws should return 0."""
    n_obs = 25
    draws = np.random.RandomState(123).randn(n_obs)
    monthly_index = date_range(start='1/1/1990', periods=n_obs, freq='M')
    model = AR(Series(draws, index=monthly_index))
    selected = model.select_order(maxlag=5, ic='t-stat')
    assert_equal(selected, 0)
def predict(x, params):
    """Forecast the next value of ``x`` with an AR model fitted at ``maxlag=1``.

    ``x`` is first passed through ``delete6keep1``. The one-step-ahead
    forecast (index ``len(x)``) is truncated with ``int``. Forecasts above
    100 are printed together with the preprocessed input for inspection.

    Args:
        x: input sequence; preprocessed by ``delete6keep1`` before fitting.
        params: unused; kept so existing callers keep working.

    Returns:
        The integer forecast, or 0 when fitting or predicting raises any
        ``Exception`` (best-effort fallback kept from the original).
    """
    x = delete6keep1(x)
    try:
        model = AR(x)
        res = model.fit(maxlag=1)
        ret = int(res.predict(len(x), len(x))[0])
        if ret > 100:
            # "%s %s" reproduces the output of the Python 2 statement
            # ``print x, ret`` and is also valid on Python 3.
            print("%s %s" % (x, ret))
        return ret
    except Exception:
        # ``except Exception, err`` is Python-2-only syntax; the bound name
        # was never used, so it is dropped.
        return 0
def transform(self, X):
    """Collect the AR standard errors (``bse``) for every series in ``X``.

    For each item of ``X`` and each series inside it, the series is
    subsampled with step ``self.subsample``, an AR model is fitted with
    ``self.order`` passed to ``fit``, and the result's ``bse`` is kept. When
    ``bse`` does not have ``self.order + 1`` entries it is replaced by a NaN
    vector of that length.

    Returns a NumPy array built from the nested per-item lists.
    """
    collected = []
    for item in X:
        row = []
        for series in item:
            fitted = AR(series[::self.subsample]).fit(self.order)
            std_errors = fitted.bse
            expected_len = self.order + 1
            if len(std_errors) != expected_len:
                std_errors = np.array([np.nan] * expected_len)
            row.append(std_errors)
        collected.append(row)
    return np.array(collected)
def project(ser, start, end):
    """Fit an AR model to ``ser[start:end]`` and return its predictions.

    Primarily useful for filling in missing values at the end of a time
    series so that several series line up.

    ser: series to fit the trend to
    start: date to begin fitting
    end: date to end fitting

    Returns: the fitted model's predictions from its lag order ``k_ar``
    through position ``ser.index.shape[0]``.
    """
    from statsmodels.tsa.ar_model import AR

    fit_window = ser[start:end]
    fitted = AR(fit_window).fit()
    first_step = fitted.k_ar
    # NOTE(review): this is the length of the full index, not its last
    # position; if ``predict`` treats ``end`` as inclusive this yields one
    # value past the index -- confirm that is intended.
    last_step = ser.index.shape[0]
    return fitted.predict(start=first_step, end=last_step)
def __call__(self, sample):
    """
    Computes self.n_coef AR coefficients for an array of samples

    See https://en.wikipedia.org/wiki/Autoregressive_model

    When ``self.use_constant`` is true the model is fitted with a constant
    (``trend='c'``) and ``self.n_coef - 1`` lags; otherwise with no constant
    (``trend='nc'``) and ``self.n_coef`` lags.

    @param sample: m x n numpy array, m -- number of samples,
        n -- length of each sample
    @return: m x self.n_coef numpy array containing AR coefficients
        for each sample
    """
    m = sample.shape[0]
    trend = 'c' if self.use_constant else 'nc'
    maxlag = self.n_coef - 1 if self.use_constant else self.n_coef
    # ``range`` replaces the Python-2-only ``xrange`` (a NameError on
    # Python 3); iteration is identical on both versions.
    features = [
        AR(sample[i]).fit(maxlag, trend=trend).params for i in range(m)
    ]
    return np.array(features)
def AutoRegression(train, test):
    """Walk-forward one-step AR forecasts over ``test``.

    An AR model is fitted to ``train``. Each test step is predicted from the
    fitted coefficients and the most recent ``k_ar`` observations, after
    which the actual test value (not the prediction) is added to the history.

    Returns the list of predictions, one per element of ``test``.
    """
    fitted = AR(train).fit()
    window = fitted.k_ar
    coef = fitted.params

    # Seed the history with the last ``window`` training observations.
    tail = train[len(train) - window:]
    history = [tail[k] for k in range(len(tail))]

    predictions = []
    for step in range(len(test)):
        recent = history[len(history) - window:]
        # coef[0] is the first parameter (used as the intercept); coef[1]
        # multiplies the newest observation, coef[2] the one before it, etc.
        yhat = coef[0]
        for offset, value in enumerate(reversed(recent)):
            yhat += coef[offset + 1] * value
        actual = test[step]
        predictions.append(yhat)
        history.append(actual)
    return predictions
def spectrum0_ar(x):
    """Return ``(spec, order)`` from an AIC-selected AR fit of ``x``.

    ``x`` is first regressed on a constant and a linear index (1..len(x)).
    If the residuals of that regression have zero standard deviation, both
    outputs are 0. Otherwise an AR model with a constant is fitted using
    ``ic='aic'`` and ``spec`` is the residual variance divided by
    ``(1 - sum of the non-constant parameters) ** 2``.
    """
    steps = np.arange(1, len(x) + 1)
    design = steps[:, np.newaxis] ** [0, 1]
    trend_coefs, _res, _rank, _sv = lstsq(design, x)
    detrended = x - np.matmul(design, trend_coefs)

    if detrended.std() == 0:
        return 0, 0

    fitted = AR(x).fit(ic='aic', trend='c')
    order = fitted.k_ar
    lag_sum = np.sum(fitted.params[1:])
    spec = np.var(fitted.resid) / (1 - lag_sum) ** 2
    return spec, order
def sentiment_prediction(data, user, horizon=105):
    """Plot past sentiment and an AR forecast of future sentiment.

    An AR model is fitted to ``data["sentiments"]`` with ``maxlag=1`` and
    used to predict from the first unseen position through ``horizon``.
    Both series are drawn on one Plotly figure, which is shown.

    Args:
        data: mapping/frame with ``"sentiments"`` (values to model) and
            ``"time"`` (used as hover text).
        user: account name, used only in the figure title.
        horizon: last position to forecast, inclusive. Defaults to 105, the
            value that was previously hard-coded.
    """
    y_train = data["sentiments"]
    n_obs = len(y_train)
    model_fit = AR(y_train).fit(maxlag=1)
    future_pred = model_fit.predict(start=n_obs, end=horizon, dynamic=False)

    # ``end`` is inclusive, so the forecast has ``horizon - n_obs + 1``
    # values. The original x axis used ``range(n_obs, 105)``, one element
    # short; size it from the forecast itself so x and y always match.
    future_x = list(range(n_obs, n_obs + len(future_pred)))

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(y=data['sentiments'],
                   mode='lines+markers',
                   name='past sentiment',
                   text=(data['time'])))
    fig.add_trace(
        go.Scatter(y=future_pred,
                   x=future_x,
                   mode='lines+markers',
                   name='prediction of future sentiment',
                   text=(data['time'])))
    fig.update_layout(
        title=f"Sentiment Analysis of @{user} twitter interactions")
    fig.show()
def autoRegression3(day):
    """Fit an AR model to per-movie daily gross and print a one-step forecast.

    The series is built from the ``daily`` collection: every record for the
    years 2008 through 2017, followed by records for 2018 until ``day`` of
    them have been added. Each value is ``Gross($)`` (commas stripped)
    divided by ``MoviesTracked``. The series and the forecast for the next
    position are printed; nothing is returned.
    """
    def gross_per_movie(record):
        tracked = record['MoviesTracked']
        gross_text = record['Gross($)'].replace(",", "")
        return int(gross_text) / int(tracked)

    daily = db['daily']
    series = []
    for year in range(2008, 2018):
        for record in daily.find({"Year": year}):
            series.append(gross_per_movie(record))

    # The record is appended before the count is checked, so at least one
    # 2018 record is used whenever any exist.
    for seen, record in enumerate(daily.find({"Year": 2018}), start=1):
        series.append(gross_per_movie(record))
        if seen >= day:
            break
    print(series)

    # fit model
    fitted = AR(series).fit()
    # make prediction
    next_index = len(series)
    res = fitted.predict(next_index, next_index)
    print(res)
def test_mle(self):
    """Compare an MLE-fitted AR(9) without constant against ARMA(9, 0)."""
    # check predict with no constant, #3945
    endog = self.res1.model.endog
    ar_fit = AR(endog).fit(maxlag=9, method='mle', trend='nc', disp=0)
    ar_tail = ar_fit.fittedvalues[-10:]
    # NOTE(review): this compares the AR fitted values with themselves and
    # can never fail; the intended reference was presumably another result
    # (e.g. ``self.res1``) -- confirm before changing.
    assert_allclose(ar_fit.fittedvalues[-10:], ar_tail, rtol=0.015)

    arma_fit = ARMA(endog, (9, 0)).fit(method='mle', trend='nc', disp=0)
    assert_allclose(ar_fit.params, arma_fit.params, atol=5e-6)
    assert_allclose(ar_tail, arma_fit.fittedvalues[-10:], rtol=1e-4)
def metodo_Dm(cpu_workload, Y, Z, output_list):
    """Walk-forward AR pass over ``Y`` producing a mixed prediction list.

    The model is fitted on all of ``Y`` and the same data is walked as the
    test segment. At each step ``t`` the stored prediction is
    ``cpu_workload[t]`` when ``Z[t]`` equals ``output_list[0]``, otherwise
    ``-yhat + 4`` where ``yhat`` is the AR one-step forecast.

    Returns ``(test, predictions, error)`` where ``error`` is the mean
    squared error between ``test`` and ``predictions``.
    """
    n_obs = int(len(Y))
    # NOTE(review): train and test are the same full slice of Y.
    train = Y[:n_obs]
    test = Y[:n_obs]

    # train autoregression
    fitted = AR(train).fit()
    window = fitted.k_ar
    coef = fitted.params

    # walk forward over time steps in test
    tail = train[len(train) - window:]
    history = [tail[k] for k in range(len(tail))]
    predictions = []
    print(len(Z), "  ", len(output_list), "  ", len(test))
    for t in range(len(test)):
        recent = history[len(history) - window:]
        yhat = coef[0]
        for offset, value in enumerate(reversed(recent)):
            yhat += coef[offset + 1] * value
        actual = test[t]
        chosen = cpu_workload[t] if Z[t] == output_list[0] else -yhat + 4
        predictions.append(chosen)
        history.append(actual)
    error = mean_squared_error(test, predictions)
    return test, predictions, error
def autoregression(data, train_test_percentage=20):
    """Walk-forward AR forecast with a plot of predictions against actuals.

    The first ``train_test_percentage`` percent of ``data`` is used to fit
    the AR model; the remainder is forecast one step at a time, adding each
    actual observation to the lag history after it has been predicted.

    Args:
        data: indexable sequence of observations.
        train_test_percentage: share of ``data``, in percent, used for
            training (the rest is the test segment).

    Returns:
        List of one-step-ahead forecasts, one per test observation.

    Side effects:
        Prints the test MSE and shows a matplotlib figure.
    """
    train_test_size = int(len(data) * float(train_test_percentage) / 100)
    train, test = data[0:train_test_size], data[train_test_size:]

    # train autoregression
    model = AR(train)
    model_fit = model.fit()
    window = model_fit.k_ar
    coef = model_fit.params

    # walk forward over time steps in test
    history = train[len(train) - window:]
    history = [history[i] for i in range(len(history))]
    predictions = list()
    for t in range(len(test)):
        length = len(history)
        lag = [history[i] for i in range(length - window, length)]
        yhat = coef[0]
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        obs = test[t]
        predictions.append(yhat)
        history.append(obs)

    mse_error = mean_squared_error(test, predictions)
    # Parenthesised single-argument print: same output on Python 2, and no
    # longer a syntax error on Python 3.
    print('Autoregression MSE: ' + str(mse_error))

    pyplot.plot(range(len(test)), predictions, color='red', lw=2,
                label='prediction')
    pyplot.plot(range(len(test)), test, color='green', lw=2, label='actual')
    pyplot.ylabel('max temp')
    pyplot.xlabel('days from 1/1/2009')
    pyplot.title('Autoregression')
    pyplot.show()
    return predictions
def modelling_AR(df, name, split_index=22808):
    """Fit AR(4) models to a raw and a low-pass-filtered close series.

    The column ``CLOSE_{name}`` of ``df`` is taken as the raw series ``X``; a
    Butterworth-filtered copy (``signal.butter(3, 1/10)`` applied with
    ``filtfilt``) is the filtered series ``Xf``. Both are split at
    ``split_index``: rows before it train an AR model (``maxlag=4``), rows
    from it onwards are forecast.

    Args:
        df: frame containing the ``CLOSE_{name}`` column.
        name: suffix selecting the close column.
        split_index: positional row at which the data is split into training
            and forecast segments. Defaults to 22808, the value that was
            previously hard-coded.

    Returns:
        DataFrame indexed like the forecast segment with columns ``GT``
        (actual raw values), ``X`` and ``Xf`` (forecasts from the raw and
        filtered models) and ``diff_X`` / ``diff_Xf`` (actual raw values
        minus each forecast).
    """
    data_close = df[f'CLOSE_{name}']
    b, a = signal.butter(3, 1/10)
    filtrd_data_close = signal.filtfilt(b, a, data_close)
    df2 = pd.DataFrame(
        {"X": data_close.to_numpy(), "Xf": filtrd_data_close},
        index=df.index)

    dr = df2.index
    realidad = df2.loc[dr[:split_index]]
    futuro = df2.loc[dr[split_index:]]

    predictions_AR = dict()
    for col in realidad.columns:
        train = realidad[col]
        test = futuro[col]

        # Train the AR model
        model_AR = AR(train)
        print(f"Entrenando con los datos desde la serie {col}")
        model_fit_AR = model_AR.fit(maxlag=4)

        # Predict the AR values over the forecast segment
        predictions_AR[f'{col}_prediction'] = model_fit_AR.predict(
            start=len(train), end=len(train) + len(test) - 1, dynamic=False)

    pred_AR = pd.DataFrame(predictions_AR)
    pred_AR.index = futuro.index

    AR_predictions = pd.DataFrame(
        {
            "GT": futuro.X,
            "X": pred_AR.X_prediction,
            "Xf": pred_AR.Xf_prediction,
            "diff_X": futuro.X - pred_AR.X_prediction,
            "diff_Xf": futuro.X - pred_AR.Xf_prediction,
        },
        index=futuro.index)
    return AR_predictions
def AutoRegressive(self, data, testSize=2, test=True):
    """Autoregressive model used for time-series predictions.

    If ``test`` is true, the last ``testSize`` points of ``data`` are held
    out: each is predicted one step ahead and the actual value is fed back
    into the history. Returns ``(pred, error, testData)`` with ``error``
    computed by ``mse``.

    Otherwise the model is fitted on all of ``data`` and ``testSize`` future
    values are predicted, feeding each prediction back into the history.
    Returns ``(pred, None)``.
    """
    print(data.shape)
    if test:
        trainData, testData = data[:-testSize], data[-testSize:]
    else:
        trainData = data

    modelFit = AR(trainData).fit()
    winSize = modelFit.k_ar
    coeff = modelFit.params

    predData = list(trainData[-winSize:])
    pred = []
    for i in range(testSize):
        recent = list(predData[-winSize:])
        estimate = coeff[0]
        # use winSize number of data to predict future value
        for n in range(winSize):
            estimate += coeff[n + 1] * recent[winSize - (n + 1)]
        # in test mode feed back the actual value, otherwise the prediction
        predData.append(testData[i] if test else estimate)
        pred.append(estimate)

    if not test:
        return pred, None
    return pred, mse(testData, pred), testData
def time_series(ts_dict, num_pred=7, title="Efficiency"):
    '''Models and predicts time series from data.

    For every series in ``ts_dict`` the last ``num_pred`` points are held
    out, an AR model (weekly frequency) is fitted to the rest (excluding the
    very first point) and the held-out span is predicted. A history trace
    and a prediction trace are added to one Plotly figure per series.

    Args:
        ts_dict: mapping of name -> pandas series.
        num_pred: number of trailing points to predict.
        title: figure title.

    Returns:
        dict with ``figure`` plus ``curr_val`` (last training value) and
        ``first_pred`` (first predicted value) of the LAST series iterated.
        An empty ``ts_dict`` fails because those values are never assigned.
    '''
    data = []
    for k in ts_dict:
        ts = ts_dict[k]
        split = len(ts) - num_pred
        train, test = ts[1:split], ts[split:]

        # train autoregression
        model = AR(train, freq="W")
        model_fit = model.fit()

        # make predictions
        predictions = model_fit.predict(start=len(train),
                                        end=len(train) + len(test) - 1,
                                        dynamic=False)

        # Prepend the last training point so the prediction line connects to
        # the history line. The original sliced ``ts[len(ts) - 8:len(ts) - 7]``
        # which is only right for num_pred == 7; derive it from num_pred.
        predict = pd.concat([ts[split - 1:split], predictions])
        line_predict = go.Scatter(x=predict.index,
                                  y=predict.values,
                                  name="prediccion " + k)
        line_hist = go.Scatter(x=train.index,
                               y=train.values,
                               name="historicos " + k)
        data += [line_hist, line_predict]

    layout = go.Layout(title=title)
    figure = go.Figure(data=data, layout=layout)
    return ({
        'figure': figure,
        # ``.iloc`` makes the positional access explicit; plain ``[-1]`` /
        # ``[0]`` on a date-indexed series relies on a deprecated fallback.
        'curr_val': train.iloc[-1],
        'first_pred': predictions.iloc[0]
    })
def fit_ar(outputs, inputs, guessed_dim):
    """Fits an AR model of order p = guessed_dim.

    Args:
      outputs: Array with the output values from the LDS.
      inputs: Array with exogenous inputs values.
      guessed_dim: Guessed hidden dimension.

    Returns:
      - Fitted AR coefficients, an array of length ``guessed_dim``. Without
        inputs, coefficients beyond the BIC-selected lag are zero. With
        inputs, an all-zero vector is returned (after a
        ``ConvergenceWarning``) if the ARMA fit raises ``ValueError`` or
        ``LinAlgError``.
    """
    if outputs.shape[1] > 1:
        # If there are multiple output dimensions, fit autoregressive params
        # on each dimension separately and average.
        # ``range`` replaces the Python-2-only ``xrange`` (NameError on
        # Python 3).
        params_list = [
            fit_ar(outputs[:, j:j + 1], inputs, guessed_dim)
            for j in range(outputs.shape[1])
        ]
        return np.mean(
            np.concatenate([a.reshape(1, -1) for a in params_list]), axis=0)
    if inputs is None:
        model = AR(outputs).fit(ic='bic', trend='c', maxlag=guessed_dim,
                                disp=0)
        arparams = np.zeros(guessed_dim)
        # Skip the trend term(s) at the front of ``params``.
        arparams[:model.k_ar] = model.params[model.k_trend:]
        return arparams
    else:
        model = ARMA(outputs, order=(guessed_dim, 0), exog=inputs)
        try:
            arma_model = model.fit(start_ar_lags=guessed_dim, trend='c',
                                   disp=0)
            return arma_model.arparams
        except (ValueError, np.linalg.LinAlgError) as e:
            warnings.warn(str(e), sm_exceptions.ConvergenceWarning)
            return np.zeros(guessed_dim)
def test_ar_errors(reset_randomstate):
    """Each invalid use of ``AR`` must raise ``ValueError`` with its message.

    Constructing ``AR`` is expected to emit a ``FutureWarning`` each time.
    """
    with pytest.raises(ValueError, match='Only the univariate case'), \
            pytest.warns(FutureWarning):
        AR(np.empty((1000, 2)))

    with pytest.warns(FutureWarning):
        model = AR(np.random.standard_normal(1000))

    # invalid fit options
    with pytest.raises(ValueError, match='Method yw not'):
        model.fit(method='yw')
    with pytest.raises(ValueError, match='ic option fpic not'):
        model.fit(ic='fpic')

    # invalid prediction ranges
    fitted = model.fit()
    with pytest.raises(ValueError, match='Start must be >= k_ar'):
        fitted.predict(start=0)
    with pytest.raises(ValueError, match='Prediction must have `end` after'):
        fitted.predict(start=100, end=99)

    # wrong number of start parameters
    with pytest.raises(ValueError, match='Length of start params'), \
            pytest.warns(FutureWarning):
        noise = np.random.standard_normal(1000)
        AR(noise).fit(maxlag=2, method='mle', start_params=[1, 1])
def test_ar_errors(reset_randomstate):
    """Check the ``ValueError`` raised for each invalid use of ``AR``.

    Constructing ``AR`` is expected to emit a ``FutureWarning`` each time.
    """
    with pytest.raises(ValueError, match="Only the univariate case"), \
            pytest.warns(FutureWarning):
        AR(np.empty((1000, 2)))

    with pytest.warns(FutureWarning):
        model = AR(np.random.standard_normal(1000))

    # fit options that must be rejected, with the expected message
    bad_fit_options = (
        ({"method": "yw"}, "Method yw not"),
        ({"ic": "fpic"}, "ic option fpic not"),
    )
    for options, message in bad_fit_options:
        with pytest.raises(ValueError, match=message):
            model.fit(**options)

    fitted = model.fit()
    # prediction ranges that must be rejected, with the expected message
    bad_ranges = (
        ({"start": 0}, "Start must be >= k_ar"),
        ({"start": 100, "end": 99}, "Prediction must have `end` after"),
    )
    for span, message in bad_ranges:
        with pytest.raises(ValueError, match=message):
            fitted.predict(**span)

    with pytest.raises(ValueError, match="Length of start params"), \
            pytest.warns(FutureWarning):
        noise = np.random.standard_normal(1000)
        AR(noise).fit(maxlag=2, method="mle", start_params=[1, 1])
class TestingActivity4_01(unittest.TestCase):
    """Checks the AR fit on the ``TempAvgF`` column of the weather dataset."""

    def setUp(self) -> None:
        # The CSV lives in ../Datasets relative to this file.
        here = os.path.dirname(os.path.abspath(__file__))
        csv_path = os.path.join(here, '..', 'Datasets', 'austin_weather.csv')
        self.data = pd.read_csv(csv_path)

    def test_AR(self):
        """Default fit should select lag 23 and match the first two params."""
        self.model = AR(self.data.TempAvgF)
        self.model_fit = self.model.fit()
        self.max_lag = self.model_fit.k_ar
        self.assertEqual(self.max_lag, 23)

        self.params = self.model_fit.params[0:4]
        for position, expected in enumerate((1.9094, 0.9121)):
            self.assertEqual(round(self.params[position], 4), expected)
def test_mle(self):
    """Compare an MLE-fitted AR(9) without constant against ARIMA(9, 0, 0)."""
    # check predict with no constant, #3945
    endog = self.res1.model.endog
    with pytest.warns(FutureWarning):
        ar_fit = AR(endog).fit(maxlag=9, method="mle", trend="nc", disp=0)
    ar_tail = ar_fit.fittedvalues[-10:]
    # NOTE(review): this compares the AR fitted values with themselves and
    # can never fail; the intended reference was presumably another result
    # (e.g. ``self.res1``) -- confirm before changing.
    assert_allclose(ar_fit.fittedvalues[-10:], ar_tail, rtol=0.015)

    arima_fit = ARIMA(endog, order=(9, 0, 0), trend="n").fit()
    # The last ARIMA parameter is dropped before comparing.
    assert_allclose(ar_fit.params, arima_fit.params[:-1], rtol=1e-2)
    assert_allclose(ar_tail, arima_fit.fittedvalues[-10:], rtol=1e-4)
def get_stat_AR_coefficients(self, signals, max_lag):
    """Get the auto-regression coefficients for a set of time series signals.

    Args:
        signals: 2-D array of waveforms, one per column (indexed as
            ``signals[:, i]``).
        max_lag (int): The maximum lag of the AR model. Every signal
            contributes exactly ``max_lag + 1`` parameters; shorter
            parameter vectors are zero padded, longer ones truncated.

    Returns
        DataFrame: A dataframe that contains a single row holding the
        parameters of all signals back to back (``max_lag + 1`` columns per
        signal).
    """
    n_params = max_lag + 1
    per_signal = []
    for i in range(np.shape(signals)[1]):
        column = signals[:, i]
        # The AR model throws for some constant signals. The signals should
        # have been normalized into z-scores, in which case the parameters
        # for an all zero signal are all zero.
        # Fixes: the constant check now looks at column i (it used row
        # ``signals[i]``), and the zero vector is no longer wrapped in a
        # one-argument ``np.append`` call, which raised TypeError.
        if self.is_constant_signal(column) and signals[0, i] == 0:
            parameters = np.zeros(n_params)
        else:
            model_fit = AR(column).fit(maxlag=max_lag, ic=None)
            fitted = model_fit.params
            n_fitted = np.shape(fitted)[0]
            if n_fitted < n_params:
                parameters = np.pad(fitted, (0, n_params - n_fitted),
                                    'constant', constant_values=0)
            else:
                # Truncate to n_params (the original kept only ``max_lag``
                # values here, one fewer than every other branch).
                parameters = fitted[:n_params]
        per_signal.append(parameters)

    coefficients = np.concatenate(per_signal) if per_signal else np.zeros(0)
    return pd.DataFrame(coefficients).T
def returnpred(p, m, file='SIH.csv'):
    """Build a Plotly ``div`` comparing held-out values with AR predictions.

    Rows of the CSV matching product ``p`` and month ``m`` are averaged per
    ``Day``; the column at position 5 of that result is modelled. The first
    25 values train the AR model, which then predicts from position 25
    through ``len(series) + 10``. The matplotlib figure is converted to
    Plotly and returned as an HTML div string.
    """
    dataset = pd.read_csv(file)
    is_match = (dataset['Product_Name'] == p) & (dataset['Month'] == m)
    daily_mean = dataset.loc[is_match].groupby('Day').mean()
    daily_mean = daily_mean.rename(columns={'Month': 'days'})
    series = daily_mean.iloc[:, 5]

    n_obs = len(series)
    training, held_out = series[0:25], series[25:n_obs]
    fitted = AR(training).fit()
    forecast = fitted.predict(start=25, end=n_obs + 10)

    plt.figure()
    plt.plot(held_out)
    plt.plot(forecast, color='red')
    plt.title("Future Predictions of different company")
    plt.legend(['Original', 'Predictions'])

    plotly_fig = tls.mpl_to_plotly(plt.gcf())
    plotly_fig['layout']['width'] = 1200
    return plot(plotly_fig, output_type='div', include_plotlyjs=False)
class TestingExercise4_01(unittest.TestCase):
    """Checks the AR fit on the ``close`` column of the spx dataset."""

    def setUp(self) -> None:
        # The CSV lives in ../Datasets relative to this file.
        here = os.path.dirname(os.path.abspath(__file__))
        csv_path = os.path.join(here, '..', 'Datasets', 'spx.csv')
        self.data = pd.read_csv(csv_path)

    def test_AR(self):
        """Default fit should select lag 36 and match the first two params."""
        self.model = AR(self.data.close)
        self.model_fit = self.model.fit()
        self.max_lag = self.model_fit.k_ar
        self.assertEqual(self.max_lag, 36)

        self.params = self.model_fit.params[0:4]
        for position, expected in enumerate((0.1142, 0.9442)):
            self.assertEqual(round(self.params[position], 4), expected)
def main(csv_file_path):
    """Rolling one-step AR forecast over the test split of a CSV series.

    For each of ``PREDICTIONS`` steps a model is re-fitted on the current
    history, the next value is predicted, and the history window is slid
    forward by one: the oldest value is dropped and the actual test value is
    added, so its length stays constant.

    Returns ``(test, prediction_list)``; the test MSE is printed.
    """
    train, test = split_samples(load_csv(csv_file_path))
    history = list(train)

    prediction_list = []
    for step in range(PREDICTIONS):
        fitted = AR(history).fit()
        next_index = len(history)
        # NOTE(review): ``predict_using_learned_params`` is not a method I
        # can confirm on the stock statsmodels result object -- verify it
        # exists in the version this project uses.
        forecast = fitted.predict_using_learned_params(
            start=next_index, end=next_index, dynamic=False)
        prediction_list.append(forecast[0])

        actual = test[step]
        print('predicted={pred_value}, expected={real_value}'.format(
            pred_value=prediction_list[-1], real_value=actual))

        # slide the window: drop the oldest value, add the newest actual
        history = history[1:] + [actual]

    error = mean_squared_error(test, prediction_list)
    print('Test MSE = {mse_value}'.format(mse_value=error))
    return test, prediction_list
def AR_prediction(data, test_data, test_for_AR, ar_summary):
    """Predict log returns per ticker with AR models and log the results.

    For every ticker in ``test_for_AR`` an AR model is fitted to the
    ticker's training log returns, with ``ar_summary[ticker][0]`` passed to
    ``fit``, and used to predict over the span of ``test_data.index``. The
    prediction is plotted through ``visualization`` and written, with its
    RMSE against the test log returns, to
    ``prediction_results/AR_model_prediction.txt``.

    Returns None.
    """
    print("AR_prediction() start execute")
    log_returns = to_log_return(data)
    with open('prediction_results/AR_model_prediction.txt',
              'w') as results_file:
        for ticker in test_for_AR:
            print("AR_prediction() start execute in ticker: " + ticker)
            training = log_returns[ticker].dropna()
            model = AR(training)
            fitted = model.fit(ar_summary[ticker][0])
            forecast = fitted.predict(test_data.index[0],
                                      test_data.index[-1])

            # align the actual test returns with the forecast index
            actual = to_log_return(test_data)[ticker].dropna()
            actual = actual[forecast.index]
            visualization(actual, forecast, 'AR_prediction', ticker)

            squared_errors = (forecast - actual).dropna() ** 2
            rmse = sqrt(sum(squared_errors) / actual.size)
            results_file.write(
                "The prediction for {} is \n {}\n RMSE:{}\n".format(
                    ticker, forecast, rmse))
def OU_fitting(series):
    """Derive OU-process parameters from an AR fit with ``maxlag=1``.

    series: pd.Series, indexed by date.

    The two fitted parameters are unpacked as ``b`` then ``a``, and:
    ``lam = -log(a)``, ``mu = b / (1 - a)``,
    ``sigma = std(resid) * sqrt(-2 * log(a) / (1 - a * a))``.

    Returns a dict with keys ``ar_model``, ``lam``, ``mu`` and ``sigma``.
    """
    ar_model = AR(endog=series).fit(maxlag=1)
    b, a = ar_model.params.tolist()
    resid_std = np.std(ar_model.resid)

    scale = np.sqrt(-2 * np.log(a) / (1 - a * a))
    return {
        'ar_model': ar_model,
        'lam': -np.log(a),
        'mu': b / (1 - a),
        'sigma': resid_std * scale,
    }
def ar_coefficient(x, c, param):
    """
    This feature calculator fits an autoregressive AR(k) process, where k is
    the maximum lag of the process

    .. math::

        X_{t}=\\varphi_0 +\\sum _{{i=1}}^{k}\\varphi_{i}X_{{t-i}}+\\varepsilon_{t}

    For every distinct maxlag "k" in param an AR process is fitted once.
    Then the coefficients :math:`\\varphi_{i}` whose index :math:`i` is
    listed under "coeff" for that k are returned. A requested index above k
    yields NaN; an index missing from the fitted parameters yields 0; a
    failed fit (``LinAlgError`` / ``ValueError``) yields NaN for that k.

    :param x: the time series to calculate the feature of
    :type x: pandas.Series
    :param c: the time series name
    :type c: str
    :param param: contains dictionaries {"coeff": x, "k": y} with x,y int
    :type param: list
    :return x: the different feature values
    :return type: pandas.Series
    """
    df_cfg = pd.DataFrame(param)
    df_cfg["k"] = df_cfg["k"].apply(int)

    name_template = "{}__ar_coefficient__k_{}__coeff_{}"
    # Collected per k and concatenated once at the end: ``Series.append``
    # was removed in pandas 2.0.
    parts = []
    for k in df_cfg["k"].unique():
        coeff = df_cfg[df_cfg["k"] == k]["coeff"]
        names = [name_template.format(c, k, p) for p in coeff]
        try:
            # NOTE(review): ``solver="mle"`` is passed as-is from the
            # original; if maximum-likelihood estimation was intended the
            # keyword may need to be ``method`` -- confirm.
            mod = AR(list(x)).fit(maxlag=k, solver="mle").params
            # Start from NaN (``np.nan``; the ``np.NaN`` alias was removed
            # in NumPy 2.0) so indices above k need no explicit assignment.
            res_tmp = pd.Series(np.nan, index=names, dtype=float)
            for p in coeff:
                if p <= k:
                    try:
                        res_tmp[name_template.format(c, k, p)] = mod[p]
                    except IndexError:
                        res_tmp[name_template.format(c, k, p)] = 0
        except (LinAlgError, ValueError):
            res_tmp = pd.Series([np.nan] * len(coeff), index=names)
        parts.append(res_tmp)

    if not parts:
        return pd.Series(dtype=float)
    return pd.concat(parts)
def __init__(self, train, test, last_train, alpha, diff):
    """Store the inputs and create one unfitted AR model per trailing column.

    With ``k = len(last_train)``, the last ``k`` columns of ``train`` are
    recorded as ``idx_trans`` and the remaining leading columns as
    ``idx_orig``. ``models_init`` maps each ``idx_trans`` column to an
    ``AR`` model built on that column's values; ``models_param`` starts
    empty.
    """
    self.train = train
    self.test = test
    self.last_train = last_train
    self.models_init = {}
    self.models_param = {}

    n_trailing = len(last_train)
    columns = self.train.columns
    self.idx_orig = columns[:-n_trailing]
    self.idx_trans = columns[-n_trailing:]

    self.alpha = alpha
    self.diff = diff

    for column in self.idx_trans:
        self.models_init[column] = AR(self.train[column].values)
def predict_AR(df, window=ROLLING_WINDOW, p=1):
    """Method 1: forecast with an AR(p) model over a given rolling window.

    Input:
        df: DataFrame, raw volatility data
        window: int, rolling window length
        p: int, lag of AR model
    Output:
        vols_pred: time series, predicted volatility
    """
    def next_step_forecast(chunk):
        # Fit on the window and predict the position right after it.
        fitted = AR(chunk).fit(maxlag=p, disp=0)
        return fitted.predict(start=chunk.size, end=chunk.size)

    vols_pred = df[VOL_NAME].rolling(window).apply(next_step_forecast)
    vols_pred.name = '_'.join(('AR', repr(window), repr(p)))
    print(vols_pred.name + " prediction finished.")
    return vols_pred
def initialize(df):
    """Prompt for a grouping and a value, then run every forecaster on it.

    Resets the module-level ``predictions`` to an empty DataFrame, asks
    which column to forecast by and which value of that column to select,
    sums ``INV_AMOUNT_USD`` per (column, ``Date``) and passes the rows for
    the selected value to ``FBProphet``, ``AR``, ``ARIMA`` and
    ``expo_smooth`` in that order.
    """
    global predictions
    predictions = pd.DataFrame()

    user_input = input(
        "Do you want to forecast by CATEGORY, VENDOR or PRODUCT? Select one: "
    ).upper()
    prompt = ('Type down your specific' + ' ' + user_input +
              ' in the same format' + ':')
    user_selection = input(prompt)

    subset = df[['INV_AMOUNT_USD', user_input, 'Date']]
    totals = subset.groupby([user_input, 'Date']).sum().reset_index()
    is_selected = totals[user_input] == user_selection
    sample = totals.loc[is_selected][['INV_AMOUNT_USD', 'Date']]

    FBProphet(sample)
    AR(sample)
    ARIMA(sample)
    expo_smooth(sample)
def predict_AR(array, p=1):
    """Forecast the value following ``array`` with an AR(p) model.

    Input:
        array: data to fit; its ``size`` gives the position to predict
        p: int, lag of AR model
    Output:
        vols_pred: the prediction for position ``array.size`` (dynamic)
    """
    next_index = array.size
    fitted = AR(array).fit(maxlag=p, disp=0)
    return fitted.predict(start=next_index, end=next_index, dynamic=True)
def ARC_cal(input_file):
    """Compute auto-regression coefficients for every channel of a file.

    Reference:
    https://machinelearningmastery.com/autoregression-models-time-series-forecasting-python/

    The number of channels is the number of whitespace-separated fields on
    the first line of ``input_file``. The file is then read line by line via
    pandas and each line is split on whitespace into floats. An AR model
    with default settings is fitted to each channel (column).

    Args:
        input_file: path to a text file of whitespace-separated numbers, one
            sample per line.

    Returns:
        ``(ARC_list_LagNumber, ARC_list)``: per channel, the selected lag
        order and the list of fitted parameters.
    """
    # Only the modules actually used are imported. The original also
    # imported matplotlib, sklearn and pandas.Series here without using
    # them, which made the call fail wherever those were not installed.
    import numpy as np
    import pandas as pd
    from statsmodels.tsa.ar_model import AR  # conda install statsmodels

    with open(input_file) as file_var:
        first_line = file_var.readline()
    channel_numbers = len(first_line.split())

    rows = pd.read_csv(input_file, header=None).values
    input_file_array = np.array(
        [[float(ele) for ele in row[0].split()] for row in rows])

    ARC_list = []
    ARC_list_LagNumber = []
    for column_n in range(channel_numbers):
        model_fit = AR(input_file_array[:, column_n]).fit()
        ARC_list_LagNumber.append(model_fit.k_ar)
        ARC_list.append(model_fit.params.tolist())
    return ARC_list_LagNumber, ARC_list
def test_summary_corner():
    """Summary text must name the model, lag terms and start date."""
    cpi_diff = sm.datasets.macrodata.load_pandas().data["cpi"].diff().dropna()
    cpi_diff.index = period_range(start='1959Q1', periods=len(cpi_diff),
                                  freq='Q')

    # four lags
    text = AR(cpi_diff).fit(maxlag=4).summary().as_text()
    for fragment in ('AR(4)', 'L4.cpi', '03-31-1959'):
        assert fragment in text

    # corner case: no lags at all
    text = AR(cpi_diff).fit(maxlag=0).summary().as_text()
    for fragment in ('const', 'AR(0)'):
        assert fragment in text
def forecasting(model_name, resultsDict, predictionsDict, df, df_training,
                df_testcase):
    """Walk-forward one-step forecast of ``Ambient_Temp`` with one model.

    For every test step ``t`` the chosen model is re-fitted on the first
    ``len(df_training) + t`` rows of ``df`` and asked for the next value.
    The metrics and predictions are stored under ``model_name`` in
    ``resultsDict`` / ``predictionsDict`` and a comparison plot is shown.

    Args:
        model_name: one of "SES", "HWES", "AR", "ARIMA", "SARIMAX".
        resultsDict: dict receiving ``metrics.evaluate`` output.
        predictionsDict: dict receiving the predicted values.
        df: full frame with an ``Ambient_Temp`` column.
        df_training: training frame (only its length is used).
        df_testcase: test frame with an ``Ambient_Temp`` column.

    Raises:
        ValueError: if ``model_name`` is not supported (previously this
            surfaced as an unbound-variable error inside the loop).
    """
    smoothing_models = ("SES", "HWES")
    regression_models = ("AR", "ARIMA", "SARIMAX")
    if model_name not in smoothing_models + regression_models:
        raise ValueError("Unsupported model_name: %r" % (model_name,))

    yhat = []
    for t in tqdm(range(len(df_testcase.Ambient_Temp))):
        temp_train = df[:len(df_training) + t]
        if model_name == "SES":
            model = SimpleExpSmoothing(temp_train.Ambient_Temp)
        elif model_name == "HWES":
            model = ExponentialSmoothing(temp_train.Ambient_Temp)
        elif model_name == "AR":
            model = AR(temp_train.Ambient_Temp)
        elif model_name == "ARIMA":
            model = ARIMA(temp_train.Ambient_Temp, order=(1, 0, 0))
        else:  # "SARIMAX"
            model = SARIMAX(temp_train.Ambient_Temp,
                            order=(1, 0, 0),
                            seasonal_order=(0, 0, 0, 3))
        model_fit = model.fit()

        # The original test ``model_name == "SES" or "HWES"`` was always
        # true (a non-empty string is truthy), so the ``dynamic=False``
        # branch could never run. Membership tests restore the intent.
        next_index = len(temp_train)
        if model_name in smoothing_models:
            predictions = model_fit.predict(start=next_index, end=next_index)
        else:
            predictions = model_fit.predict(start=next_index,
                                            end=next_index,
                                            dynamic=False)
        yhat.append(predictions)

    yhat = pd.concat(yhat)
    resultsDict[model_name] = metrics.evaluate(df_testcase.Ambient_Temp,
                                               yhat.values)
    predictionsDict[model_name] = yhat.values
    plt.plot(df_testcase.Ambient_Temp.values, label='Original')
    plt.plot(yhat.values, color='red', label=model_name + ' predicted')
    plt.legend()
    plt.show()
class AutoRegression(PredictionModel):
    """Prediction model backed by an AR fit with ``maxlag=1``."""

    def __init__(self):
        super(AutoRegression, self).__init__()
        # Populated by train().
        self.model = self.model_fitted = self.train_df = None

    def train(self, predict_state, predict_field):
        """Fit the AR model on the ``predict_state`` column of the train frame.

        The frame comes from ``assign_train_df(predict_field)``; the state
        name is remembered for ``predict``.
        """
        self.train_df = self.assign_train_df(predict_field)
        self.state = predict_state
        series = self.train_df[predict_state]
        self.model = AR(series)
        self.model_fitted = self.model.fit(maxlag=1)

    def predict(self):
        """Return ``FUTURE_DAYS`` forecasts, rounded and cast to int32."""
        n_train = len(self.train_df[self.state])
        last_index = len(self.train_df[self.state]) + FUTURE_DAYS - 1
        forecast = self.model_fitted.predict(start=n_train,
                                             end=last_index,
                                             dynamic=False)
        rounded = np.round(forecast, 0)
        return rounded.astype(np.int32).array
def ARcast(data,time,dt=False,axis=-1,missing=0):
    """
    Fill gaps in unevenly sampled data with an autoregressive forecast.

    The data are interpolated onto an even time grid, gaps in the input
    sampling (intervals of at least twice ``dt``) are located, and the
    samples inside each gap are replaced by predictions of an AR model.

    Parameters
    ----------
    data : ~numpy.ndarray
        n dimensional data.
        The length of ``data`` along ``axis`` must equal the length of
        ``time``.
    time : astropy.time.core.Time
        The time of each data point.
    dt : (optional) float
        The time interval between data points, in seconds.
        If not given, it is taken from the first two elements of ``time``.
    axis : (optional) int
        The axis along which to forecast.
    missing : (optional) float
        The value that marks missing data (for example from data alignment).

    Returns
    -------
    ARdata : ~numpy.ndarray
        The data on the even time grid with the gaps filled.
    tf : ~numpy.ndarray
        The times of the returned data points.

    Raises
    ------
    ValueError
        If the size of ``data`` along ``axis`` differs from ``len(time)``.

    Notes
    -----
    The input time must be an astropy.time.core.Time, but the output time
    is a ~numpy.ndarray.

    References
    ----------
    `AR model <https://en.wikipedia.org/wiki/Autoregressive_model>`_.

    `statsmodels.tsa.ar_model.AR <http://statsmodels.sourceforge.net/devel/generated/statsmodels.tsa.ar_model.AR.html>`_.

    Example
    -------
    >>> from fisspy.analysis.forecast import ARcast
    >>> ARdata, tf = ARcast(data,t,dt=20.,axis=1)
    """
    # NOTE(review): ``t`` below is scaled by 24*3600 but this default ``dt``
    # is not -- confirm both end up in the same unit.
    if not dt:
        dt=(time[1]-time[0]).value

    shape=list(data.shape)
    shape0=list(data.shape)
    if shape[axis]!=len(time):
        raise ValueError('The size of data is different from the size of time.')

    # Elapsed time since the first sample, scaled by 24*3600
    # (presumably days -> seconds; TODO confirm).
    t=(time-time[0])*24*3600
    t=t.value
    # Even time grid and the data interpolated onto it.
    tf=np.arange(t[0],t[-1],dt,dtype=float)

    interp=interp1d(t,data,axis=axis)
    datai=interp(tf)

    # Build an axis order that puts the forecast axis first.
    # NOTE(review): ``shape0.index`` returns the first match, so two axes of
    # equal length map to the same position -- verify for such inputs.
    shape.pop(axis)
    ind=[shape0.index(i) for i in shape]
    ind=[axis]+ind
    datat=datai.transpose(ind)

    # Flatten to 2-D: (time, every other axis combined).
    shapei=datat.shape
    datat=datat.reshape((shapei[0],np.prod(shapei[1:])))

    shapet=datat.shape
    # Spacing between consecutive input samples; gaps are spacings >= 2*dt.
    td=t-np.roll(t,1)

    addi=np.where(td >= dt*2)[0]

    for wh in addi:
        for i in range(shapet[1]):
            y=datat[:,i]
            # Last row of the span to overwrite for this gap.
            wh2=wh+int(td[wh]/dt-1)
            # Fit only when fewer than 4 samples of this series carry the
            # missing marker; otherwise mark the span itself as missing.
            if (y==missing).sum()<4:
                bar=AR(y)
                car=bar.fit()
                dar=car.predict(int(wh),int(wh2))
                datat[wh:wh2+1,i]=dar
            else:
                datat[wh:wh2+1,i]=missing
    # Restore the pre-flattening shape and apply the axis permutation again.
    # NOTE(review): this reuses ``ind`` rather than its inverse -- confirm
    # the output axis order for data with more than two dimensions.
    datat=datat.reshape((shapei))

    return datat.transpose(ind), tf
def get_lpc(trame):
    """Return the parameters of an AR model fitted to ``trame`` via ``fit(20)``."""
    return AR(trame).fit(20).params
return numpy.array(diff) # Make a prediction give regression coefficients and lag obs def predict(coef, history): yhat = coef[0] for i in range(1, len(coef)): yhat += coef[i] * history[-i] return yhat series = Series.from_csv('../data/nifty.csv', header=0) # split dataset X = difference(series.values) size = int(len(X) * 0.66) train, test = X[0:size], X[size:] # train autoregression model = AR(train) model_fit = model.fit(maxlag=6, disp=False) window = model_fit.k_ar coef = model_fit.params # walk forward over time steps in test history = [train[i] for i in range(len(train))] predictions = list() for t in range(len(test)): yhat = predict(coef, history) obs = test[t] predictions.append(yhat) history.append(obs) error = mean_squared_error(test, predictions) print('Test MSE: %.3f' % error) # plot pyplot.plot(test)
def test_ar_select_order_tstat():
    """t-stat order selection on 25 standard-normal draws should return 0."""
    # ``DatetimeIndex(start=..., periods=..., freq=...)`` was deprecated in
    # pandas 0.24 and removed in 1.0; ``date_range`` builds the same index.
    # Imported locally so the fix does not depend on file-level imports.
    from pandas import date_range

    rs = np.random.RandomState(123)
    tau = 25
    y = rs.randn(tau)
    ts = Series(y, index=date_range(start="1/1/1990", periods=tau, freq="M"))
    ar = AR(ts)
    res = ar.select_order(maxlag=5, ic="t-stat")
    assert_equal(res, 0)