def predict(x, params):
    """Forecast the next value of a series with a one-lag AR model.

    The input is first passed through ``delete6keep1`` and an autoregressive
    model is fitted with ``maxlag=1``. The single one-step-ahead forecast is
    truncated to an ``int``.

    Args:
        x: Input series; preprocessed by ``delete6keep1`` before fitting.
        params: Unused by this function; kept for interface compatibility.

    Returns:
        The integer forecast, or ``0`` if fitting, predicting, or the integer
        conversion raises.
    """
    x = delete6keep1(x)
    try:
        res = AR(x).fit(maxlag=1)
        ret = int(res.predict(len(x), len(x))[0])
        if ret > 100:
            # Debug trace for unusually large forecasts.
            print('{} {}'.format(x, ret))
        return ret
    except Exception:
        # Deliberate best-effort: any failure maps to a neutral forecast.
        return 0
def transform(self, X):
    """Compute AR-parameter standard errors for every series in ``X``.

    Each innermost series is subsampled with stride ``self.subsample``,
    fitted with ``AR(...).fit(self.order)`` and represented by the ``bse``
    vector of the fit. When that vector does not contain exactly
    ``self.order + 1`` entries it is replaced by a NaN vector of that length.

    Returns:
        ``np.array`` built from the nested (item, series) lists of vectors.
    """
    expected_len = self.order + 1

    def _series_bse(series):
        # One AR fit per series; only the standard errors are kept.
        fitted = AR(series[::self.subsample]).fit(self.order)
        errors = fitted.bse
        if len(errors) == expected_len:
            return errors
        return np.array([np.nan] * expected_len)

    return np.array([[_series_bse(series) for series in item] for item in X])
def __call__(self, sample):
    """
    Computes self.n_coef AR coefficients for an array of samples

    See https://en.wikipedia.org/wiki/Autoregressive_model

    When ``self.use_constant`` is set, the model is fitted with a constant
    term (trend ``'c'``) and ``self.n_coef - 1`` lags; otherwise it is fitted
    without a constant (trend ``'nc'``) and ``self.n_coef`` lags.

    @param sample: m x n numpy array, m -- number of samples, n -- length of each sample
    @return: m x self.n_coef numpy array containing AR coefficients for each sample
    """
    m = sample.shape[0]
    trend = 'c' if self.use_constant else 'nc'
    maxlag = self.n_coef - 1 if self.use_constant else self.n_coef
    # ``range`` instead of ``xrange`` so the code also runs on Python 3.
    features = [
        AR(sample[i]).fit(maxlag, trend=trend).params for i in range(m)
    ]
    return np.array(features)
def sentiment_prediction(data, user, end=105):
    """Plot past sentiment together with a one-lag AR forecast.

    Args:
        data: Mapping with a ``"sentiments"`` series (the training data) and
            a ``"time"`` entry that is attached as hover text to both traces.
        user: Handle interpolated into the figure title.
        end: Index of the last forecast step, inclusive. Defaults to 105,
            the horizon that used to be hard-coded.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    y_train = data["sentiments"]
    start = len(y_train)
    model_fit = AR(y_train).fit(maxlag=1)
    future_pred = model_fit.predict(start=start, end=end, dynamic=False)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(y=y_train,
                   mode='lines+markers',
                   name='past sentiment',
                   text=(data['time'])))
    fig.add_trace(
        go.Scatter(y=future_pred,
                   # ``predict`` includes ``end``, so the x axis must too;
                   # the previous ``range(start, 105)`` was one point short.
                   x=list(range(start, end + 1)),
                   mode='lines+markers',
                   name='prediction of future sentiment',
                   text=(data['time'])))
    fig.update_layout(
        title=f"Sentiment Analysis of @{user} twitter interactions")
    fig.show()
def autoRegression3(day):
    """Fit an AR model on per-movie daily gross and print the next forecast.

    Reads the ``daily`` collection of the module-level ``db``. Every record
    of the years 2008-2017 contributes ``Gross($) / MoviesTracked``; records
    of 2018 are added one by one until ``day`` of them have been consumed
    (the first 2018 record is always consumed when one exists). The collected
    values and the one-step-ahead forecast are printed.

    Args:
        day: Number of 2018 records after which reading stops.
    """
    def _gross_per_movie(record):
        # Gross is stored as a string with thousands separators.
        movies = record['MoviesTracked']
        gross = record['Gross($)'].replace(",", "")
        return int(gross) / int(movies)

    collection = db['daily']
    dailyGrossSet = []
    for year in range(2008, 2018):
        dailyGrossSet.extend(
            _gross_per_movie(rec) for rec in collection.find({"Year": year}))

    for consumed, rec in enumerate(collection.find({"Year": 2018}), start=1):
        dailyGrossSet.append(_gross_per_movie(rec))
        if consumed >= day:
            break
    print(dailyGrossSet)

    fitted = AR(dailyGrossSet).fit()
    next_index = len(dailyGrossSet)
    res = fitted.predict(next_index, next_index)
    print(res)
def AutoRegression(train, test):
    """Walk-forward one-step AR forecasts over ``test``.

    An AR model is fitted once on ``train``. For every test step the forecast
    is computed by hand from the fitted parameters and the most recent
    ``k_ar`` observations; afterwards the true test value is appended to the
    history so the next step uses real data.

    Returns:
        Tuple ``(predictions, window, coef)``: the list of forecasts, the lag
        order of the fit, and its parameter vector.
    """
    fitted = AR(train).fit()
    window, coef = fitted.k_ar, fitted.params

    # Seed the history with the last ``window`` training observations.
    tail = train[len(train) - window:]
    history = [tail[idx] for idx in range(len(tail))]

    predictions = []
    for step in range(len(test)):
        size = len(history)
        lag = [history[pos] for pos in range(size - window, size)]
        yhat = coef[0]
        # coef[1] weighs the newest observation, coef[2] the one before, ...
        for offset, value in enumerate(reversed(lag), start=1):
            yhat += coef[offset] * value
        predictions.append(yhat)
        history.append(test[step])
    return predictions, window, coef
def metodo_Dm(cpu_workload, Y, Z, output_list):
    """Walk-forward AR pass over ``Y`` with label-dependent outputs.

    The model is fitted on all of ``Y`` and evaluated on the same data. For
    step ``t`` the emitted value is ``cpu_workload[t]`` when ``Z[t]`` equals
    ``output_list[0]``; otherwise it is ``-yhat + 4`` where ``yhat`` is the
    hand-computed one-step AR forecast.

    Returns:
        Tuple ``(test, predictions, error)`` where ``error`` is the mean
        squared error between ``test`` and ``predictions``.
    """
    series = Y
    whole = int(len(series))
    # Train and test are deliberately the same full-length slice.
    train = series[:whole]
    test = series[:whole]

    fitted = AR(train).fit()
    window = fitted.k_ar
    coef = fitted.params

    tail = train[len(train) - window:]
    history = [tail[idx] for idx in range(len(tail))]
    predictions = []
    print(len(Z), " ", len(output_list), " ", len(test))
    for step in range(len(test)):
        size = len(history)
        lag = [history[pos] for pos in range(size - window, size)]
        yhat = coef[0]
        for offset, value in enumerate(reversed(lag), start=1):
            yhat += coef[offset] * value
        obs = test[step]
        matches_first = Z[step] == output_list[0]
        predictions.append(cpu_workload[step] if matches_first else -yhat + 4)
        history.append(obs)
    error = mean_squared_error(test, predictions)
    return test, predictions, error
def AutoRegressive(self, data, testSize=2, test=True):
    """Autoregressive model for time-series prediction.

    With ``test`` true, the last ``testSize`` points are held out, each one
    is predicted from the preceding real observations, and the error
    ``mse(testData, pred)`` is reported. With ``test`` false, the model is
    fitted on all of ``data`` and ``testSize`` future values are forecast,
    feeding each prediction back in as the next observation.

    Returns:
        ``(pred, error, testData)`` when ``test`` is true, otherwise
        ``(pred, None)``.
    """
    print(data.shape)
    if test:
        trainData, testData = data[:-testSize], data[-testSize:]
    else:
        trainData = data

    fitted = AR(trainData).fit()
    winSize = fitted.k_ar
    coeff = fitted.params

    predData = list(trainData[-winSize:])
    pred = []
    for i in range(testSize):
        recent = list(predData[-winSize:])
        y = coeff[0]
        # The most recent observation pairs with coeff[1], and so on back.
        for n in range(winSize):
            y += coeff[n + 1] * recent[winSize - (n + 1)]
        # Real data drives the next step in test mode, the forecast otherwise.
        predData.append(testData[i] if test else y)
        pred.append(y)

    if not test:
        return pred, None
    error = mse(testData, pred)
    return pred, error, testData
def autoregression(data, train_test_percentage=20):
    """Fit an AR model on the head of ``data`` and walk forward over the rest.

    Args:
        data: Sequence of observations.
        train_test_percentage: Percentage of ``data`` (taken from the start)
            used for training; the remainder is the test set.

    Returns:
        List of one-step-ahead forecasts, one per test observation. The MSE
        is printed and predictions are plotted against the actual values.
    """
    train_test_size = int(len(data) * float(train_test_percentage) / 100)
    train, test = data[0:train_test_size], data[train_test_size:]

    # train autoregression
    model_fit = AR(train).fit()
    window = model_fit.k_ar
    coef = model_fit.params

    # walk forward over time steps in test
    history = train[len(train) - window:]
    history = [history[i] for i in range(len(history))]
    predictions = list()
    for t in range(len(test)):
        length = len(history)
        lag = [history[i] for i in range(length - window, length)]
        yhat = coef[0]
        for d in range(window):
            yhat += coef[d + 1] * lag[window - d - 1]
        predictions.append(yhat)
        # The true observation, not the forecast, extends the history.
        history.append(test[t])

    mse_error = mean_squared_error(test, predictions)
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print('Autoregression MSE: ' + str(mse_error))

    pyplot.plot(range(len(test)), predictions, color='red', lw=2, label='prediction')
    pyplot.plot(range(len(test)), test, color='green', lw=2, label='actual')
    pyplot.ylabel('max temp')
    pyplot.xlabel('days from 1/1/2009')
    pyplot.title('Autoregression')
    pyplot.show()
    return predictions
def modelling_AR(df, name, split_index=22808):
    """Fit AR(4) models on raw and low-pass-filtered close prices.

    The ``CLOSE_<name>`` column is smoothed with a 3rd-order Butterworth
    filter applied forwards and backwards (``filtfilt``). Both the raw series
    (``X``) and the filtered one (``Xf``) are split at ``split_index``; an AR
    model with ``maxlag=4`` is trained on the first part and used to predict
    the second.

    Args:
        df: DataFrame containing the ``CLOSE_<name>`` column.
        name: Suffix selecting the close-price column.
        split_index: Number of leading rows used for training. Defaults to
            22808, the value that used to be hard-coded.

    Returns:
        DataFrame indexed like the held-out rows with the ground truth
        (``GT``), both predictions (``X``, ``Xf``) and their differences to
        the raw ground truth (``diff_X``, ``diff_Xf``).
    """
    data_close = df[f'CLOSE_{name}']
    b, a = signal.butter(3, 1/10)
    filtrd_data_close = signal.filtfilt(b, a, data_close)
    df2 = pd.DataFrame(
        {"X": data_close.to_numpy(), "Xf": filtrd_data_close},
        index=df.index)

    dr = df2.index
    realidad = df2.loc[dr[:split_index]]
    futuro = df2.loc[dr[split_index:]]

    predictions_AR = dict()
    for col in realidad.columns:
        train = realidad[col]
        test = futuro[col]
        # Train the AR model
        model_AR = AR(train)
        print(f"Entrenando con los datos desde la serie {col}")
        model_fit_AR = model_AR.fit(maxlag=4)
        # Predict the held-out range
        predictions_AR[f'{col}_prediction'] = model_fit_AR.predict(
            start=len(train), end=len(train) + len(test) - 1, dynamic=False)

    pred_AR = pd.DataFrame(predictions_AR)
    pred_AR.index = futuro.index
    AR_predictions = pd.DataFrame({
        "GT": futuro.X,
        "X": pred_AR.X_prediction,
        "Xf": pred_AR.Xf_prediction,
        "diff_X": futuro.X - pred_AR.X_prediction,
        "diff_Xf": futuro.X - pred_AR.Xf_prediction}, index=futuro.index)
    return AR_predictions
def time_series(ts_dict, num_pred=7, title="Efficiency"):
    '''Models and predicts time series from data.

    For every series the last ``num_pred`` points are held out, an AR model
    with weekly frequency is fitted on the rest (the first point is dropped)
    and the held-out range is predicted. Historic and predicted lines are
    collected into one figure.

    Args:
        ts_dict: Mapping of label -> pandas Series.
        num_pred: Number of trailing points to predict.
        title: Figure title.

    Returns:
        Dict with the ``figure`` plus ``curr_val`` (last training value) and
        ``first_pred`` (first predicted value) of the last series processed.

    Raises:
        ValueError: If ``ts_dict`` is empty.
    '''
    if not ts_dict:
        raise ValueError("ts_dict must contain at least one time series")

    data = []
    for k, ts in ts_dict.items():
        split = len(ts) - num_pred
        train, test = ts[1:split], ts[split:]
        # train autoregression
        model_fit = AR(train, freq="W").fit()
        # make predictions
        predictions = model_fit.predict(start=len(train),
                                        end=len(train) + len(test) - 1,
                                        dynamic=False)
        # Prepend the last training point so both lines connect. This used
        # to be hard-coded as ts[len(ts) - 8:len(ts) - 7], which is only
        # right for num_pred == 7.
        predict = pd.concat([ts[split - 1:split], predictions])
        line_predict = go.Scatter(x=predict.index,
                                  y=predict.values,
                                  name="prediccion " + k)
        line_hist = go.Scatter(x=train.index,
                               y=train.values,
                               name="historicos " + k)
        data += [line_hist, line_predict]

    layout = go.Layout(title=title)
    figure = go.Figure(data=data, layout=layout)
    return ({
        'figure': figure,
        # Explicit positional access; plain [-1] / [0] on a labelled Series
        # relies on a deprecated fallback.
        'curr_val': train.iloc[-1],
        'first_pred': predictions.iloc[0]
    })
def fit_ar(outputs, inputs, guessed_dim):
    """Fits an AR model of order p = guessed_dim.

    Args:
      outputs: Array with the output values from the LDS.
      inputs: Array with exogenous inputs values.
      guessed_dim: Guessed hidden dimension.

    Returns:
      - Fitted AR coefficients. Without inputs the vector is zero-padded to
        ``guessed_dim``; if the ARMA fit with inputs fails, a warning is
        issued and an all-zero vector is returned.
    """
    if outputs.shape[1] > 1:
        # If there are multiple output dimensions, fit autoregressive params on
        # each dimension separately and average.
        # ``range`` instead of ``xrange`` so this also runs on Python 3.
        params_list = [
            fit_ar(outputs[:, j:j + 1], inputs, guessed_dim)
            for j in range(outputs.shape[1])
        ]
        return np.mean(
            np.concatenate([a.reshape(1, -1) for a in params_list]), axis=0)

    if inputs is None:
        model = AR(outputs).fit(ic='bic', trend='c', maxlag=guessed_dim,
                                disp=0)
        # BIC may pick fewer than guessed_dim lags; the tail stays zero.
        arparams = np.zeros(guessed_dim)
        arparams[:model.k_ar] = model.params[model.k_trend:]
        return arparams

    model = ARMA(outputs, order=(guessed_dim, 0), exog=inputs)
    try:
        arma_model = model.fit(start_ar_lags=guessed_dim, trend='c', disp=0)
        return arma_model.arparams
    except (ValueError, np.linalg.LinAlgError) as e:
        warnings.warn(str(e), sm_exceptions.ConvergenceWarning)
        return np.zeros(guessed_dim)
def returnpred(p, m, file='SIH.csv'):
    """Build a Plotly ``div`` comparing held-out values with an AR forecast.

    Rows of ``file`` matching product ``p`` and month ``m`` are averaged per
    ``Day``; the sixth column of that result is the modelled series. The
    first 25 values train the AR model, which then predicts from index 25 to
    ``len(series) + 10``.

    Args:
        p: Value matched against the ``Product_Name`` column.
        m: Value matched against the ``Month`` column.
        file: Path of the CSV file to read.

    Returns:
        HTML ``div`` string produced by ``plot(..., output_type='div')``.
    """
    dataset = pd.read_csv(file)
    x1 = dataset.loc[(dataset['Product_Name'] == p) & (dataset['Month'] == m)]
    y1 = x1.groupby('Day').mean()
    y1 = y1.rename(columns={'Month': 'days'})
    y = y1.iloc[:, 5]
    n1 = len(y)
    train1 = y[0:25]
    test1 = y[25:n1]

    model_fit_AR = AR(train1).fit()
    predictions_AR = model_fit_AR.predict(start=25, end=n1 + 10)

    fig = plt.figure()
    try:
        plt.plot(test1)
        plt.plot(predictions_AR, color='red')
        plt.title("Future Predictions of different company")
        plt.legend(['Original', 'Predictions'])
        plotly_fig = tls.mpl_to_plotly(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leak one figure.
        plt.close(fig)

    plotly_fig['layout']['width'] = 1200
    plot_div = plot(plotly_fig, output_type='div', include_plotlyjs=False)
    return plot_div
def get_stat_AR_coefficients(self, signals, max_lag):
    """Get the auto-regression coefficients for a set of time series signals.

    Args:
        signals: 2-D array of waveforms, one per column. The code indexes it
            as ``signals[:, i]`` / ``signals[0, i]``.
            NOTE(review): the previous docstring said "DataFrame"; that
            indexing only works on array-likes such as ``numpy.ndarray``.
        max_lag (int): The maximum number of AR lags. Each signal
            contributes exactly ``max_lag + 1`` values (zero padded or
            truncated as needed).

    Returns:
        DataFrame: A dataframe that contains a single row where the
        parameters of all signals are laid out one after another.
    """
    n_params = max_lag + 1
    per_signal = []
    for i in range(np.shape(signals)[1]):
        column = signals[:, i]
        # The AR model throws for some constant signals. The signals should
        # have been normalized into z-scores, in which case the parameters
        # for an all zero signal are all zero.
        if self.is_constant_signal(column) and signals[0, i] == 0:
            parameters = np.zeros(n_params)
        else:
            model_fit = AR(column).fit(maxlag=max_lag, ic=None)
            params = np.asarray(model_fit.params)
            n_found = params.shape[0]
            if n_found < n_params:
                parameters = np.pad(params, (0, n_params - n_found),
                                    'constant', constant_values=0)
            else:
                # Keep max_lag + 1 values so every signal has equal width.
                parameters = params[:n_params]
        per_signal.append(parameters)

    if not per_signal:
        # No columns: return an empty single-row frame instead of failing.
        return pd.DataFrame(np.zeros(0)).T
    return pd.DataFrame(np.concatenate(per_signal)).T
def main(csv_file_path):
    """Rolling one-step AR evaluation over the samples of a CSV file.

    The samples are loaded and split into train and test parts. For each of
    ``PREDICTIONS`` rounds a fresh AR model is fitted on the current history,
    the next value is predicted, and the history slides forward by one: the
    true test value is appended and the oldest entry dropped.

    Returns:
        Tuple ``(test, prediction_list)``. The mean squared error between the
        two is printed.
    """
    all_samples = load_csv(csv_file_path)
    train, test = split_samples(all_samples)

    history = list(train)
    prediction_list = []
    for step in range(PREDICTIONS):
        fitted = AR(history).fit()
        next_index = len(history)
        forecast = fitted.predict_using_learned_params(start=next_index,
                                                       end=next_index,
                                                       dynamic=False)
        prediction_list.append(forecast[0])

        actual = test[step]
        history.append(actual)
        print('predicted={pred_value}, expected={real_value}'.format(
            pred_value=prediction_list[-1], real_value=actual))
        # Drop the oldest sample so the window length stays constant.
        history = history[1:]

    error = mean_squared_error(test, prediction_list)
    print('Test MSE = {mse_value}'.format(mse_value=error))
    return test, prediction_list
def generate_AR_para(self, rawwave, filtered=False, wavt=False, AR_order=10):
    """Return the AR parameters of a waveform, optionally pre-filtered.

    Args:
        rawwave: Input signal.
        filtered: When equal to ``True`` the signal is filtered first.
        wavt: Selects the filter: ``self.wavelet_transform`` unless it equals
            ``False``, in which case ``self.selective_freq_range`` is used
            with ``high_freq=30`` and ``low_freq=1.5``.
        AR_order: ``maxlag`` passed to the AR fit.

    Returns:
        The ``params`` of the fitted AR model.
    """
    if filtered == True:
        if wavt == False:
            series, _, _ = self.selective_freq_range(rawwave,
                                                     high_freq=30,
                                                     low_freq=1.5)
        else:
            series, _ = self.wavelet_transform(rawwave)
    else:
        series = rawwave
    return AR(series).fit(maxlag=AR_order).params
predictions.append(y_hat) submission_generator.generate(predictions) test_score = mean_absolute_error(test_y, predictions) print('Test MAE: %.3f' % test_score) # plot predictions vs expected plt.plot(test_y, label="real values") plt.plot(predictions, color='red', label="predictions") plt.legend(loc='upper left') plt.show() # Implementing Auto Regression Model # Training Autoregression print(train[:, 1]) model = AR(train_y) model_fit = model.fit() print('Lag: %s' % model_fit.k_ar) print('Coefficients: %s' % model_fit.params) # Making predictions predictions = model_fit.predict(start=len(train_y), end=len(train_y) + len(test_y) - 1, dynamic=False) error = mean_absolute_error(test_y, predictions) print('Test MAE: %.3f' % error) # Plotting results plt.plot(test_y, label="real values") plt.plot(predictions, color='red', label="predictions") plt.legend(loc='upper left') plt.show()
def ar(self):
    """Fit an AR model with default settings on ``self.inputs``.

    Returns:
        The ``params`` of the fitted model.
    """
    return AR(self.inputs).fit().params
pd.plotting.autocorrelation_plot(sales_data['sales'])

# First-difference the sales series; the differenced column is what the AR
# model is trained on.
sales_data['stationary'] = sales_data['sales'].diff()

# creating model
# create train/test datasets: the last 12 observations are held out
X = sales_data['stationary'].dropna()
train_data = X[1:len(X) - 12]
# Slice the last 12 rows. The previous ``X[X[len(X) - 12:]]`` used the held
# out *values* as index labels instead of selecting the rows themselves.
test_data = X[len(X) - 12:]

# train the autoregression model
model = AR(train_data)
model_fitted = model.fit()
print('The lag value chose is: %s' % model_fitted.k_ar)
print('The coefficients of the model are:\n %s' % model_fitted.params)

predictions = model_fitted.predict(start=len(train_data),
                                   end=len(train_data) + len(test_data) - 1,
                                   dynamic=False)

# create a comparison dataframe
compare_df = pd.concat([sales_data['stationary'].tail(12), predictions],
                       axis=1).rename(columns={
                           'stationary': 'actual',
                           0: 'predicted'
                       })
def test_resids_mle():
    """MLE-fitted AR(1) residuals have one entry per observation."""
    sunspots = sm.datasets.sunspots.load_pandas()
    # Constructing AR is expected to emit a FutureWarning.
    with pytest.warns(FutureWarning):
        model = AR(np.asarray(sunspots.endog))
    fitted = model.fit(1, method='mle', disp=-1)
    n_obs = sunspots.endog.shape[0]
    assert fitted.resid.shape[0] == n_obs
324.87 , 322.41 , 323.64 , 322.73 , 324.45 , 326.65 , 325.71 , 327.95 , 327.45 , 328.19 , 330.92] # Autoregression (AR) example # fit model modelAR = AR(data) modelAR_fit = modelAR.fit() # make prediction yhatAR = modelAR_fit.predict(len(data), len(data)) print(yhatAR) # End Autoregression # Moving Average (MA) example # fit model modelMA = ARMA(data, order=(0, 1)) modelMA_fit = modelMA.fit(disp=False) # make prediction yhatMA = modelMA_fit.predict(len(data), len(data)) print(yhatMA) # End Moving Average # # Autoregressive Moving Average (ARMA) example
yield finally: sys.stdout = old_stdout if len(sys.argv) < 2: print("Select a country"); else: def difference(dataset): diff = list() for i in range(1, len(dataset)): value = dataset[i] - dataset[i - 1] diff.append(value) return numpy.array(diff) # load dataset series = read_csv('tmp/'+sys.argv[1].lower()+'.csv', header=0, index_col=0) X = difference(series.values) # fit model model = AR(X) model_fit = model.fit(maxlag=6, disp=False) # save model to file model_fit.save('data/'+sys.argv[1].lower()+'/ar_model.pkl') # save the differenced dataset numpy.save('data/'+sys.argv[1].lower()+'/ar_data.npy', X) # save the last ob numpy.save('data/'+sys.argv[1].lower()+'/ar_obs.npy', [series.values[-1]])
data_pk_half = data_pk_half.fillna(0) #%% data_pk_full = data2[['COUNTY', 'Year', 'PK (FULL DAY)']] data_pk_full.dropna(inplace=True) data_pk_full = data_pk_full.fillna(0) #%% dataframe_list = [] for county in data_kg_full.COUNTY.unique(): county1 = data2[data2.COUNTY == county] series = pd.Series( county1['KG (FULL DAY)'].to_list(), index=county1['Year'].to_list()) # create lagged dataset X = series.values train, test = X[1:len(X) - 7], X[len(X) - 7:] model = AR(train) model_fit = model.fit() print('Lag: %s' % model_fit.k_ar) print('Coefficients: %s' % model_fit.params) # make predictions print(len(train)) print(len(train) + len(test) - 1) predictions_list = [] initial_list = [] new_list = X.tolist() print(new_list) initial_list.append(new_list[0]) initial_list.extend(train.tolist()) predictions = model_fit.predict(start=len(train), end=len(train) + len(test) + 5, dynamic=False) for i in range(len(predictions)):
def autoregression(list_statistics, horizon=10):
    """Forecast the next values of a series with a default AR model.

    Args:
        list_statistics: Observed series used for fitting.
        horizon: Number of out-of-sample steps to forecast. Defaults to 10,
            matching the previously hard-coded range ``len .. len + 9``.

    Returns:
        The forecasts for the ``horizon`` steps following the data.

    Raises:
        ValueError: If ``horizon`` is smaller than 1.
    """
    if horizon < 1:
        raise ValueError("horizon must be at least 1")
    model_fit = AR(list_statistics).fit()
    first = len(list_statistics)
    # ``predict`` treats ``end`` as inclusive.
    yhat = model_fit.predict(first, first + horizon - 1)
    return yhat
# Make a prediction give regression coefficients and lag obs def predict(coef, history): yhat = coef[0] for i in range(1, len(coef)): yhat += coef[i] * history[-i] return yhat series = Series.from_csv('../data/nifty.csv', header=0) # split dataset X = difference(series.values) size = int(len(X) * 0.66) train, test = X[0:size], X[size:] # train autoregression model = AR(train) model_fit = model.fit(maxlag=6, disp=False) window = model_fit.k_ar coef = model_fit.params # walk forward over time steps in test history = [train[i] for i in range(len(train))] predictions = list() for t in range(len(test)): yhat = predict(coef, history) obs = test[t] predictions.append(yhat) history.append(obs) error = mean_squared_error(test, predictions) print('Test MSE: %.3f' % error) # plot pyplot.plot(test) pyplot.plot(predictions, color='red')
plt.show()

"""Remove seasonal effect and store as column in dateframe."""
x = df.Average - df.Season
df['x'] = pd.Series(x)

"""Plot autocorrelation and partial autocorrelation."""
#autocorrelation_plot(x)
#plot_pacf(x, lags=50)
#plt.show()
# We clearly see that lag 3 is the right choice.

#sm.OLS(x, lag_func(x))
# The lag order is chosen by AIC among up to 40 lags; trend='nc' is passed
# to the fit.
model = AR(x)
model_fit = model.fit(maxlag = 40, ic = 'aic', trend = 'nc')
print('Lag: %s' % model_fit.k_ar)
print('Coefficients: %s' % model_fit.params)
# The label says "Residuals" but the printed value is model_fit.sigma2.
print('Residuals: %s' % model_fit.sigma2)

"""Find CAR(3) coefficients from AR(3) coefficients."""
# NOTE(review): this reads params[0..2], so it assumes the AIC selection
# kept at least three parameters -- confirm for the data at hand.
alpha1 = 3 - model_fit.params[0]
alpha2 = 2 * alpha1- model_fit.params[1] - 3
alpha3 = alpha2 - alpha1 - model_fit.params[2] + 1
#print(alpha1, alpha2, alpha3)

"""Create column with year, month and day."""
df['Year'] = pd.DatetimeIndex(df['Date']).year
df['Month'] = pd.DatetimeIndex(df['Date']).month
df['Day'] = pd.DatetimeIndex(df['Date']).day
def ARcast(data,time,dt=False,axis=-1,missing=0):
    """
    Forecast the data by using the AutoRegressive method.

    The code looks for gaps in the time sampling and fills the values in
    each gap with an AR forecast.

    Parameters
    ----------
    data : ~numpy.ndarray
        n dimensional data.
        Data must have the same number of elements as the time along `axis`.
    time : astropy.time.core.Time
        The time of each data point.
    dt : (optional) float
        Interval of time between data points, in seconds.
        If not given, ``(time[1]-time[0]).value`` is used.
    axis : (optional) int
        The axis to forecast along.
    missing : (optional) float
        The missing value of the data.
        It may be due to data alignment.

    Returns
    -------
    ARdata : ~numpy.ndarray
        Data resampled onto the regular time grid with gaps forecast.
    tf : ~numpy.ndarray
        The regular time grid of ARdata, in seconds from the first time.

    Raises
    ------
    ValueError
        If the size of `data` along `axis` differs from the size of `time`.

    Notes
    -----
    Input time must be an astropy.time.core.Time,
    but the output time is a ~numpy.ndarray.

    References
    ----------
    `AR model <https://en.wikipedia.org/wiki/Autoregressive_model>`_.
    `statsmodels.tsa.ar_model.AR <http://statsmodels.sourceforge.net/devel/generated/statsmodels.tsa.ar_model.AR.html>`_.

    Example
    -------
    >>> from fisspy.analysis.forecast import ARcast
    >>> ARdata, tf = ARcast(data,t,dt=20.,axis=1)
    """
    if not dt:
        # NOTE(review): `t` below is scaled by 24*3600 while this default is
        # the raw `.value` of the time difference -- confirm the units agree.
        dt=(time[1]-time[0]).value

    shape=list(data.shape)
    shape0=list(data.shape)
    if shape[axis]!=len(time):
        raise ValueError('The size of data is different from the size of time.')

    # Elapsed time since the first sample, multiplied by 24*3600.
    t=(time-time[0])*24*3600
    t=t.value
    # Regular grid with step dt; np.arange excludes the end point t[-1].
    tf=np.arange(t[0],t[-1],dt,dtype=float)

    # Interpolate the data onto the regular grid along `axis`.
    interp=interp1d(t,data,axis=axis)
    datai=interp(tf)

    # Build a permutation that moves `axis` to the front.
    # NOTE(review): the remaining axes are located by dimension size via
    # shape0.index, which is ambiguous when two axes have the same length.
    shape.pop(axis)
    ind=[shape0.index(i) for i in shape]
    ind=[axis]+ind
    datat=datai.transpose(ind)

    # Flatten to 2-D: (time, everything else).
    shapei=datat.shape
    datat=datat.reshape((shapei[0],np.prod(shapei[1:])))
    shapet=datat.shape

    # Spacing between consecutive input times; a spacing of at least 2*dt
    # marks a gap.
    td=t-np.roll(t,1)
    addi=np.where(td >= dt*2)[0]
    for wh in addi:
        for i in range(shapet[1]):
            y=datat[:,i]
            wh2=wh+int(td[wh]/dt-1)
            # Only fit when fewer than 4 samples equal `missing`; otherwise
            # the gap is filled with the missing value.
            if (y==missing).sum()<4:
                bar=AR(y)
                car=bar.fit()
                dar=car.predict(int(wh),int(wh2))
                # NOTE(review): `wh` is an index into the original `t`, but
                # it is used to index the resampled `datat` -- confirm.
                datat[wh:wh2+1,i]=dar
            else:
                datat[wh:wh2+1,i]=missing

    datat=datat.reshape((shapei))
    # NOTE(review): this applies `ind` again rather than its inverse, which
    # restores the original axis order only for some permutations -- confirm.
    return datat.transpose(ind), tf
def AR_model(s_y):
    """Fit an AR model with up to 50 lags and return zero-padded predictions.

    Predictions are requested for indices ``100 .. len(s_y)`` (inclusive) and
    prefixed with 99 zeros.

    Args:
        s_y: Observed series.

    Returns:
        1-D array: 99 zeros followed by the model predictions. Previously the
        array was computed but never returned.
    """
    model_fit = AR(s_y).fit(maxlag=50)
    yhat = model_fit.predict(100, len(s_y))
    yhat = np.hstack([np.zeros([99]), yhat])
    return yhat
def update(self): self.variance = round( self.beta * self.variance + (1 - self.beta) * abs(self.prediction - self.last_arrival_time), 2) from statsmodels.tsa.ar_model import AR begin = max(0, self.index - self.window) data = self.arrivals[begin:self.index] # fit model model = AR(data) model_fit = model.fit() self.model = model_fit # make prediction self.prediction = model_fit.predict(len(data), len(data))[0] minVal = 0 maxVal = 0 if len(self.predictedArrivals) < 4: minVal = min(self.predictedArrivals[-self.windowArrival:]) maxVal = max(self.predictedArrivals[-self.windowArrival:]) else: cut = self.predictedArrivals[-self.windowArrival:] cut.sort() minVal = cut[1] maxVal = cut[-2] if self.prediction < minVal: self.prediction = minVal elif self.prediction > maxVal: self.prediction = maxVal interset = self.resultDifferences[-300:] if len( self.resultDifferences) > 300 else self.resultDifferences intersetWO = np.abs(array(interset)) interset = reject_outliers_2(intersetWO) self.meanPE.append(np.mean(interset)) self.variancePE.append(interset.var()) self.stdPE.append(np.std(interset)) self.medianPE.append(np.median(interset)) self.rmsPE.append(math.sqrt(np.square(interset).mean())) interset = self.resultDifferences[-200:] if len( self.resultDifferences) > 200 else self.resultDifferences previousArrival = -100 if len(self.predictedArrivals) > 1: previousArrival = self.predictedArrivals[-1] for key in self.constrains: new_timeout = new_timeout = round( self.prediction + (self.constraints_to_K[key]) * self.variance, 3) if previousArrival != -100: predTrend = 1 if self.prediction - previousArrival > 0 else 0 if predTrend == int(self.altPred): new_timeout = round( self.prediction + (self.constraints_to_K[key]) * self.variance, 3) self.timeouts[key].append(new_timeout) continue index = math.ceil(len(interset) * key) index = min(index, len(interset) - 1) #interset = [abs(x) for x in interset] interset.sort() element = interset[index] margin = (keySmoother2(key) * 2 * 
np.median(interset)) + element extreme = interset[-2] if len(interset) > 1 else interset[-1] new_timeout = round(self.prediction + extreme, 3)
def test_roots():
    """The root of a fitted AR(1) equals the reciprocal of its lag parameter."""
    sunspots = sm.datasets.sunspots.load_pandas()
    # Constructing AR is expected to emit a FutureWarning.
    with pytest.warns(FutureWarning):
        model = AR(np.asarray(sunspots.endog))
    fitted = model.fit(1)
    expected_roots = np.array([1. / fitted.params[-1]])
    assert_almost_equal(fitted.roots, expected_roots)
def __projections(self, indicators, baseyear):
    """
    Generates indicator level projections till current year.

    This treats each indicator for each country as a time series. The
    projections are made using an AR(n) model, where n is determined by a
    heuristic approach (n here is the number of lag variables).

    For cases where data is insufficient, we simply treat it as missing
    which is better than projecting incorrectly.

    indicators: all indicators to project
    baseyear: year to project to.

    returns: a dataframe with the columns 'Country Name', 'Country Code',
        'Indicator Name', 'Indicator Code', 'year' and 'value'. It is empty
        (but has these columns) when no series qualified for projection.
    """
    start_time = time()

    pdf = self.df.copy(deep=True)
    pdf['year_idx'] = pd.to_datetime(pdf.year, format='%Y')
    pdf = pdf.set_index('year_idx').to_period(freq='Y')

    columns = [
        'Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
        'year', 'value'
    ]
    cnt = 0
    ign = 0
    # Per-series results; concatenated once at the end instead of growing a
    # dataframe inside the loop.
    frames = []

    for (country, ind), grp in pdf.groupby(['Country Code', 'Indicator Code']):
        # No projections needed for countries outside Sub-Saharan Africa
        if not (country in SSA and ind in indicators):
            continue

        # Years for which projection is needed
        years = np.arange(grp.year.max() + 1, baseyear + 1)
        # observations available in this time series
        obs = len(grp)
        # Maximum lag to consider for the AR model
        lag = min(obs - 1, MAX_LAG)

        logger.debug(
            "Country: {}, Indicator: {}, observations: {}, maxlag: {}, num years to project: {}"
            .format(country, ind, obs, lag, len(years)))

        if not (0 < years.size <= 5 and obs > 5):
            # Don't do projections if relatively recent data isn't available
            # or isn't needed.
            ign += 1
            continue

        # Do some interpolation if needed
        # NOTE(review): resample(...).sum() may already fill empty years
        # with 0, leaving nothing for interpolate() to fill -- confirm.
        X = grp.value.copy(deep=True)
        X = X.resample('Y').sum()
        X = X.interpolate()

        # Fit and score an AR(n) model
        model_fit = AR(X, missing='raise').fit(maxlag=lag, trend='nc')
        pred = model_fit.predict(start=str(years.min()),
                                 end=str(years.max()))
        cnt += 1

        # Conform to the overall dataframe
        curr_df = pd.DataFrame()
        curr_df['value'] = pred
        curr_df['Country Code'] = country
        curr_df['Indicator Code'] = ind
        # Explicit positional access to the first row of the group.
        curr_df['Country Name'] = grp['Country Name'].iloc[0]
        curr_df['Indicator Name'] = grp['Indicator Name'].iloc[0]
        curr_df.reset_index(inplace=True)
        curr_df.rename(columns={'index': "year"}, inplace=True)
        frames.append(curr_df[columns])

    logger.info(
        "Projections made for {} time series ({} ignored or not needed).".
        format(cnt, ign))
    logger.info("Projections made in {:3.2f} sec.".format(time() -
                                                          start_time))

    if not frames:
        # Nothing qualified: return an empty, correctly shaped frame rather
        # than failing on the missing 'year' column.
        return pd.DataFrame(columns=columns)

    proj_df = pd.concat(frames, ignore_index=True)
    # Change the year from period to integer
    proj_df.year = proj_df.year.apply(lambda x: int(x.strftime("%Y")))
    return proj_df
# Quick interactive inspection of the data; these bare expressions only
# display something when run cell by cell.
wine.head()
wine.tail()
wine.head().append(wine.tail())
wine.shape
wine  #176 observations

# Training / validation split by position.
wineTrg = wine[0:108]  # Up to December '88; the model is built on these 108 observations
wineVal = wine[108:]  # From January '89 until end
wineTrg
wineVal

#%%%%% Moving averages of the training data (window of 3)
wineTrg.rolling(window=3)
wine_ma3c = wineTrg.rolling(window=3, center=True).mean()  # centred window
wine_ma3c
wine_ma3 = wineTrg.rolling(window=3, center=False).mean()  # trailing window
wine_ma3

#%%% Autoregression (this cell fits an AR model, not exponential smoothing)
from statsmodels.tsa.ar_model import AR
model1 = AR(wineTrg)
model1_fit = model1.fit()
# make prediction: a single step right after the training data
yhat1 = model1_fit.predict(len(wineTrg), len(wineTrg))
print(yhat1)
from random import random #------------------------------------------------------------------------------------------------- # AR example # contrived dataset xdata = range(1, 100) ydata = [x + (3*random()) for x in xdata] plt.xlim(0, 100) plt.ylim(0, 100) #------------------------------- plt.scatter(xdata,ydata,s=10) plt.show() print() #------------------------------- # fit model model = AR(ydata) model_fit = model.fit() #------------------------------- # make prediction #yhat = model_fit.predict(len(xdata), len(ydata)) yhat = model_fit.predict( start= 90, end = 110 ) print('Predicted value for Auto Regression ', yhat) print("="*50) #------------------------------------------------------------------------------------------------- # MA example # fit model model = ARMA(ydata, order=(0, 1)) model_fit = model.fit(disp=False) # make prediction yhat = model_fit.predict( start= 90, end = 110 ) print('Predicted value for Moving Average 0,1 ',yhat) print("="*50)
#matplotlib.rcParams['xtick.labelsize'] = 12 #matplotlib.rcParams['ytick.labelsize'] = 12 #matplotlib.rcParams['text.color'] = 'k' #rcParams['figure.figsize'] = 18, 8 df = pd.read_csv("CHARTEVENTS_HR_FILTERED.csv") #,SUBJECT_ID,HADM_ID,ICUSTAY_ID,CHARTTIME,HEART_RATE heart_rate_36 = df.loc[df['SUBJECT_ID'] == 36] heart_rate_36 = heart_rate_36[['CHARTTIME','HEART_RATE']] #Make the index a time datatype, make only one reading per hour and fill in missing values heart_rate_36['CHARTTIME'] = pd.to_datetime(heart_rate_36['CHARTTIME']) heart_rate_36 = heart_rate_36.set_index('CHARTTIME') heart_rate_36_resampled = heart_rate_36.resample('H').mean() heart_rate_36_resampled = heart_rate_36_resampled.interpolate(method='linear') print ("Original data points: " + str(len(heart_rate_36))) print ("Resampled hourly data points: " + str(len(heart_rate_36_resampled))) print (plt.style.available) #Autoregression (AR) model = AR(heart_rate_36_resampled) model_fit = model.fit() heart_rate_36_forecast = model_fit.predict(len(heart_rate_36_resampled), len(heart_rate_36_resampled)+24) plt.figure(figsize=(16,8)) plt.plot(heart_rate_36, label='Original') plt.plot(heart_rate_36_resampled, label='Resampled') plt.plot(heart_rate_36_forecast, label='AR Forecast') plt.legend(loc='best') plt.show()
for i in range(1, len(coef)): yhat += coef[i] * history[-i] return yhat series = pd.read_csv('daily-total-female-births.csv', header = 0, index_col = 0, parse_dates = True, squeeze = True) #Spliteamos nuestro conjunto de datos X = difference(series.values) size = int(len(X)*0.66) train, test = [0:size], X[size:] #Entrenamos nuestro modelo autoregresivo model = AR(train) model_fit = model.fit(maxlag = 6, disp = False) window = model_fit.k_ar coef = model_fit.params #Hacemos predicciones de forma walk forward history = [train[i] for i in range(len(train))] predictions = list() for t in range(len(test)): yhat = predict(coef, history) obs = test[t] predictions.append(yhat) history.append(obs) rmse = sqrt(mean_squared_error(test, predictions)) print('Test RMSE: %.3f' % rmse)
# In[30]: len(newthr1) # In[45]: # train = newthr1[0:12095] # test = newthr1[12094:] train = newthr1[0:100] test = newthr1[100:120] predictions = [] # In[46]: model_ar = AR(train) model_ar_fit = model_ar.fit() # In[48]: predictions = model_ar_fit.predict(start=100, end=120) # In[49]: plt.plot(test) plt.plot(predictions, color='red') # In[50]: predictions # In[38]:
def get_lpc(trame):
    """Return the parameters of an AR model fitted to ``trame``.

    The value 20 is passed as the first positional argument of ``fit``.
    """
    return AR(trame).fit(20).params