def regression_for_states(X_s, y_s):
    """Fit recursive least squares, then a Gaussian process on its residuals.

    A constant column is added to ``X_s`` and ``sm.RecursiveLS`` is fitted on
    ``(y_s, X_s)``.  The residuals (``y_s`` minus the recursive-LS
    predictions) are then modelled with a ``GaussianProcessRegressor`` using
    an RBF + white-noise kernel, with the observation number as the only
    feature.

    Parameters
    ----------
    X_s : array-like
        Regressors, without the constant column.
    y_s : numpy.ndarray
        Response values.  ``y_s.shape`` and ``y_s.flatten()`` are used, so an
        ndarray is expected.

    Returns
    -------
    dict
        ``'pred'`` -- recursive-LS predictions plus the GP posterior mean
        evaluated on ``np.linspace(0, 12, 12)``.
        ``'var'`` -- GP posterior covariance on that same grid.
    """
    X_s = sm.add_constant(X_s)

    res = sm.RecursiveLS(y_s, X_s).fit()
    # NOTE(review): this reads the *diagonal* of ``predicted_state``
    # (element [i, i] for every observation i).  Confirm this is the intended
    # per-observation prediction and that it stays in bounds for the data
    # this function is called with.
    prd = np.array(
        [res.predicted_state[i, i] for i in range(y_s.shape[0])]
    )
    resid = y_s.flatten() - prd

    # The GP is trained on residuals indexed by observation number 0..n-1.
    X = np.arange(len(y_s))[:, np.newaxis]
    kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \
        + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e-1))
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0).fit(X, resid)

    # NOTE(review): the evaluation grid always has 12 points, so
    # ``prd + y_mean`` below only broadcasts when ``len(y_s)`` is 12 (or 1).
    X_ = np.linspace(0, 12, 12)
    y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True)
    return {'pred': prd + y_mean, 'var': y_cov}
def recursivels_model():
    """Build a fitted Recursive Least Squares model on the copper dataset.

    Returns a ``ModelWithResults`` carrying the fitted results (``model``),
    the unfitted ``RecursiveLS`` instance (``alg``) and a one-row data frame
    with ``start`` / ``end`` columns (``inference_dataframe``).
    """
    copper = sm.datasets.copper.load_pandas().data
    copper.index = pd.date_range("1951-01-01", "1975-01-01", freq="AS")

    target = copper.WORLDCONSUMPTION
    regressor_columns = ["COPPERPRICE", "INCOMEINDEX", "ALUMPRICE", "INVENTORYINDEX"]
    # add_constant supplies the column of ones used as the intercept.
    design = sm.add_constant(
        copper[regressor_columns]  # pylint: disable=E1136
    )

    recursive_ls = sm.RecursiveLS(target, design)
    fitted = recursive_ls.fit()

    window = pd.DataFrame(
        [["1951-01-01", "1975-01-01"]],
        columns=["start", "end"],
    )
    return ModelWithResults(
        model=fitted,
        alg=recursive_ls,
        inference_dataframe=window,
    )
# Script fragment: fits a Recursive Least Squares model on ``dta`` (loaded
# before this point) and prints / plots its recursive coefficients.

# Index the observations by start-of-year dates.
dta.index = pd.date_range('1951-01-01', '1975-01-01', freq='AS')
endog = dta['WORLDCONSUMPTION']

# To the regressors in the dataset, we add a column of ones for an
# intercept.
exog = sm.add_constant(
    dta[['COPPERPRICE', 'INCOMEINDEX', 'ALUMPRICE', 'INVENTORYINDEX']])

# First, construct and fit the model, and print a summary. Although the
# `RLS` model computes the regression parameters recursively, so there are
# as many estimates as there are datapoints, the summary table only presents
# the regression parameters estimated on the entire sample; except for small
# effects from initialization of the recursions, these estimates are
# equivalent to OLS estimates.
mod = sm.RecursiveLS(endog, exog)
res = mod.fit()
print(res.summary())

# The recursive coefficients are available in the `recursive_coefficients`
# attribute. Alternatively, plots can be generated using the
# `plot_recursive_coefficient` method.
print(res.recursive_coefficients.filtered[0])
res.plot_recursive_coefficient(range(mod.k_exog), alpha=None, figsize=(10, 6))

# The CUSUM statistic is available in the `cusum` attribute, but usually
# it is more convenient to visually check for parameter stability using the
# `plot_cusum` method. In the plot below, the CUSUM statistic does not move
# outside of the 5% significance bands, so we fail to reject the null
def modelSm(self):
    """Fit a RecursiveLS model on ``self.oFrame`` and return the results.

    The last entry of ``cols`` selects the response column of
    ``self.oFrame``; all preceding entries select the regressors.
    """
    response = self.oFrame[cols[-1]]
    regressors = self.oFrame[cols[:-1]]
    fitted = sm.RecursiveLS(response, regressors).fit()
    # The summary is built here but its value is neither printed nor returned.
    fitted.summary()
    return fitted
# Fragment: derives range-based columns for one ticker and regresses the log
# of the resulting ratio on the log of the observation count, once with a
# rolling window and once recursively.
# NOTE(review): judging by the variable names this is presumably a
# rescaled-range (R/S) Hurst-exponent estimate -- confirm.

# Running (expanding) maximum of the 'demeanCumChange' column.
tickerDF[ticker].loc[:, 'maxChange'] = tickerDF[
    ticker].loc[:, 'demeanCumChange'].expanding().max()
# Range = running max minus 'minChange' (computed before this fragment).
tickerDF[ticker].loc[:, 'chgRange'] = tickerDF[
    ticker].loc[:, 'maxChange'] - tickerDF[ticker].loc[:, 'minChange']
# Range scaled by 'cumStd' (computed before this fragment).
tickerDF[ticker].loc[:, 'chgRatio'] = tickerDF[
    ticker].loc[:, 'chgRange'] / tickerDF[ticker].loc[:, 'cumStd']

# Rolling regression (window of 300) of log(chgRatio) on log(1..n); the
# constant is appended as the last column (prepend=False).
# NOTE(review): the regressor length counts the non-NaN 'chgRatio' values,
# while the response drops NaNs *after* taking the log; the two lengths can
# differ if the log itself produces NaN -- confirm this cannot happen.
hurstMod[ticker] = RollingOLS(
    np.log(tickerDF[ticker].loc[:, 'chgRatio']).dropna(),
    sm.add_constant(np.log(
        range(1, 1 + len(tickerDF[ticker].loc[:, 'chgRatio'].dropna()))),
        prepend=False),
    window=300)
# Same regression, estimated recursively over the growing sample.
recursiveHurst[ticker] = sm.RecursiveLS(
    np.log(tickerDF[ticker].loc[:, 'chgRatio']).dropna(),
    sm.add_constant(np.log(
        range(1, 1 + len(tickerDF[ticker].loc[:, 'chgRatio'].dropna()))),
        prepend=False))
res = hurstMod[ticker].fit()
res2 = recursiveHurst[ticker].fit()
# Only the recursive fit is plotted; `res` is not used within this fragment.
res2.plot_recursive_coefficient()
#plt.figure()
#sns.regplot(np.log(range(1,1+len(tickerDF[ticker].loc[:,'chgRatio'].dropna()))), np.log(tickerDF[ticker].loc[:,'chgRatio']).dropna()).set_title(ticker)
#sns.lineplot(tickerDF[ticker].loc[:,'chgRatio'].dropna().index, res.params.loc[:,'x1'])
#plotTicker='IEF'
#sns.lineplot(x=tickerDF[ticker].index, y=tickerDF[ticker].loc[:,'chgRatio'])
#calculate and store fractional dimensions or just load them
#dimensions_long_term = scanAndStore(800, dimensionFile_long_term)
#dimensions_month = scanAndStore(30, 'dimensions_month.obj')
def RegressionAnalysis(df, Independent, Explanatory, Indicators, prefix=None):
    """
    Fit a set of statsmodels regression / time-series models and collect
    their indicators, for comparison between series.

    Arguments:
    ----------
        - df: Pandas DataFrame
            Contains the data to be analyzed
        - Independent: str
            The name of the column in df that is passed to the models as
            the response (first, ``endog``-position argument)
        - Explanatory: str or list
            The name of the column in df for the Explanatory variable data.
            For a multivariate analysis, pass a list of all column names.
            The Yule Walker and Burg indicators are only computed when
            there is exactly one explanatory column.
        - Indicators: list or None
            The list of the indicator/model names to compute. ``None``
            selects every indicator known to this function.
        - prefix: str or None
            Text prepended to every indicator name in the outputs.
            ``None`` means no prefix.

    Return:
    ----------
        - df: Pandas DataFrame
            - The initial df with the series indicators added (fitted
              values and residuals of each fitted model)
        - OneValueIndicators: Pandas DataFrame
            - Contains all the indicators stored once per model (one row
              per indicator name)

    NOTE(review): ``statsmodels.tsa.ar_model.AR`` and
    ``statsmodels.tsa.arima_model.ARMA`` / ``ARIMA`` are legacy classes;
    confirm they exist in the installed statsmodels version.
    """
    if Indicators is None:
        Indicators = [
            "OLS", "GLSAR", "RecursiveLS",
            "Yule Walker Order 1", "Yule Walker Order 2",
            "Yule Walker Order 3",
            "Burg Order 1", "Burg Order 2", "Burg Order 3",
            "QuantReg",
            "GLM Binomial", "GLM Gamma", "GLM Gaussian",
            "GLM Inverse Gaussian", "GLM Negative Binomial", "GLM Poisson",
            # A missing comma used to merge the next two entries into the
            # single, never-matching name "GLM TweedieAR".
            "GLM Tweedie",
            "AR", "ARMA", "ARIMA",
            "Granger Causality", "Levinson Durbin", "Cointegration",
        ]
    if prefix is None:
        prefix = ""

    # Pre-processing
    Independent = pd.DataFrame(df[Independent])
    Explanatory = pd.DataFrame(df[Explanatory])
    y_sm = np.array(Independent).reshape((-1, 1))
    x_sm = sm.add_constant(np.array(Explanatory))
    NumDecimal = 3  # Number of decimals for rounding numbers
    OneValueIndicators = {}
    single_regressor = len(Explanatory.columns) == 1

    ##################################################
    ##### PART 1: Linear Regression
    ##################################################

    ########## Section 1: OLS
    name = "OLS"
    if name in Indicators:
        label = prefix + name
        results = sm.OLS(y_sm, x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        df = _regression_add_series(df, results, label)

    ########## Section 2: WLS -- Not Implemented
    ########## Section 3: GLS -- Not Implemented

    ########## Section 4: GLSAR
    name = "GLSAR"
    if name in Indicators:
        label = prefix + name
        results = sm.GLSAR(y_sm, x_sm, 1).iterative_fit(1)
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        df = _regression_add_series(df, results, label)

    ########## Section 5: RLS
    name = "RecursiveLS"
    if name in Indicators:
        label = prefix + name
        results = sm.RecursiveLS(y_sm, x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        OneValueIndicators[label + " Z Value"] = results.zvalues
        df = _regression_add_series(df, results, label)
        # Cumsum: Not Implemented

    ########## Sections 6-8: Yule Walker, orders 1 to 3
    # Column 0 of x_sm is the constant, column 1 the single regressor.
    for order in (1, 2, 3):
        name = "Yule Walker Order %d" % order
        if name in Indicators and single_regressor:
            label = prefix + name
            rho, sigma = statsmodels.regression.linear_model.yule_walker(
                x_sm[:, 1].flatten(), order=order)
            # Only the first coefficient is stored, whatever the order.
            OneValueIndicators[label + " Rho"] = round(rho[0], NumDecimal)
            OneValueIndicators[label + " Sigma"] = round(sigma, NumDecimal)

    ########## Sections 9-11: Burg's AR(p), orders 1 to 3
    for order in (1, 2, 3):
        name = "Burg Order %d" % order
        if name in Indicators and single_regressor:
            label = prefix + name
            rho, sigma2 = statsmodels.regression.linear_model.burg(
                x_sm[:, 1].flatten(), order=order)
            # Only the first coefficient is stored, whatever the order.
            OneValueIndicators[label + " Rho"] = round(rho[0], NumDecimal)
            OneValueIndicators[label + " Sigma2"] = round(sigma2, NumDecimal)

    ########## Section 12: Quantile Regression
    name = "QuantReg"
    if name in Indicators:
        label = prefix + name
        results = sm.QuantReg(y_sm, x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        df = _regression_add_series(df, results, label)

    ##################################################
    ##### PART 2: Generalized Linear Models
    ##################################################
    # (indicator name, attribute of sm.families); the family is looked up
    # and instantiated only when its indicator was requested.
    glm_families = (
        ("GLM Binomial", "Binomial"),
        ("GLM Gamma", "Gamma"),
        ("GLM Gaussian", "Gaussian"),
        ("GLM Inverse Gaussian", "InverseGaussian"),
        ("GLM Negative Binomial", "NegativeBinomial"),
        ("GLM Poisson", "Poisson"),
        ("GLM Tweedie", "Tweedie"),
    )
    for name, family_name in glm_families:
        if name not in Indicators:
            continue
        label = prefix + name
        family = getattr(sm.families, family_name)()
        results = sm.GLM(y_sm, x_sm, family=family).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        pearson_chi2 = round(results.pearson_chi2, NumDecimal)
        # The unprefixed "Pearson chi2" key is shared by every GLM family,
        # so later families overwrite earlier ones. It is kept for backward
        # compatibility; the per-model key keeps each value distinguishable.
        OneValueIndicators[label + " Pearson chi2"] = pearson_chi2
        OneValueIndicators["Pearson chi2"] = pearson_chi2
        df = _regression_add_series(df, results, label)

    ##################################################
    ##### PART 3: Robust Linear Models -- Not Implemented
    ##################################################

    ##################################################
    ##### PART 4: AR models
    ##################################################
    name = "AR"
    if name in Indicators:
        label = prefix + name
        results = statsmodels.tsa.ar_model.AR(Independent).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        OneValueIndicators[label + " Final Prediction Error"] = results.fpe
        OneValueIndicators[
            label + " Hannan-Quinn Information Criterion"] = results.hqic
        OneValueIndicators[label + " Roots"] = results.roots
        df = _regression_add_series(df, results, label)

    ##################################################
    ##### PART 5: ARMA
    ##################################################
    name = "ARMA"
    if name in Indicators:
        label = prefix + name
        results = statsmodels.tsa.arima_model.ARMA(y_sm, (5, 5), x_sm).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        OneValueIndicators[label + " AR Params"] = results.arparams
        OneValueIndicators[label + " AR Roots"] = results.arroots
        OneValueIndicators[label + " AR Freq"] = results.arfreq
        OneValueIndicators[
            label + " Hannan-Quinn Information Criterion"] = results.hqic
        OneValueIndicators[label + " MA Params"] = results.maparams
        # MA roots / frequencies are best-effort: a failure to compute them
        # leaves the key out instead of aborting the whole analysis.
        try:
            OneValueIndicators[label + " MA Roots"] = results.maroots
        except Exception:
            pass
        try:
            OneValueIndicators[label + " MA Freq"] = results.mafreq
        except Exception:
            pass
        OneValueIndicators[label + " Sigma2"] = results.sigma2
        df = _regression_add_series(df, results, label)

    ##################################################
    ##### PART 6: ARIMA
    ##################################################
    name = "ARIMA"
    if name in Indicators:
        label = prefix + name
        results = statsmodels.tsa.arima_model.ARIMA(
            Independent, (2, 2, 2), Explanatory).fit()
        OneValueIndicators = Statsmodels_Regression_All_OneValueIndicators(
            OneValueIndicators, label, results, Explanatory, NumDecimal)
        OneValueIndicators[label + " AR Params"] = results.arparams
        OneValueIndicators[label + " AR Roots"] = results.arroots
        OneValueIndicators[label + " AR Freq"] = results.arfreq
        OneValueIndicators[
            label + " Hannan-Quinn Information Criterion"] = results.hqic
        OneValueIndicators[label + " MA Params"] = results.maparams
        OneValueIndicators[label + " MA Roots"] = results.maroots
        OneValueIndicators[label + " MA Freq"] = results.mafreq
        OneValueIndicators[label + " Sigma2"] = results.sigma2
        df = _regression_add_series(df, results, label)

    ##################################################
    ##### PART 7: Univariate Analysis
    ##################################################
    # The membership test uses the bare indicator name, like every section
    # above; testing the prefixed name disabled these three indicators
    # whenever a non-empty prefix was given.

    # Granger Causality
    name = "Granger Causality"
    if name in Indicators:
        OneValueIndicators[prefix + name] = ts.grangercausalitytests(
            Independent.merge(Explanatory,
                              how="inner",
                              left_index=True,
                              right_index=True),
            maxlag=10)

    # Levinson Durbin
    name = "Levinson Durbin"
    if name in Indicators:
        OneValueIndicators[prefix + name] = ts.levinson_durbin(Independent)

    # Cointegration
    name = "Cointegration"
    if name in Indicators:
        OneValueIndicators[prefix + name] = ts.coint(Independent,
                                                     Explanatory,
                                                     trend="ct",
                                                     return_results=False)

    ##################################################
    ##### Not Implemented
    ##################################################
    # BDS Statistic (residuals analysis)
    # Return's Ljung-Box Q Statistic (AR)

    OneValueIndicators = pd.DataFrame.from_dict(OneValueIndicators,
                                                orient="index")
    return df, OneValueIndicators


def _regression_add_series(df, results, label):
    """Add the fitted-value and residual series of ``results`` to ``df``."""
    df = Statsmodels_FittedValues(df, results, label)
    return Statsmodels_LR_Residuals(df, results, label)
def ssri(i, n, x, y, k):
    """Return the CUSUM-of-squares statistic from offset ``i``, NaN-padded.

    ``sm.RecursiveLS`` is fitted on ``x[i:]`` (first argument) and ``y[i:]``
    (second argument); the ``cusum_squares`` attribute of the fit is
    returned with ``k`` NaN values prepended.  ``n`` is accepted but not
    used by this function.
    """
    # Earlier variant, kept for reference: ret=recresid(x[i-1:],y[i-1:])
    fitted = sm.RecursiveLS(x[i:], y[i:]).fit()
    statistic = fitted.cusum_squares
    padding = np.array([np.nan] * k)
    return np.concatenate((padding, statistic))
row[0] = (row[0] - temp_min) / (temp_max - temp_min) #model = Sequential() #model.add(Dense(4,input_dim=1,activation='relu')) #model.add(Dense(4,activation='relu')) #model.add(Dense(1,activation='sigmoid')) #model.compile(loss='mean_squared_error',optimizer='SGD',metrics=['mean_squared_error']) # history=model.fit(temp,pressure,epochs=10,verbose=1) #scores = model.evaluate(temp,pressure) #print("Baseline Error: %.2f%%" % (100-scores[1]*100)) #predictions = model.predict(temp) #plt.plot(predictions,'r') #plt.plot(history.history['loss']) mod = sm.RecursiveLS(temp, pressure) res = mod.fit() print(res.summary()) plt.plot(temp, 'b') plt.plot(pressure, 'g') plt.legend(['Prediction', 'Temp', 'Pressure']) plt.show() except (Exception, psycopg2.Error) as error: print("Error ", error) finally: if (connection): cursor.close() connection.close() print("Postgresql connection is closed")
def fn_apis_statsmodels_recursive_ls():
    """Fit a RecursiveLS model from query parameters and return it as JSON.

    Reads the ``x`` and ``y`` query parameters, each a Python/JSON-style
    list literal, for example ``x=[[4,67,662],[9,19,618],...]`` and
    ``y=[2857.0163,2547.5962,...]``.  A constant column is added to ``x``,
    ``sm.RecursiveLS(y, x)`` is fitted, and the attributes of the fit are
    packed into an ``rs_ols`` object whose ``__dict__`` is serialised.

    Returns
    -------
    Response
        ``application/json`` body with the fit attributes and CORS headers.
    """
    import ast

    import numpy as np

    x = request.args.get('x')
    y = request.args.get('y')
    # SECURITY: these values come straight from the HTTP request.  They were
    # previously passed to eval(), which lets a caller execute arbitrary
    # code on the server.  ast.literal_eval only accepts literals (numbers,
    # lists, tuples, ...), which covers the list inputs this endpoint
    # expects.
    x1 = np.array(ast.literal_eval(x))
    y1 = np.array(ast.literal_eval(y))

    x1 = sm.add_constant(x1)
    model = sm.RecursiveLS(y1, x1)
    rs = model.fit()

    # Arguments are positional: their order must match rs_ols's signature.
    c = rs_ols(
        rs.aic,
        rs.bic,
        rs.bse.tolist(),
        rs.conf_int().tolist(),
        rs.cov_kwds,
        rs.cov_params().tolist(),
        rs.cov_params_approx.tolist(),
        rs.cov_params_default.tolist(),
        rs.cov_params_oim.tolist(),
        rs.cov_params_opg.tolist(),
        rs.cov_params_robust.tolist(),
        rs.cov_params_robust_approx.tolist(),
        rs.cov_params_robust_oim.tolist(),
        rs.cov_type,
        rs.cusum.tolist(),
        rs.cusum_squares.tolist(),
        rs.data_in_cache,
        rs.df_resid,
        rs.filtered_state.tolist(),
        rs.filtered_state_cov.tolist(),
        rs.fittedvalues.tolist(),
        rs.forecasts.tolist(),
        rs.forecasts_error.tolist(),
        rs.forecasts_error_cov.tolist(),
        rs.hqic,
        rs.k_constant,
        rs.llf,
        rs.llf_obs.tolist(),
        rs.loglikelihood_burn,
        rs.nobs,
        rs.params.tolist(),
        rs.predict().tolist(),
        rs.predicted_state.tolist(),
        rs.predicted_state_cov.tolist(),
        rs.pvalues.tolist(),
        rs.resid.tolist(),
        rs.resid_recursive.tolist(),
        rs.scale,
        rs.smoothed_measurement_disturbance.tolist(),
        rs.smoothed_measurement_disturbance_cov.tolist(),
        rs.smoothed_state.tolist(),
        rs.smoothed_state_cov.tolist(),
        rs.smoothed_state_disturbance.tolist(),
        rs.smoothed_state_disturbance_cov.tolist(),
        rs.tvalues.tolist(),
        rs.use_t,
        rs.zvalues.tolist(),
    )
    c = c.__dict__
    tmp = json.dumps(c, ensure_ascii=False, indent=4)
    return Response(tmp,
                    mimetype='application/json',
                    headers={
                        "Access-Control-Allow-Origin": "http://127.0.0.0:5000",
                        "Access-Control-Allow-Methods": "GET",
                        "Access-Control-Allow-Headers": "x-requested-with,content-type",
                        "Access-Control-Allow-Credentials": "true"
                    })