#take I(2) series, etc.).
# Not implemented: not strictly needed for the Granger test.
# 2. Difference once to get I(1) series, which are roughly stationary
#    (e ~ iid N(0, sigma^2)).
# Not implemented: not strictly necessary for the Granger test.
# 3. Compute the partial autocorrelation function (PACF) and run a Ljung-Box
#    test on it to see which lags help predict current values.
clicksPACF = stattools.pacf_ols(clicksPerDay, nlags=MAX_LAG)
encountersPACF = stattools.pacf_ols(encountersPerDay, nlags=MAX_LAG)

# Ljung-Box test: the helper receives the PACF values, the number of
# observations in the series, and the maximum lag.
clicksPACF_LJ = _math.ljungBox(clicksPACF, len(clicksPerDay), MAX_LAG)
encountersPACF_LJ = _math.ljungBox(encountersPACF, len(encountersPerDay), MAX_LAG)

# Print one table per series, clicks first, with the same column headers.
for banner, table_rows in (
    ("clicks PACF Ljung-Box", clicksPACF_LJ),
    ("encounters PACF Ljung-Box", encountersPACF_LJ),
):
    print(banner)
    print(tabulate(table_rows, headers=["lag", "R", "Q", "p-val"]))


#plot
def plotxy(title, x, y, LSRL=None):
    """Show a bar chart of ``y`` against ``x`` in a new figure.

    title -- text placed above the axes via ``plt.title``.
    x, y  -- bar positions and bar heights, passed straight to ``plt.bar``.
    LSRL  -- accepted for callers that pass it; this body never reads it.

    Bars use a fixed width of 0.08, the grid is switched on, the x tick
    labels are adjusted with ``autofmt_xdate`` and the figure is displayed
    with ``plt.show()``. Nothing is returned.
    """
    figure = plt.figure()
    plt.bar(x, y, width=0.08)
    plt.grid()
    plt.title(title)
    figure.autofmt_xdate()
    plt.show()
#clicksPerDay.index.name = None
#encountersPerDay.index.name = None

# Reduce each per-day frame to its single count column, kept on the same index.
clicksPerDay = pd.Series(data=clicksPerDay["count_clicks"], index=clicksPerDay.index)
encountersPerDay = pd.Series(data=encountersPerDay["count_encounter"], index=encountersPerDay.index)

# Forward-fill missing values. Series.ffill() is the direct equivalent of
# fillna(method="ffill"), whose `method` argument is deprecated in pandas 2.1+.
clicksPerDay = clicksPerDay.ffill()
encountersPerDay = encountersPerDay.ffill()

####################LJUNG-BOX####################
clicksPACF = stattools.pacf_ols(clicksPerDay, nlags=MAX_LAG)
encountersPACF = stattools.pacf_ols(encountersPerDay, nlags=MAX_LAG)

#my implementation
results = _math.ljungBox(encountersPACF, len(encountersPerDay), MAX_LAG)
# Transpose the result rows into four parallel tuples.
lag, R, Q, p = zip(*results)
# [1:] drops the first row (presumably lag 0 -- confirm against _math.ljungBox).
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(np.asarray(Q[1:]))
print(np.asarray(p[1:]))

#statsmodels implementation
results = diagnostic.acorr_ljungbox(encountersPerDay, lags=MAX_LAG)
print(results)

#my copy of the statsmodels implementation
results = _math.ljungBox2(encountersPerDay, maxlag=MAX_LAG)
print(results)

#they are not the same... I wonder why.
# NOTE(review): the first call is fed PACF coefficients (encountersPACF), while
# acorr_ljungbox and ljungBox2 are given the raw series; statsmodels derives the
# ordinary autocorrelations from that series itself. The Q statistics are
# therefore built from different coefficients, which is a likely cause of the
# mismatch -- confirm against the _math.ljungBox implementation.
# Finish the t statistic from the numerator/denominator computed above this
# fragment, then report it with its p-value and degrees of freedom.
tval = tNum/tDen
# Degrees of freedom: number of entries in R minus 3.
df = len(R)-3
# sf is the survival function (1 - cdf), so this is the upper-tail p-value.
pval = scipy.stats.t.sf(tval, df)
header = ["type", "value"]
data = [ ["tval",tval], ["pval",pval], ["df",df] ]
# print("\n") emits the newline in the string plus print's own line ending.
print("\n")
print("*William's Modified Hotelling T Test*")
print(tabulate.tabulate(data, headers=header))

#Ljung Box Test & Autocorrelation/Crosscorrelation
# Each series is passed as both arguments, i.e. it is compared with itself,
# for lags up to 7. The returned rows are printed as a table under the
# lag / R value / Q / p value headers.
coeffs = _math.ljungBox(R, R, maxLag=7)
header = ["lag", "R value", "Q", "p value"]
data = coeffs
print("\n")
print("*Ljung-Box Test for Crosscorrelation*")
print("## R vs R ##")
print(tabulate.tabulate(data, headers=header))

# Same table for t against itself.
coeffs = _math.ljungBox(t, t, maxLag=7)
header = ["lag", "R value", "Q", "p value"]
data = coeffs
print("## t vs t ##")
print(tabulate.tabulate(data, headers=header))

# Same computation for rsent against itself; the code that prints this table
# lies beyond the visible fragment.
coeffs = _math.ljungBox(rsent, rsent, maxLag=7)
header = ["lag", "R value", "Q", "p value"]