def main1():
    """Build volume-bar features for every raw equity file and save them.

    For each entry listed in ``rawDataDir`` this function:

    1. reads the parquet file and builds volume bars whose threshold is the
       total volume divided by the number of rows;
    2. adds the bar-to-bar price return (``retClose``), the output of
       ``getDailyVol`` (``dailyVol``) and the columns produced by
       ``orderFlow`` (normOI / VPIN according to the original comment);
    3. runs a one-dimensional Kalman filter with unit design, transition and
       selection matrices (a random-walk model) over the bar prices and
       stores the forecasts, forecast errors and forecast-error standard
       deviation;
    4. adds a rolling autocorrelation of price (window of 100 bars);
    5. writes the result to ``interimDataDir`` under the same file name.

    Rows containing NaN are dropped after step 3 and again after step 4.
    Returns ``None``.
    """
    for equity in os.listdir(rawDataDir):
        # Join path components instead of concatenating strings with "/".
        infp = PurePath(str(rawDataDir), equity)
        df = pd.read_parquet(infp)
        volume_M = df.volume.sum() / df.shape[0]

        # produce the volume bar
        vbar = volume_bar_df(df, 'volume', volume_M)
        vbar.set_index('dates', inplace=True)

        # bar-to-bar simple return
        vbar['retClose'] = vbar['price'] / vbar['price'].shift(1) - 1
        # daily vol
        vbar['dailyVol'] = getDailyVol(vbar['price'])
        # normOI and VPIN
        vbar = orderFlow(vbar)

        # kf setting, assume random walk
        kf = KalmanFilter(1, 1)
        sigma_h = 0.0001  # hidden
        sigma_e = 0.001  # obs
        kf.obs_cov = np.array([sigma_e])
        kf.state_cov = np.array([sigma_h])
        kf.design = np.array([1.0])
        kf.transition = np.array([1.0])
        kf.selection = np.array([1.0])

        # The frame is indexed by 'dates', so the first price must be taken
        # by position; ``price[0]`` relied on a deprecated integer fallback.
        first_price = vbar['price'].iloc[0]
        kf.initialize_known(np.array([first_price]), np.array([[sigma_h]]))
        kf.bind(np.array(vbar['price'].copy()))
        r = kf.filter()

        # The filter outputs have one entry per bar, in bar order, so they
        # can be assigned positionally without wrapping them in a DataFrame.
        vbar['forecasts'] = r.forecasts[0]
        vbar['forecasts_error'] = r.forecasts_error[0]
        vbar['error_std'] = np.sqrt(r.forecasts_error_cov[0][0])
        vbar = vbar.dropna()

        # srl_corr
        vbar['srl_corr'] = df_rolling_autocorr(
            vbar['price'], window=100
        ).rename('srl_corr')
        vbar = vbar.dropna()

        # output
        outfp = PurePath(str(interimDataDir), equity)
        print(outfp)
        vbar.to_parquet(outfp)
        # NOTE(review): assumed to be reported once per saved file; the
        # original layout was ambiguous about the loop boundary - confirm.
        print("Success: save")
    return
# Observation series: hidden state plus observation noise, y = a + e,
# restricted to the first 100 samples.
df["y"] = a[:100] + e[:100]

# Plot every column of the frame, then the observation on its own.
_ = df.plot(figsize=(14, 6), style=["b--", "g-"])
_ = df["y"].plot(figsize=(14, 6), style=["g-"])

# If only y can be observed, what can be said about the state α?
# The Kalman filter acts as a linear filter that tries to recover the
# signal by filtering out the noise: a is the state and y is the
# observation in the state-space equations.
import statsmodels.tsa.statespace.kalman_filter
from statsmodels.tsa.statespace.kalman_filter import KalmanFilter

# One observed series, one state.
kf = KalmanFilter(1, 1)
kf.obs_cov = np.array([sigma_e])    # H: observation covariance
kf.state_cov = np.array([sigma_h])  # Q: state covariance
kf.design = np.array([1.0])         # Z: design matrix
kf.transition = np.array([1.0])     # T: transition matrix
kf.selection = np.array([1.0])      # R: selection matrix

# Simulate 100 steps from the configured model.
ys, ah = kf.simulate(100)