def test_kalman_filter_pickle(data):
    """Check that a pickled-and-restored KalmanFilter filters identically.

    A four-state, single-endog filter is configured with the parameters
    stored in ``results_kalman_filter.uc_uni``, bound to ``data['lgdp']``
    and given a known initialization. The model is round-tripped through
    ``cPickle`` before either copy is filtered; the summed ``llf_obs`` and
    rows 0, 1 and 3 of ``filtered_state`` (from ``true['start']`` onward)
    must then agree between the two sets of results.
    """
    reference = results_kalman_filter.uc_uni
    first_obs = reference['start']
    n_states = 4

    # Build the state space representation and attach the observed series.
    model = KalmanFilter(k_endog=1, k_states=n_states)
    model.bind(data['lgdp'].values)

    model.design[:, :, 0] = [1, 1, 0, 0]
    rows = [0, 0, 1, 1, 2, 3]
    cols = [0, 3, 1, 2, 1, 3]
    periods = [0, 0, 0, 0, 0, 0]
    model.transition[(rows, cols, periods)] = [1, 1, 0, 0, 1, 1]
    model.selection = np.eye(model.k_states)

    # Plug the reference parameters into the transition and state_cov.
    sigma_v, sigma_e, sigma_w, phi_1, phi_2 = np.array(
        reference['parameters']
    )
    model.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
    diag_rows, diag_cols = np.diag_indices(n_states)
    first_period = np.zeros(n_states, dtype=int)
    model.state_cov[(diag_rows, diag_cols, first_period)] = [
        sigma_v ** 2, sigma_e ** 2, 0, sigma_w ** 2
    ]

    # Known initialization: zero state, and 100 * I pushed once through
    # the transition matrix (T P T').
    initial_state = np.zeros((n_states,))
    base_cov = np.eye(n_states) * 100
    transition_0 = model.transition[:, :, 0]
    initial_state_cov = transition_0.dot(base_cov).dot(transition_0.T)
    model.initialize_known(initial_state, initial_state_cov)

    # Round-trip the fully configured model before any filtering is done.
    pkl_mod = cPickle.loads(cPickle.dumps(model))

    results = model.filter()
    pkl_results = pkl_mod.filter()

    assert_allclose(results.llf_obs[first_obs:].sum(),
                    pkl_results.llf_obs[first_obs:].sum())
    for state_idx in (0, 1, 3):
        assert_allclose(results.filtered_state[state_idx][first_obs:],
                        pkl_results.filtered_state[state_idx][first_obs:])
def main1():
    """Build volume-bar features for every file listed in ``rawDataDir``.

    For each directory entry the parquet file is read and converted to
    volume bars via ``volume_bar_df``, using the mean of the ``volume``
    column per input row as the bar size. The bars are indexed by
    ``dates`` and extended with:

    * ``retClose`` -- simple return of ``price`` against the previous bar,
    * ``dailyVol`` -- output of ``getDailyVol`` on ``price``,
    * whatever columns ``orderFlow`` adds,
    * ``forecasts``, ``forecasts_error`` and ``error_std`` from a
      one-dimensional Kalman filter run over ``price``,
    * ``srl_corr`` -- ``df_rolling_autocorr`` of ``price`` with a window
      of 100.

    Rows containing NaN are dropped and the result is written as parquet
    to ``interimDataDir`` under the same file name.

    Returns:
        None.
    """
    for equity in os.listdir(rawDataDir):
        infp = PurePath(str(rawDataDir)) / equity
        df = pd.read_parquet(infp)

        # Bar size: average volume per input row.
        volume_M = df.volume.sum() / df.shape[0]

        # Produce the volume bars.
        vbar = volume_bar_df(df, 'volume', volume_M)
        vbar.set_index('dates', inplace=True)

        # Bar-to-bar simple return.
        vbar['retClose'] = vbar['price'] / vbar['price'].shift(1) - 1

        # Daily volatility.
        vbar['dailyVol'] = getDailyVol(vbar['price'])

        # Order-flow features (original note: normOI and VPIN).
        vbar = orderFlow(vbar)

        # Kalman filter setup, assuming a random walk: design, transition
        # and selection are all 1.
        kf = KalmanFilter(1, 1)
        sigma_h = 0.0001  # hidden (state) noise
        sigma_e = 0.001   # observation noise
        # NOTE(review): the sigma_* values are assigned to the *_cov
        # matrices as-is (not squared) -- confirm that is intended.
        kf.obs_cov = np.array([sigma_e])
        kf.state_cov = np.array([sigma_h])
        kf.design = np.array([1.0])
        kf.transition = np.array([1.0])
        kf.selection = np.array([1.0])

        # The frame is indexed by 'dates', so the first price must be
        # taken by position; an integer key via [] relies on a deprecated
        # positional fallback.
        first_price = vbar['price'].iloc[0]
        kf.initialize_known(np.array([first_price]), np.array([[sigma_h]]))
        kf.bind(np.array(vbar['price']))  # np.array copies the data
        r = kf.filter()

        vbar['forecasts'] = pd.Series(r.forecasts[0], index=vbar.index)
        vbar['forecasts_error'] = pd.Series(r.forecasts_error[0],
                                            index=vbar.index)
        vbar['error_std'] = pd.Series(
            np.sqrt(r.forecasts_error_cov[0][0]), index=vbar.index
        )
        vbar = vbar.dropna()

        # Rolling autocorrelation of price.
        vbar['srl_corr'] = df_rolling_autocorr(
            vbar['price'], window=100
        ).rename('srl_corr')
        vbar = vbar.dropna()

        # Output.
        outfp = PurePath(str(interimDataDir)) / equity
        print(outfp)
        vbar.to_parquet(outfp)
        print("Success: save")
#y = a + e #If we can only observe y, what can we say about α? #This acts like a filter trying to recover a signal by filtering out noise. #A linear filter. # a is the state and y is the observation (equations) import statsmodels.tsa.statespace.kalman_filter from statsmodels.tsa.statespace.kalman_filter import KalmanFilter kf = KalmanFilter(1,1) kf.obs_cov = np.array([sigma_e]) # H kf.state_cov = np.array([sigma_h]) # Q kf.design = np.array([1.0]) # Z kf.transition = np.array([1.0]) # T kf.selection = np.array([1.0]) # R ys, ah = kf.simulate(100)