def test_slice_notation():
    """Exercise item-style access (``mod[...]``) on a bound KalmanFilter.

    First checks the keys that are expected to raise ``IndexError`` on
    assignment and on lookup, then checks valid reads and writes of the
    ``design`` matrix with and without an explicit time index.
    """
    series = np.arange(10)*1.0
    ssm = KalmanFilter(k_endog=1, k_states=2)
    ssm.bind(series)

    # Test invalid __setitem__
    def assign_one(key):
        ssm[key] = 1

    for bad_key in ('designs', ('designs', 0, 0), 0):
        assert_raises(IndexError, assign_one, bad_key)

    # Test invalid __getitem__
    for bad_key in ('designs', ('designs', 0, 0, 0), 0):
        assert_raises(IndexError, lambda: ssm[bad_key])

    # Test valid __setitem__, __getitem__
    assert_equal(ssm.design[0, 0, 0], 0)
    ssm['design', 0, 0, 0] = 1
    assert_equal(ssm['design'].sum(), 1)
    assert_equal(ssm.design[0, 0, 0], 1)
    assert_equal(ssm['design', 0, 0, 0], 1)

    # Test valid __setitem__, __getitem__ with unspecified time index
    design_shape = ssm['design'].shape
    ssm['design'] = np.zeros(design_shape)
    assert_equal(ssm.design[0, 0], 0)
    ssm['design', 0, 0] = 1
    assert_equal(ssm.design[0, 0], 1)
    assert_equal(ssm['design', 0, 0], 1)
class Options(object):
    """Holder for a one-state KalmanFilter bound to dummy data.

    All positional and keyword arguments given to the constructor are passed
    on to ``KalmanFilter`` together with ``k_endog=1`` and ``k_states=1``.
    The resulting model is stored on ``self.model``.
    """

    def __init__(self, *args, **kwargs):
        # Dummy data
        dummy_endog = np.arange(10)

        # NOTE(review): positional arguments are supplied in addition to the
        # fixed k_endog / k_states keywords; whether any positional argument
        # is accepted depends on KalmanFilter's signature -- confirm.
        self.model = KalmanFilter(*args, k_endog=1, k_states=1, **kwargs)
        self.model.bind(dummy_endog)
def test_cython():
    """Test creation and re-creation of the cython ``_kalman_filter`` object.

    Part one loops over every (prefix, dtype) pair in
    ``tools.prefix_dtype_map`` and checks that initializing a model of that
    dtype stores a filter of the matching class under that prefix.  Part two
    checks when the stored filter object is kept and when it is replaced
    after re-binding data or changing a matrix shape.
    """
    # Check that datatypes are correct:
    for prefix, dtype in tools.prefix_dtype_map.items():
        observations = np.array(1., ndmin=2, dtype=dtype)
        model = KalmanFilter(k_endog=1, k_states=1, dtype=dtype)

        # Bind data and initialize the ?KalmanFilter object
        model.bind(observations)
        model._initialize_filter()

        # Check that the dtype and prefix are correct
        assert_equal(model.prefix, prefix)
        assert_equal(model.dtype, dtype)

        # Test that a filter instance was stored under this prefix ...
        assert_equal(prefix in model._kalman_filters, True)
        stored = model._kalman_filters[prefix]
        # ... and that it has the class mapped to the prefix
        expected_cls = tools.prefix_kalman_filter_map[prefix]
        assert_equal(isinstance(stored, expected_cls), True)

        # Test that the default returned _kalman_filter is the above instance
        assert_equal(model._kalman_filter, stored)

    # Check that upcasting datatypes / ?KalmanFilter works (e.g. d -> z)
    model = KalmanFilter(k_endog=1, k_states=1)

    # Default dtype is float
    assert_equal(model.prefix, 'd')
    assert_equal(model.dtype, np.float64)

    # Prior to initialization, no ?KalmanFilter exists
    assert_equal(model._kalman_filter, None)

    # Bind data and initialize the ?KalmanFilter object
    real_obs = np.ascontiguousarray(np.array([1., 2.], dtype=np.float64))
    model.bind(real_obs)
    model._initialize_filter()
    previous = model._kalman_filters['d']

    # Rebind data, still float, check that we haven't changed
    model.bind(real_obs)
    model._initialize_filter()
    assert_equal(model._kalman_filter, previous)

    # Force creating new ?Statespace and ?KalmanFilter, by changing the
    # time-varying character of an array
    model.design = np.zeros((1, 1, 2))
    model._initialize_filter()
    assert_equal(model._kalman_filter == previous, False)
    previous = model._kalman_filters['d']

    # Rebind data, now complex, check that the ?KalmanFilter instance has
    # changed
    complex_obs = np.ascontiguousarray(
        np.array([1., 2.], dtype=np.complex128)
    )
    model.bind(complex_obs)
    assert_equal(model._kalman_filter == previous, False)
def test_cython():
    """Check how the model creates, caches and replaces its cython filter.

    For each (prefix, dtype) pair in ``tools.prefix_dtype_map`` a model is
    bound and initialized, and the filter stored under that prefix is
    checked against ``tools.prefix_kalman_filter_map``.  Afterwards a
    default-dtype model is re-bound and re-shaped to verify when the stored
    filter object stays the same and when it differs.
    """
    # Check that datatypes are correct:
    for prefix, dtype in tools.prefix_dtype_map.items():
        single_obs = np.array(1.0, ndmin=2, dtype=dtype)
        ssm = KalmanFilter(k_endog=1, k_states=1, dtype=dtype)

        # Bind data and initialize the ?KalmanFilter object
        ssm.bind(single_obs)
        ssm._initialize_filter()

        # Check that the dtype and prefix are correct
        assert_equal(ssm.prefix, prefix)
        assert_equal(ssm.dtype, dtype)

        # Test that a filter instance was created for this prefix
        assert_equal(prefix in ssm._kalman_filters, True)
        created = ssm._kalman_filters[prefix]
        assert_equal(
            isinstance(created, tools.prefix_kalman_filter_map[prefix]),
            True,
        )

        # Test that the default returned _kalman_filter is the above instance
        assert_equal(ssm._kalman_filter, created)

    # Check that upcasting datatypes / ?KalmanFilter works (e.g. d -> z)
    ssm = KalmanFilter(k_endog=1, k_states=1)

    # Default dtype is float
    assert_equal(ssm.prefix, "d")
    assert_equal(ssm.dtype, np.float64)

    # Prior to initialization, no ?KalmanFilter exists
    assert_equal(ssm._kalman_filter, None)

    # Bind data and initialize the ?KalmanFilter object
    float_obs = np.ascontiguousarray(np.array([1.0, 2.0], dtype=np.float64))
    ssm.bind(float_obs)
    ssm._initialize_filter()
    reference = ssm._kalman_filters["d"]

    # Rebind data, still float, check that we haven't changed
    ssm.bind(float_obs)
    ssm._initialize_filter()
    assert_equal(ssm._kalman_filter, reference)

    # Force creating new ?Statespace and ?KalmanFilter, by changing the
    # time-varying character of an array
    ssm.design = np.zeros((1, 1, 2))
    ssm._initialize_filter()
    assert_equal(ssm._kalman_filter == reference, False)
    reference = ssm._kalman_filters["d"]

    # Rebind data, now complex, check that the ?KalmanFilter instance has
    # changed
    cplx_obs = np.ascontiguousarray(
        np.array([1.0, 2.0], dtype=np.complex128)
    )
    ssm.bind(cplx_obs)
    assert_equal(ssm._kalman_filter == reference, False)
def test_kalman_filter_pickle(data):
    """Check that a pickle round-trip of the model leaves filtering unchanged.

    A four-state model is set up from ``results_kalman_filter.uc_uni`` and
    bound to ``data['lgdp']``.  The model is pickled and restored, both
    copies are filtered, and their log-likelihood sums and filtered states
    0, 1 and 3 (from ``true['start']`` onward) are compared.

    Parameters
    ----------
    data
        Object supporting ``data['lgdp'].values``; supplied by the caller
        (presumably a test fixture -- confirm).
    """
    # Construct the statespace representation
    true = results_kalman_filter.uc_uni
    n_states = 4
    model = KalmanFilter(k_endog=1, k_states=n_states)
    model.bind(data['lgdp'].values)

    model.design[:, :, 0] = [1, 1, 0, 0]
    rows = [0, 0, 1, 1, 2, 3]
    cols = [0, 3, 1, 2, 1, 3]
    first_period = [0, 0, 0, 0, 0, 0]
    model.transition[(rows, cols, first_period)] = [1, 1, 0, 0, 1, 1]
    model.selection = np.eye(model.k_states)

    # Update matrices with given parameters
    (sigma_v, sigma_e, sigma_w, phi_1, phi_2) = np.array(true['parameters'])
    model.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
    # Diagonal of the state covariance at time index 0
    diag_at_t0 = np.diag_indices(n_states) + (
        np.zeros(n_states, dtype=int),
    )
    model.state_cov[diag_at_t0] = [
        sigma_v ** 2, sigma_e ** 2, 0, sigma_w ** 2
    ]

    # Initialization
    initial_state = np.zeros((n_states,))
    initial_state_cov = np.eye(n_states) * 100

    # Initialization: modification (T * P0 * T')
    trans0 = model.transition[:, :, 0]
    initial_state_cov = np.dot(np.dot(trans0, initial_state_cov), trans0.T)
    model.initialize_known(initial_state, initial_state_cov)

    # Round-trip through pickle before either copy has been filtered
    pkl_mod = cPickle.loads(cPickle.dumps(model))

    results = model.filter()
    pkl_results = pkl_mod.filter()

    start = true['start']
    assert_allclose(results.llf_obs[start:].sum(),
                    pkl_results.llf_obs[start:].sum())
    for state_idx in (0, 1, 3):
        assert_allclose(results.filtered_state[state_idx][start:],
                        pkl_results.filtered_state[state_idx][start:])
def main1():
    """Build Kalman-filtered volume-bar features for every raw equity file.

    For each entry listed in ``rawDataDir`` this reads the parquet file,
    builds volume bars sized by the mean volume per input row, adds return,
    volatility, order-flow, Kalman-filter and rolling-autocorrelation
    columns, and writes the result to ``interimDataDir`` under the same file
    name.

    Returns
    -------
    None
    """
    for equity in os.listdir(rawDataDir):
        # Join path components with pathlib rather than "/" concatenation.
        infp = PurePath(rawDataDir, equity)
        df = pd.read_parquet(infp)
        # Bar size: total volume divided by the number of input rows.
        volume_M = df.volume.sum() / df.shape[0]

        # produce the volume bar
        vbar = volume_bar_df(df, 'volume', volume_M)
        vbar = vbar.set_index('dates')

        # simple return between consecutive bars
        vbar['retClose'] = vbar['price'] / vbar['price'].shift(1) - 1
        # daily vol
        vbar['dailyVol'] = getDailyVol(vbar['price'])
        # normOI and VPIN (presumably added by orderFlow -- confirm)
        vbar = orderFlow(vbar)

        # kf setting, assume random walk
        kf = KalmanFilter(1, 1)
        sigma_h = 0.0001  # hidden
        sigma_e = 0.001  # obs
        # NOTE(review): the sigma_* values are assigned directly to the
        # covariance matrices (not squared) -- confirm that is intended.
        kf.obs_cov = np.array([sigma_e])
        kf.state_cov = np.array([sigma_h])
        kf.design = np.array([1.0])
        kf.transition = np.array([1.0])
        kf.selection = np.array([1.0])

        # The index is now 'dates', so take the first price by position;
        # an integer key on a label-indexed Series relies on a deprecated
        # positional fallback.
        first_price = vbar['price'].iloc[0]
        kf.initialize_known(np.array([first_price]), np.array([[sigma_h]]))
        kf.bind(np.array(vbar['price'].copy()))
        r = kf.filter()

        vbar['forecasts'] = pd.DataFrame(r.forecasts[0], index=vbar.index)
        vbar['forecasts_error'] = pd.DataFrame(r.forecasts_error[0],
                                               index=vbar.index)
        vbar['error_std'] = pd.DataFrame(
            np.sqrt(r.forecasts_error_cov[0][0]), index=vbar.index)
        vbar = vbar.dropna()

        # srl_corr
        vbar['srl_corr'] = df_rolling_autocorr(
            vbar['price'], window=100).rename('srl_corr')
        vbar = vbar.dropna()

        # output
        outfp = PurePath(interimDataDir, equity)
        print(outfp)
        vbar.to_parquet(outfp)
        print("Success: save")
def test_slice_notation():
    """Test setting and getting representation matrices via item access.

    Keys expected to be rejected must raise ``IndexError`` for both
    ``__setitem__`` and ``__getitem__``; valid keys on ``design`` must
    round-trip, with or without a time index.
    """
    endog = np.arange(10) * 1.0
    model = KalmanFilter(k_endog=1, k_states=2)
    model.bind(endog)

    # Test invalid __setitem__
    for bad_key in ('designs', ('designs', 0, 0), 0):
        def store(key=bad_key):
            model[key] = 1

        assert_raises(IndexError, store)

    # Test invalid __getitem__
    for bad_key in ('designs', ('designs', 0, 0, 0), 0):
        assert_raises(IndexError, lambda key=bad_key: model[key])

    # Test valid __setitem__, __getitem__
    assert_equal(model.design[0, 0, 0], 0)
    model['design', 0, 0, 0] = 1
    assert_equal(model['design'].sum(), 1)
    assert_equal(model.design[0, 0, 0], 1)
    assert_equal(model['design', 0, 0, 0], 1)

    # Test valid __setitem__, __getitem__ with unspecified time index
    zeroed = np.zeros(model['design'].shape)
    model['design'] = zeroed
    assert_equal(model.design[0, 0], 0)
    model['design', 0, 0] = 1
    assert_equal(model.design[0, 0], 1)
    assert_equal(model['design', 0, 0], 1)
def test_slice_notation():
    """Check ``__setitem__`` / ``__getitem__`` on a bound KalmanFilter.

    Two tables list the keys that must raise ``IndexError`` when written
    and when read.  The remaining assertions write and read entries of the
    ``design`` matrix, first with a full (row, column, time) key and then
    with the time index omitted.
    """
    observed = np.arange(10) * 1.0
    kfilter = KalmanFilter(k_endog=1, k_states=2)
    kfilter.bind(observed)

    rejected_on_set = ["designs", ("designs", 0, 0), 0]
    rejected_on_get = ["designs", ("designs", 0, 0, 0), 0]

    def put(key):
        kfilter[key] = 1

    def fetch(key):
        return kfilter[key]

    # Test invalid __setitem__
    for key in rejected_on_set:
        assert_raises(IndexError, put, key)

    # Test invalid __getitem__
    for key in rejected_on_get:
        assert_raises(IndexError, fetch, key)

    # Test valid __setitem__, __getitem__
    assert_equal(kfilter.design[0, 0, 0], 0)
    kfilter["design", 0, 0, 0] = 1
    assert_equal(kfilter["design"].sum(), 1)
    assert_equal(kfilter.design[0, 0, 0], 1)
    assert_equal(kfilter["design", 0, 0, 0], 1)

    # Test valid __setitem__, __getitem__ with unspecified time index
    kfilter["design"] = np.zeros(kfilter["design"].shape)
    assert_equal(kfilter.design[0, 0], 0)
    kfilter["design", 0, 0] = 1
    assert_equal(kfilter.design[0, 0], 1)
    assert_equal(kfilter["design", 0, 0], 1)
class Clark1987(object):
    """
    Clark's (1987) univariate unobserved components model of real GDP (as
    presented in Kim and Nelson, 1999)

    Test data produced using GAUSS code described in Kim and Nelson (1999)
    and found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm

    See `results.results_kalman_filter` for more information.

    The constructor builds ``self.model``; ``run_filter`` must be called
    before the ``test_*`` methods, which read ``self.results``.
    """

    def __init__(self, dtype=float, **kwargs):
        # NOTE(review): `dtype` is accepted but never used in this
        # constructor; only **kwargs reach KalmanFilter -- confirm intent.
        self.true = results_kalman_filter.uc_uni
        self.true_states = pd.DataFrame(self.true['states'])

        # GDP, Quarterly, 1947.1 - 1995.3
        quarters = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
        data = pd.DataFrame(
            self.true['data'], index=quarters, columns=['GDP']
        )
        data['lgdp'] = np.log(data['GDP'])

        # Construct the statespace representation
        k_states = 4
        self.model = KalmanFilter(k_endog=1, k_states=k_states, **kwargs)
        self.model.bind(data['lgdp'].values)

        self.model.design[:, :, 0] = [1, 1, 0, 0]
        rows = [0, 0, 1, 1, 2, 3]
        cols = [0, 3, 1, 2, 1, 3]
        first_period = [0, 0, 0, 0, 0, 0]
        self.model.transition[(rows, cols, first_period)] = [
            1, 1, 0, 0, 1, 1
        ]
        self.model.selection = np.eye(self.model.k_states)

        # Update matrices with given parameters
        (sigma_v, sigma_e, sigma_w, phi_1, phi_2) = np.array(
            self.true['parameters']
        )
        self.model.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
        # Diagonal of the state covariance at time index 0
        diag_at_t0 = np.diag_indices(k_states) + (
            np.zeros(k_states, dtype=int),
        )
        self.model.state_cov[diag_at_t0] = [
            sigma_v**2, sigma_e**2, 0, sigma_w**2
        ]

        # Initialization
        initial_state = np.zeros((k_states,))
        initial_state_cov = np.eye(k_states)*100

        # Initialization: modification (T * P0 * T')
        trans0 = self.model.transition[:, :, 0]
        initial_state_cov = np.dot(
            np.dot(trans0, initial_state_cov), trans0.T
        )
        self.model.initialize_known(initial_state, initial_state_cov)

    def run_filter(self):
        """Filter the data and store the output on ``self.results``."""
        self.results = self.model.filter()

    def test_loglike(self):
        """Compare the summed log-likelihood with the reference value."""
        start = self.true['start']
        assert_almost_equal(
            self.results.llf_obs[start:].sum(), self.true['loglike'], 5
        )

    def test_filtered_state(self):
        """Compare filtered states 0, 1 and 3 with the reference columns."""
        start = self.true['start']
        # (row of filtered_state, column of the reference states)
        for state_idx, ref_col in ((0, 0), (1, 1), (3, 2)):
            assert_almost_equal(
                self.results.filtered_state[state_idx][start:],
                self.true_states.iloc[:, ref_col], 4
            )
class Clark1989(object):
    """
    Clark's (1989) bivariate unobserved components model of real GDP (as
    presented in Kim and Nelson, 1999)

    Tests two-dimensional observation data.

    Test data produced using GAUSS code described in Kim and Nelson (1999)
    and found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm

    See `results.results_kalman_filter` for more information.

    The constructor builds ``self.model``; ``run_filter`` must be called
    before the ``test_*`` methods, which read ``self.results``.
    """

    def __init__(self, dtype=float, **kwargs):
        # NOTE(review): `dtype` is accepted but never used in this
        # constructor; only **kwargs reach KalmanFilter -- confirm intent.
        self.true = results_kalman_filter.uc_bi
        self.true_states = pd.DataFrame(self.true['states'])

        # GDP and Unemployment, Quarterly, 1948.1 - 1995.3
        # (the first four rows of the 1947-based index are dropped)
        quarters = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
        data = pd.DataFrame(
            self.true['data'], index=quarters, columns=['GDP', 'UNEMP']
        )[4:]
        data['GDP'] = np.log(data['GDP'])
        data['UNEMP'] = (data['UNEMP']/100)

        k_states = 6
        self.model = KalmanFilter(k_endog=2, k_states=k_states, **kwargs)
        self.model.bind(np.ascontiguousarray(data.values))

        # Statespace representation
        self.model.design[:, :, 0] = [
            [1, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1],
        ]
        rows = [0, 0, 1, 1, 2, 3, 4, 5]
        cols = [0, 4, 1, 2, 1, 2, 4, 5]
        first_period = [0, 0, 0, 0, 0, 0, 0, 0]
        self.model.transition[(rows, cols, first_period)] = [
            1, 1, 0, 0, 1, 1, 1, 1
        ]
        self.model.selection = np.eye(self.model.k_states)

        # Update matrices with given parameters
        (sigma_v, sigma_e, sigma_w, sigma_vl, sigma_ec,
         phi_1, phi_2, alpha_1, alpha_2, alpha_3) = np.array(
            self.true['parameters'],
        )
        self.model.design[([1, 1, 1], [1, 2, 3], [0, 0, 0])] = [
            alpha_1, alpha_2, alpha_3
        ]
        self.model.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
        self.model.obs_cov[1, 1, 0] = sigma_ec**2
        # Diagonal of the state covariance at time index 0
        diag_at_t0 = np.diag_indices(k_states) + (
            np.zeros(k_states, dtype=int),
        )
        self.model.state_cov[diag_at_t0] = [
            sigma_v**2, sigma_e**2, 0, 0, sigma_w**2, sigma_vl**2
        ]

        # Initialization
        initial_state = np.zeros((k_states,))
        initial_state_cov = np.eye(k_states)*100

        # Initialization: modification (T * P0 * T')
        trans0 = self.model.transition[:, :, 0]
        initial_state_cov = np.dot(
            np.dot(trans0, initial_state_cov), trans0.T
        )
        self.model.initialize_known(initial_state, initial_state_cov)

    def run_filter(self):
        """Filter the data and store the output on ``self.results``."""
        self.results = self.model.filter()

    def test_loglike(self):
        """Compare the summed log-likelihood with the reference value."""
        # The sum runs over every observation (llf_obs[0:]), not from
        # self.true['start'] onward as in test_filtered_state.
        total_llf = self.results.llf_obs[0:].sum()
        assert_almost_equal(total_llf, self.true['loglike'], 2)

    def test_filtered_state(self):
        """Compare filtered states 0, 1, 4, 5 with the reference columns."""
        start = self.true['start']
        # (row of filtered_state, column of the reference states)
        for state_idx, ref_col in ((0, 0), (1, 1), (4, 2), (5, 3)):
            assert_almost_equal(
                self.results.filtered_state[state_idx][start:],
                self.true_states.iloc[:, ref_col], 4
            )
class Clark1987(object):
    """
    Clark's (1987) univariate unobserved components model of real GDP (as
    presented in Kim and Nelson, 1999)

    Test data produced using GAUSS code described in Kim and Nelson (1999)
    and found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm

    See `results.results_kalman_filter` for more information.

    ``__init__`` prepares ``self.model``; call ``run_filter`` to populate
    ``self.results`` before running the ``test_*`` checks.
    """

    def __init__(self, dtype=float, **kwargs):
        # NOTE(review): `dtype` is not referenced anywhere below; only the
        # extra keyword arguments are passed to KalmanFilter -- confirm.
        reference = results_kalman_filter.uc_uni
        self.true = reference
        self.true_states = pd.DataFrame(reference['states'])

        # GDP, Quarterly, 1947.1 - 1995.3
        index = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
        data = pd.DataFrame(reference['data'], index=index, columns=['GDP'])
        data['lgdp'] = np.log(data['GDP'])

        # Construct the statespace representation
        k_states = 4
        self.model = KalmanFilter(k_endog=1, k_states=k_states, **kwargs)
        self.model.bind(data['lgdp'].values)

        self.model.design[:, :, 0] = [1, 1, 0, 0]
        ones_and_zeros_idx = (
            [0, 0, 1, 1, 2, 3],  # rows
            [0, 3, 1, 2, 1, 3],  # columns
            [0, 0, 0, 0, 0, 0],  # time index
        )
        self.model.transition[ones_and_zeros_idx] = [1, 1, 0, 0, 1, 1]
        self.model.selection = np.eye(self.model.k_states)

        # Update matrices with given parameters
        (sigma_v, sigma_e, sigma_w, phi_1,
         phi_2) = np.array(reference['parameters'])
        self.model.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
        variances = [sigma_v**2, sigma_e**2, 0, sigma_w**2]
        # Variances go on the diagonal of state_cov at time index 0
        self.model.state_cov[np.diag_indices(k_states) +
                             (np.zeros(k_states, dtype=int), )] = variances

        # Initialization
        initial_state = np.zeros((k_states, ))
        initial_state_cov = np.eye(k_states) * 100

        # Initialization: modification (T * P0 * T')
        transition_0 = self.model.transition[:, :, 0]
        initial_state_cov = np.dot(np.dot(transition_0, initial_state_cov),
                                   transition_0.T)
        self.model.initialize_known(initial_state, initial_state_cov)

    def run_filter(self):
        """Run the filter on the bound data; result goes to self.results."""
        self.results = self.model.filter()

    def test_loglike(self):
        """Summed llf_obs from true['start'] must match true['loglike']."""
        llf_after_start = self.results.llf_obs[self.true['start']:].sum()
        assert_almost_equal(llf_after_start, self.true['loglike'], 5)

    def test_filtered_state(self):
        """Filtered states 0, 1, 3 must match reference columns 0, 1, 2."""
        start = self.true['start']
        filtered = self.results.filtered_state
        for column, state_idx in enumerate((0, 1, 3)):
            assert_almost_equal(filtered[state_idx][start:],
                                self.true_states.iloc[:, column], 4)
class Clark1989(object):
    """
    Clark's (1989) bivariate unobserved components model of real GDP (as
    presented in Kim and Nelson, 1999)

    Tests two-dimensional observation data.

    Test data produced using GAUSS code described in Kim and Nelson (1999)
    and found at http://econ.korea.ac.kr/~cjkim/SSMARKOV.htm

    See `results.results_kalman_filter` for more information.

    ``__init__`` prepares ``self.model``; call ``run_filter`` to populate
    ``self.results`` before running the ``test_*`` checks.
    """

    def __init__(self, dtype=float, **kwargs):
        # NOTE(review): `dtype` is not referenced anywhere below; only the
        # extra keyword arguments are passed to KalmanFilter -- confirm.
        reference = results_kalman_filter.uc_bi
        self.true = reference
        self.true_states = pd.DataFrame(reference['states'])

        # GDP and Unemployment, Quarterly, 1948.1 - 1995.3
        # (the first four rows of the 1947-based index are dropped)
        index = pd.date_range('1947-01-01', '1995-07-01', freq='QS')
        data = pd.DataFrame(reference['data'],
                            index=index,
                            columns=['GDP', 'UNEMP'])[4:]
        data['GDP'] = np.log(data['GDP'])
        data['UNEMP'] = (data['UNEMP'] / 100)

        k_states = 6
        self.model = KalmanFilter(k_endog=2, k_states=k_states, **kwargs)
        self.model.bind(np.ascontiguousarray(data.values))

        # Statespace representation
        self.model.design[:, :, 0] = [[1, 1, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 1]]
        ones_and_zeros_idx = (
            [0, 0, 1, 1, 2, 3, 4, 5],  # rows
            [0, 4, 1, 2, 1, 2, 4, 5],  # columns
            [0, 0, 0, 0, 0, 0, 0, 0],  # time index
        )
        self.model.transition[ones_and_zeros_idx] = [1, 1, 0, 0, 1, 1, 1, 1]
        self.model.selection = np.eye(self.model.k_states)

        # Update matrices with given parameters
        (sigma_v, sigma_e, sigma_w, sigma_vl, sigma_ec, phi_1, phi_2,
         alpha_1, alpha_2, alpha_3) = np.array(reference['parameters'], )
        self.model.design[([1, 1, 1], [1, 2, 3],
                           [0, 0, 0])] = [alpha_1, alpha_2, alpha_3]
        self.model.transition[([1, 1], [1, 2], [0, 0])] = [phi_1, phi_2]
        self.model.obs_cov[1, 1, 0] = sigma_ec**2
        variances = [
            sigma_v**2, sigma_e**2, 0, 0, sigma_w**2, sigma_vl**2
        ]
        # Variances go on the diagonal of state_cov at time index 0
        self.model.state_cov[np.diag_indices(k_states) +
                             (np.zeros(k_states, dtype=int), )] = variances

        # Initialization
        initial_state = np.zeros((k_states, ))
        initial_state_cov = np.eye(k_states) * 100

        # Initialization: modification (T * P0 * T')
        transition_0 = self.model.transition[:, :, 0]
        initial_state_cov = np.dot(np.dot(transition_0, initial_state_cov),
                                   transition_0.T)
        self.model.initialize_known(initial_state, initial_state_cov)

    def run_filter(self):
        """Run the filter on the bound data; result goes to self.results."""
        self.results = self.model.filter()

    def test_loglike(self):
        """Summed llf_obs over all periods must match true['loglike']."""
        # Unlike test_filtered_state, no true['start'] offset is applied:
        # the sum covers llf_obs from index 0.
        assert_almost_equal(self.results.llf_obs[0:].sum(),
                            self.true['loglike'], 2)

    def test_filtered_state(self):
        """Filtered states 0, 1, 4, 5 must match reference columns 0-3."""
        start = self.true['start']
        filtered = self.results.filtered_state
        for column, state_idx in enumerate((0, 1, 4, 5)):
            assert_almost_equal(filtered[state_idx][start:],
                                self.true_states.iloc[:, column], 4)