def setupClass(cls):
    """Build the class-level fixtures for the single-index grouping tests.

    Loads the Grunfeld dataset, indexes it by ``firm`` and stores on ``cls``:

    * ``grouping`` -- a ``Grouping`` built from the ``firm`` index,
    * ``data`` -- the firm-indexed frame itself,
    * ``expected_counts`` -- a list of eleven ``20`` entries.
    """
    firm_indexed = grunfeld.load_pandas().data.set_index(['firm'])
    cls.grouping = Grouping(firm_indexed.index)
    cls.data = firm_indexed
    # presumably 11 firms with 20 observations each -- confirm against dataset
    cls.expected_counts = [20] * 11
def setupClass(cls):
    """Build the class-level fixtures for the multi-index grouping tests.

    Loads the Grunfeld dataset, indexes it by ``(firm, year)`` and stores on
    ``cls`` the resulting ``Grouping``, the indexed frame and the expected
    per-group counts (a list of eleven ``20`` entries).
    """
    panel_frame = grunfeld.load_pandas().data.set_index(['firm', 'year'])
    panel_index = panel_frame.index

    cls.grouping = Grouping(panel_index)
    cls.data = panel_frame
    # presumably one count per firm (11 firms x 20 years) -- TODO confirm
    cls.expected_counts = [20] * 11
def test_init_api():
    """Check ``Grouping`` construction from each supported kind of input.

    Covers a two-level MultiIndex panel, a three-level MultiIndex, the
    list-of-tuples equivalents of both, a single-level index, a flat list,
    and the generic ``groupN`` names used when no names are available.

    The statements are order-dependent: passing ``names=`` together with the
    panel index renames that index in place, and a later assertion relies
    on it.
    """
    check = np.testing.assert_array_equal

    # --- two-level MultiIndex panel (firm, year) -------------------------
    panel_index = grunfeld.load_pandas().data.set_index(['firm', 'year']).index
    grouping = Grouping(panel_index)
    check(grouping.group_names, ['firm', 'year'])
    check(grouping.index_shape, (11, 20))

    # Integer labels: each firm code is repeated for its 20 rows, in this
    # order; the year codes 0..19 cycle once per firm (11 times).
    firm_codes = np.repeat([5, 8, 4, 2, 1, 7, 9, 10, 6, 3, 0], 20)
    year_codes = np.tile(np.arange(20), 11)
    check(grouping.labels, np.array([firm_codes, year_codes]))

    # Explicit names take precedence over the index's own level names.
    grouping = Grouping(panel_index, names=['firms', 'year'])
    check(grouping.group_names, ['firms', 'year'])

    # --- three-level MultiIndex grouping ---------------------------------
    survey_index = anes96.load_pandas().data.set_index(
        ['educ', 'income', 'TVnews']).index
    grouping = Grouping(survey_index)
    check(grouping.group_names, ['educ', 'income', 'TVnews'])
    check(grouping.index_shape, (7, 24, 8))

    # --- list-of-tuples version of the panel -----------------------------
    panel_tuples = panel_index.tolist()
    grouping = Grouping(panel_tuples, names=['firms', 'year'])
    check(grouping.group_names, ['firms', 'year'])
    check(grouping.index_shape, (11, 20))

    # --- list-of-tuples version of the three-level grouping --------------
    survey_tuples = survey_index.tolist()
    grouping = Grouping(survey_tuples, names=['educ', 'income', 'TVnews'])
    check(grouping.group_names, ['educ', 'income', 'TVnews'])
    check(grouping.index_shape, (7, 24, 8))

    # --- single-level index ----------------------------------------------
    firm_level = panel_index.get_level_values(0)
    grouping = Grouping(firm_level)
    # panel_index had its names changed in place by the names= call above,
    # hence 'firms' rather than 'firm'.
    check(grouping.group_names, ['firms'])
    check(grouping.index_shape, (220, ))

    # --- single-variable flat list ---------------------------------------
    firm_values = panel_index.get_level_values(0).tolist()
    grouping = Grouping(firm_values)
    check(grouping.group_names, ["group0"])
    check(grouping.index_shape, 11 * 20)

    # --- generic names when none are supplied ----------------------------
    grouping = Grouping(survey_tuples)
    check(grouping.group_names, ['group0', 'group1', 'group2'])
def test_init_api():
    """Verify the ``Grouping`` constructor across its accepted input types.

    Inputs exercised, in order: MultiIndex panel, MultiIndex panel with
    explicit names, three-level MultiIndex, list-of-tuples panel,
    list-of-tuples grouping, single-level index, flat list, and an unnamed
    list of tuples (generic ``groupN`` names).

    Order matters: the call with ``names=['firms', 'year']`` renames the
    panel index in place, which the single-level-index check depends on.
    """

    def verify(group, names, shape=None):
        # Names are always checked first, then (optionally) the index shape.
        np.testing.assert_array_equal(group.group_names, names)
        if shape is not None:
            np.testing.assert_array_equal(group.index_shape, shape)

    # MultiIndex panel built from the Grunfeld data.
    grunfeld_frame = grunfeld.load_pandas().data
    firm_year_index = grunfeld_frame.set_index(['firm', 'year']).index
    grouping = Grouping(firm_year_index)
    verify(grouping, ['firm', 'year'], (11, 20))

    # Expected integer labels: row 0 holds the firm codes (20 rows per firm,
    # firms in the order below), row 1 holds year codes 0..19 once per firm.
    firm_code_order = [5, 8, 4, 2, 1, 7, 9, 10, 6, 3, 0]
    expected_labels = np.array([
        np.repeat(firm_code_order, 20),
        np.tile(np.arange(20), len(firm_code_order)),
    ])
    np.testing.assert_array_equal(grouping.labels, expected_labels)

    # Same index, caller-supplied names.
    grouping = Grouping(firm_year_index, names=['firms', 'year'])
    verify(grouping, ['firms', 'year'])

    # Three-level MultiIndex grouping from the ANES96 data.
    anes_frame = anes96.load_pandas().data
    anes_index = anes_frame.set_index(['educ', 'income', 'TVnews']).index
    grouping = Grouping(anes_index)
    verify(grouping, ['educ', 'income', 'TVnews'], (7, 24, 8))

    # Panel given as a plain list of tuples.
    firm_year_list = firm_year_index.tolist()
    grouping = Grouping(firm_year_list, names=['firms', 'year'])
    verify(grouping, ['firms', 'year'], (11, 20))

    # Three-level grouping given as a plain list of tuples.
    anes_list = anes_index.tolist()
    grouping = Grouping(anes_list, names=['educ', 'income', 'TVnews'])
    verify(grouping, ['educ', 'income', 'TVnews'], (7, 24, 8))

    # Single-level index taken from the panel. Its name is 'firms' because
    # the panel index had its names changed in place above.
    single_level = firm_year_index.get_level_values(0)
    grouping = Grouping(single_level)
    verify(grouping, ['firms'], (220,))

    # Single-variable flat list: no names available, so a generic one is used.
    single_list = firm_year_index.get_level_values(0).tolist()
    grouping = Grouping(single_list)
    verify(grouping, ["group0"], 11*20)

    # Unnamed list of tuples falls back to generic group names.
    grouping = Grouping(anes_list)
    verify(grouping, ['group0', 'group1', 'group2'])
from statsmodels.datasets import grunfeld
import time
from ipca import IPCARegressor


# Constructor argument validation
@pytest.mark.fast_test
def test_construction_errors():
    """Check that ``IPCARegressor`` rejects invalid constructor arguments."""
    invalid_arguments = (
        (ValueError, {"n_factors": 0}),
        (NotImplementedError, {"intercept": 'jabberwocky'}),
        (ValueError, {"iter_tol": 2}),
    )
    for expected_error, kwargs in invalid_arguments:
        assert_raises(expected_error, IPCARegressor, **kwargs)


# Build the shared test panel from the Grunfeld dataset
data = grunfeld.load_pandas().data
data["year"] = data["year"].astype(np.int64)
# NOTE(review): a bytes -> str decode of the firm column was disabled here;
# presumably the loader already returns str -- confirm before removing.
#data.firm = data.firm.apply(lambda x: x.decode('utf-8'))

# Replace each firm name with a unique integer ID (6, 7, ..., N + 5),
# assigned in the sorted order returned by np.unique
N = len(np.unique(data["firm"]))
ID = dict(zip(np.unique(data["firm"]).tolist(), np.arange(6, N + 6)))
data["firm"] = data["firm"].apply(lambda name: ID[name])

# Fix the column order (entity, time, then the remaining variables) and
# convert the frame to a plain numpy array
data = data[['firm', 'year', 'invest', 'value', 'capital']].to_numpy()

# Two random pre-specified factor series, one value per unique year.
# The draw is unseeded, so PSF differs from run to run.
PSF = np.random.randn(len(np.unique(data[:, 1])), 2).reshape((2, -1))

# Regressor instance under test
regr = IPCARegressor(n_factors=1, intercept=False)