def test__get_ts__asof_latest():
    """ I can get the latest known value for each sample date """
    result = groupby_asof(get_bitemporal_test_data())
    assert len(result) == 4
    assert (result['OPEN'] == [1.1, 2.1, 3.1, 4.1]).all()
    assert (result['CLOSE'] == [10.1, 20.1, 30.1, 40.1]).all()
def test__get_ts__asof_datetime():
    """ I can get a timeseries as-of a particular point in time """
    result = groupby_asof(get_bitemporal_test_data(), as_of=dt('2015-01-05'))
    assert len(result) == 3
    assert (result['OPEN'] == [1.1, 2.1, 3.0]).all()
    assert (result['CLOSE'] == [10.1, 20.1, 30.0]).all()
def read(self, symbol, as_of=None, raw=False, **kwargs):
    # TODO: shall we block from_version from getting into super.read?
    """Read data for the named symbol.

    Returns a BitemporalItem object with a data and metadata element (as
    passed into write).

    Parameters
    ----------
    symbol : `str`
        symbol name for the item
    as_of : `datetime.datetime`
        Return the data as it was as_of the point in time.
    raw : `bool`
        If True, will return the full bitemporal dataframe (i.e. all
        versions of the data). This also means as_of is ignored.

    Returns
    -------
    BitemporalItem namedtuple which contains a .data and .metadata element
    """
    item = self._store.read(symbol, **kwargs)
    # Most recent observe timestamp across all stored versions.
    last_updated = max(item.data.index.get_level_values(self.observe_column))
    library_name = self._store._arctic_lib.get_name()
    if raw:
        # Hand back every version unfiltered; as_of is deliberately ignored.
        return BitemporalItem(symbol=symbol, library=library_name, data=item.data,
                              metadata=item.metadata, last_updated=last_updated)
    # Group by every index level except the observe column to pick the
    # as-of view of the data.
    sample_cols = list(item.data.index.names)
    sample_cols.remove(self.observe_column)
    as_of_data = groupby_asof(item.data, as_of=as_of, dt_col=sample_cols,
                              asof_col=self.observe_column)
    return BitemporalItem(symbol=symbol, library=library_name, data=as_of_data,
                          metadata=item.metadata, last_updated=last_updated)
def test_fancy_group_by_multi_index():
    # Raw bitemporal frame: three index levels, the last being the observe date.
    sample_dts = [
        '2012-09-08 17:06:11.040', '2012-09-08 17:06:11.040',
        '2012-10-08 17:06:11.040', '2012-10-08 17:06:11.040',
        '2012-10-08 17:06:11.040', '2012-10-09 17:06:11.040',
        '2012-10-09 17:06:11.040', '2012-11-08 17:06:11.040',
    ]
    tickers = ['SPAM Index', 'EGG Index', 'SPAM Index', 'SPAM Index'] + ['EGG Index', 'SPAM Index'] * 2
    observed = ['2015-01-01'] * 3 + ['2015-01-05'] + ['2015-01-01'] * 4
    ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2', 'observed_dt'),
        index_arrs=[sample_dts, tickers, observed],
        data_dict={'near': [1.0, 1.6, 2.0, 4.2, 2.1, 2.5, 2.6, 3.0]})

    # After the as-of group-by only the latest observation per key survives
    # (4.2 supersedes 2.0 for the 2012-10-08 SPAM row).
    expected_dts = [
        '2012-09-08 17:06:11.040', '2012-09-08 17:06:11.040',
        '2012-10-08 17:06:11.040', '2012-10-08 17:06:11.040',
        '2012-10-09 17:06:11.040', '2012-10-09 17:06:11.040',
        '2012-11-08 17:06:11.040',
    ]
    expected_tickers = ['EGG Index', 'SPAM Index'] * 3 + ['SPAM Index']
    expected_ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2'),
        index_arrs=[expected_dts, expected_tickers],
        data_dict={'near': [1.6, 1.0, 2.1, 4.2, 2.6, 2.5, 3.0]})

    actual = groupby_asof(ts, dt_col=['index 1', 'index 2'], asof_col='observed_dt')
    assert_frame_equal(expected_ts, actual)
def test__can_insert_row():
    """ I can insert a new row into a bitemp ts and it comes back when selecting the latest data """
    ts = insert_at(get_bitemporal_test_data(), dt('2014-01-03'), [[9, 90]])
    assert len(ts) == 9
    latest = groupby_asof(ts)
    assert len(latest) == 4
    inserted = latest.loc[dt('2014-01-03')]
    assert inserted['OPEN'] == 9
    assert inserted['CLOSE'] == 90
def test__get_ts__unsorted_index():
    """ I can get a timeseries as-of a date when the index isn't sorted properly """
    raw = get_bitemporal_test_data()
    # Swap the third and fourth rows around. This would break the group-by if we didn't check
    # for sortedness
    shuffled = raw.reindex(raw.index[[0, 1, 3, 2, 4, 5, 6, 7]])
    result = groupby_asof(shuffled)
    assert len(result) == 4
    assert (result['OPEN'] == [1.1, 2.1, 3.1, 4.1]).all()
    assert (result['CLOSE'] == [10.1, 20.1, 30.1, 40.1]).all()
def test_fancy_group_by_multi_index():
    # Raw bitemporal input: sample date, ticker and observe date as index levels.
    ts = read_str_as_pandas("""       index 1 |    index 2 | observed_dt | near
2012-09-08 17:06:11.040 | SPAM Index |  2015-01-01 |  1.0
2012-09-08 17:06:11.040 |  EGG Index |  2015-01-01 |  1.6
2012-10-08 17:06:11.040 | SPAM Index |  2015-01-01 |  2.0
2012-10-08 17:06:11.040 | SPAM Index |  2015-01-05 |  4.2
2012-10-08 17:06:11.040 |  EGG Index |  2015-01-01 |  2.1
2012-10-09 17:06:11.040 | SPAM Index |  2015-01-01 |  2.5
2012-10-09 17:06:11.040 |  EGG Index |  2015-01-01 |  2.6
2012-11-08 17:06:11.040 | SPAM Index |  2015-01-01 |  3.0""", num_index=3)
    # The as-of view keeps only the latest observation per (index 1, index 2)
    # key: 4.2 (observed 2015-01-05) supersedes 2.0 for the 10-08 SPAM row.
    expected_ts = read_str_as_pandas("""  index 1 |    index 2 | near
2012-09-08 17:06:11.040 |  EGG Index |  1.6
2012-09-08 17:06:11.040 | SPAM Index |  1.0
2012-10-08 17:06:11.040 |  EGG Index |  2.1
2012-10-08 17:06:11.040 | SPAM Index |  4.2
2012-10-09 17:06:11.040 |  EGG Index |  2.6
2012-10-09 17:06:11.040 | SPAM Index |  2.5
2012-11-08 17:06:11.040 | SPAM Index |  3.0""", num_index=2)
    actual = groupby_asof(ts, dt_col=['index 1', 'index 2'], asof_col='observed_dt')
    assert_frame_equal(expected_ts, actual)
def test_fancy_group_by_multi_index():
    # Build the bitemporal input frame: (sample dt, ticker, observed dt) index.
    input_dts = [
        '2012-09-08 17:06:11.040',
        '2012-09-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-09 17:06:11.040',
        '2012-10-09 17:06:11.040',
        '2012-11-08 17:06:11.040',
    ]
    input_tickers = ['SPAM Index', 'EGG Index', 'SPAM Index', 'SPAM Index'] + ['EGG Index', 'SPAM Index'] * 2
    input_observed = ['2015-01-01'] * 3 + ['2015-01-05'] + ['2015-01-01'] * 4
    ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2', 'observed_dt'),
        index_arrs=[input_dts, input_tickers, input_observed],
        data_dict={'near': [1.0, 1.6, 2.0, 4.2, 2.1, 2.5, 2.6, 3.0]})

    # The as-of view drops the superseded 2.0 observation in favour of 4.2.
    result_dts = [
        '2012-09-08 17:06:11.040',
        '2012-09-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-09 17:06:11.040',
        '2012-10-09 17:06:11.040',
        '2012-11-08 17:06:11.040',
    ]
    result_tickers = ['EGG Index', 'SPAM Index'] * 3 + ['SPAM Index']
    expected_ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2'),
        index_arrs=[result_dts, result_tickers],
        data_dict={'near': [1.6, 1.0, 2.1, 4.2, 2.6, 2.5, 3.0]})

    assert_frame_equal(expected_ts,
                       groupby_asof(ts, dt_col=['index 1', 'index 2'], asof_col='observed_dt'))
def test_fancy_group_by_multi_index():
    # Bitemporal source table, three index columns (last one is observe dt).
    source = read_str_as_pandas("""   index 1 |    index 2 | observed_dt | near
2012-09-08 17:06:11.040 | SPAM Index |  2015-01-01 |  1.0
2012-09-08 17:06:11.040 |  EGG Index |  2015-01-01 |  1.6
2012-10-08 17:06:11.040 | SPAM Index |  2015-01-01 |  2.0
2012-10-08 17:06:11.040 | SPAM Index |  2015-01-05 |  4.2
2012-10-08 17:06:11.040 |  EGG Index |  2015-01-01 |  2.1
2012-10-09 17:06:11.040 | SPAM Index |  2015-01-01 |  2.5
2012-10-09 17:06:11.040 |  EGG Index |  2015-01-01 |  2.6
2012-11-08 17:06:11.040 | SPAM Index |  2015-01-01 |  3.0""", num_index=3)
    # Expected as-of result: one row per (index 1, index 2), the 2015-01-05
    # observation (4.2) replacing the older 2.0.
    expected = read_str_as_pandas("""  index 1 |    index 2 | near
2012-09-08 17:06:11.040 |  EGG Index |  1.6
2012-09-08 17:06:11.040 | SPAM Index |  1.0
2012-10-08 17:06:11.040 |  EGG Index |  2.1
2012-10-08 17:06:11.040 | SPAM Index |  4.2
2012-10-09 17:06:11.040 |  EGG Index |  2.6
2012-10-09 17:06:11.040 | SPAM Index |  2.5
2012-11-08 17:06:11.040 | SPAM Index |  3.0""", num_index=2)
    assert_frame_equal(
        expected,
        groupby_asof(source, dt_col=['index 1', 'index 2'], asof_col='observed_dt'))