def test__can_insert_row():
    """A row inserted into a bitemporal frame is returned when reading the latest data."""
    sample_date = dt('2014-01-03')
    with_insert = insert_at(get_bitemporal_test_data(), sample_date, [[9, 90]])
    # The frame is expected to hold 9 rows once the new row has been added.
    assert len(with_insert) == 9

    # With no as_of argument, groupby_asof is expected to yield one row per sample date.
    latest = groupby_asof(with_insert)
    assert len(latest) == 4

    inserted_row = latest.loc[sample_date]
    assert inserted_row['OPEN'] == 9
    assert inserted_row['CLOSE'] == 90
def test__get_ts__asof_datetime():
    """Selecting with ``as_of`` returns the timeseries as it stood at that point in time."""
    expected = {
        'OPEN': [1.1, 2.1, 3.0],
        'CLOSE': [10.1, 20.1, 30.0],
    }
    snapshot = groupby_asof(get_bitemporal_test_data(), as_of=dt('2015-01-05'))

    assert len(snapshot) == 3
    # Check the columns in a fixed order: OPEN first, then CLOSE.
    for column in ('OPEN', 'CLOSE'):
        assert all(snapshot[column] == expected[column])
def insert_at(df, sample_date, values):
    """Add ``values`` to a bi-temporal dataframe for ``sample_date``, observed now.

    This mirrors what happens when a price correction arrives: the sample
    date is the one being corrected, while the observation timestamp is the
    current time.

    :param df: the bi-temporal dataframe to insert into
    :param sample_date: first element of the index key for the new row
    :param values: row values, passed through to ``multi_index_insert_row``
    :return: whatever ``multi_index_insert_row`` returns for the new key
    """
    # The index key is [sample date, observation time], with "now" as the observation time.
    row_key = [sample_date, dt(datetime.now())]
    return multi_index_insert_row(df, row_key, values)
def get_datetime_index_test_data():
    """Build a 12-row OPEN/CLOSE frame indexed by (sample_dt, observed_dt).

    There are three distinct sample timestamps, each carrying four
    observations. The values are the multiples of ten from 0 to 230, laid
    out row by row, so every cell is unique and easy to identify in
    assertions.

    This function used to be defined twice in a row with identical
    behaviour; the second definition simply rebound the first. Only one
    definition is kept.

    :return: ``pd.DataFrame`` with columns ``['OPEN', 'CLOSE']`` and a
             two-level ``MultiIndex`` named ``['sample_dt', 'observed_dt']``
    """
    sample_dates = pd.DatetimeIndex(
        4 * [dt('1/1/2014 21:30')]
        + 4 * [dt('2/1/2014 21:30')]
        + 4 * [dt('3/1/2014 21:30')]
    )
    # Four observations per sample date; the last one of each group is
    # exactly one year after its sample timestamp.
    observed_dates = [
        dt('1/1/2014 22:00'),
        dt('1/1/2014 22:30'),
        dt('2/1/2014 00:00'),
        dt('1/1/2015 21:30'),
        dt('2/1/2014 23:00'),
        dt('2/1/2014 23:30'),
        dt('3/1/2014 00:00'),
        dt('2/1/2015 21:30'),
        dt('3/1/2014 21:30'),
        dt('3/1/2014 22:30'),
        dt('4/1/2014 00:00'),
        dt('3/1/2015 21:30'),
    ]
    index = pd.MultiIndex.from_arrays(
        [sample_dates, observed_dates],
        names=['sample_dt', 'observed_dt'],
    )

    # 24 consecutive integers reshaped to 12 rows x 2 columns, scaled by 10.
    prices = np.arange(24).reshape(12, 2) * 10
    df = pd.DataFrame(prices, index=index, columns=['OPEN', 'CLOSE'])

    #                                          OPEN  CLOSE
    # sample_dt           observed_dt
    # 2014-01-01 21:30:00 2014-01-01 22:00:00     0     10
    #                     2014-01-01 22:30:00    20     30
    #                     2014-02-01 00:00:00    40     50
    #                     2015-01-01 21:30:00    60     70
    # 2014-02-01 21:30:00 2014-02-01 23:00:00    80     90
    #                     2014-02-01 23:30:00   100    110
    #                     2014-03-01 00:00:00   120    130
    #                     2015-02-01 21:30:00   140    150
    # 2014-03-01 21:30:00 2014-03-01 21:30:00   160    170
    #                     2014-03-01 22:30:00   180    190
    #                     2014-04-01 00:00:00   200    210
    #                     2015-03-01 21:30:00   220    230
    return df