Пример #1
0
def test__get_ts__asof_latest():
    """With no as_of given, the latest known value per sample date is returned."""
    latest = groupby_asof(get_bitemporal_test_data())
    assert len(latest) == 4
    # The *.1 revisions supersede the original *.0 values.
    assert (latest['OPEN'] == [1.1, 2.1, 3.1, 4.1]).all()
    assert (latest['CLOSE'] == [10.1, 20.1, 30.1, 40.1]).all()
Пример #2
0
def test__get_ts__asof_datetime():
    """Querying as-of a point in time returns only data known at that time."""
    asof_df = groupby_asof(get_bitemporal_test_data(), as_of=dt('2015-01-05'))
    assert len(asof_df) == 3
    # The third row's revision was not yet observed as of 2015-01-05.
    assert (asof_df['OPEN'] == [1.1, 2.1, 3.0]).all()
    assert (asof_df['CLOSE'] == [10.1, 20.1, 30.0]).all()
Пример #3
0
def test__get_ts__asof_datetime():
    """A timeseries can be retrieved as it was known at a given point in time."""
    result = groupby_asof(get_bitemporal_test_data(), as_of=dt('2015-01-05'))
    assert len(result) == 3
    expected_open = [1.1, 2.1, 3.0]
    expected_close = [10.1, 20.1, 30.0]
    assert (result['OPEN'] == expected_open).all()
    assert (result['CLOSE'] == expected_close).all()
Пример #4
0
def test__get_ts__asof_latest():
    """Each sample date resolves to its most recently observed value."""
    result = groupby_asof(get_bitemporal_test_data())
    assert len(result) == 4
    expected = {'OPEN': [1.1, 2.1, 3.1, 4.1], 'CLOSE': [10.1, 20.1, 30.1, 40.1]}
    for column, values in expected.items():
        assert (result[column] == values).all()
Пример #5
0
    def read(self, symbol, as_of=None, raw=False, **kwargs):
        # TODO: shall we block from_version from getting into super.read?
        """Read data for the named symbol. Returns a BitemporalItem object with
        a data and metadata element (as passed into write).

        Parameters
        ----------
        symbol : `str`
            symbol name for the item
        as_of : `datetime.datetime`
            Return the data as it was as_of the point in time.
        raw : `bool`
            If True, will return the full bitemporal dataframe (i.e. all versions of the data). This also means as_of is
            ignored.

        Returns
        -------
        BitemporalItem namedtuple which contains a .data and .metadata element
        """
        stored = self._store.read(symbol, **kwargs)
        lib_name = self._store._arctic_lib.get_name()
        # Most recent observation timestamp across all versions of the data.
        last_updated = max(stored.data.index.get_level_values(self.observe_column))
        if raw:
            # Full bitemporal frame: every observed version, as_of ignored.
            return BitemporalItem(symbol=symbol, library=lib_name, data=stored.data,
                                  metadata=stored.metadata, last_updated=last_updated)
        # Collapse the observation level, keeping the value known as of `as_of`
        # for each remaining (sample) index level.
        sample_levels = list(stored.data.index.names)
        sample_levels.remove(self.observe_column)
        flattened = groupby_asof(stored.data, as_of=as_of, dt_col=sample_levels,
                                 asof_col=self.observe_column)
        return BitemporalItem(symbol=symbol, library=lib_name, data=flattened,
                              metadata=stored.metadata, last_updated=last_updated)
Пример #6
0
def test_fancy_group_by_multi_index():
    """groupby_asof collapses the observed_dt level of a 3-level index,
    keeping the latest observation per (index 1, index 2) pair."""
    sample_dts = [
        '2012-09-08 17:06:11.040',
        '2012-09-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-08 17:06:11.040',
        '2012-10-09 17:06:11.040',
        '2012-10-09 17:06:11.040',
        '2012-11-08 17:06:11.040',
    ]
    symbols = ['SPAM Index', 'EGG Index', 'SPAM Index', 'SPAM Index',
               'EGG Index', 'SPAM Index', 'EGG Index', 'SPAM Index']
    # Row 4 is a later (2015-01-05) revision of the 2012-10-08 SPAM value.
    observed = ['2015-01-01', '2015-01-01', '2015-01-01', '2015-01-05',
                '2015-01-01', '2015-01-01', '2015-01-01', '2015-01-01']
    ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2', 'observed_dt'),
        index_arrs=[sample_dts, symbols, observed],
        data_dict={'near': [1.0, 1.6, 2.0, 4.2, 2.1, 2.5, 2.6, 3.0]})

    expected_ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2'),
        index_arrs=[
            ['2012-09-08 17:06:11.040',
             '2012-09-08 17:06:11.040',
             '2012-10-08 17:06:11.040',
             '2012-10-08 17:06:11.040',
             '2012-10-09 17:06:11.040',
             '2012-10-09 17:06:11.040',
             '2012-11-08 17:06:11.040'],
            ['EGG Index', 'SPAM Index', 'EGG Index', 'SPAM Index',
             'EGG Index', 'SPAM Index', 'SPAM Index'],
        ],
        # 4.2 (the 2015-01-05 revision) wins over the superseded 2.0.
        data_dict={'near': [1.6, 1.0, 2.1, 4.2, 2.6, 2.5, 3.0]})

    actual = groupby_asof(ts, dt_col=['index 1', 'index 2'], asof_col='observed_dt')
    assert_frame_equal(expected_ts, actual)
Пример #7
0
def test__can_insert_row():
    """A row inserted into a bitemporal ts appears when selecting latest data."""
    ts = insert_at(get_bitemporal_test_data(), dt('2014-01-03'), [[9, 90]])
    assert len(ts) == 9
    latest = groupby_asof(ts)
    assert len(latest) == 4
    inserted = latest.loc[dt('2014-01-03')]
    assert inserted['OPEN'] == 9
    assert inserted['CLOSE'] == 90
Пример #8
0
def test__can_insert_row():
    """Inserting a new row into a bitemp ts makes it visible in the latest view."""
    data = get_bitemporal_test_data()
    data = insert_at(data, dt('2014-01-03'), [[9, 90]])
    # One raw row added on top of the original eight.
    assert len(data) == 9
    data = groupby_asof(data)
    assert len(data) == 4
    row = data.loc[dt('2014-01-03')]
    assert row['OPEN'] == 9 and row['CLOSE'] == 90
Пример #9
0
def test__get_ts__unsorted_index():
    """As-of selection still works when the index is not sorted."""
    raw = get_bitemporal_test_data()
    # Swap the third and fourth rows around. This would break the group-by
    # if sortedness were not checked.
    shuffled = raw.reindex(raw.index[[0, 1, 3, 2, 4, 5, 6, 7]])
    result = groupby_asof(shuffled)
    assert len(result) == 4
    assert (result['OPEN'] == [1.1, 2.1, 3.1, 4.1]).all()
    assert (result['CLOSE'] == [10.1, 20.1, 30.1, 40.1]).all()
Пример #10
0
def test__get_ts__unsorted_index():
    """A timeseries can be fetched as-of a date even with a badly ordered index."""
    df = get_bitemporal_test_data()
    # Reorder rows 3 and 4; the group-by must tolerate an unsorted index.
    reorder = [0, 1, 3, 2, 4, 5, 6, 7]
    df = groupby_asof(df.reindex(df.index[reorder]))
    assert len(df) == 4
    assert list(df['OPEN']) == [1.1, 2.1, 3.1, 4.1]
    assert list(df['CLOSE']) == [10.1, 20.1, 30.1, 40.1]
Пример #11
0
def test_fancy_group_by_multi_index():
    """The latest observation per (index 1, index 2) pair is selected when
    grouping a 3-level bitemporal frame as-of the present."""
    # 2012-10-08 SPAM has two versions; the 2015-01-05 revision (4.2) must win.
    source = read_str_as_pandas("""      index 1 |    index 2 | observed_dt | near
                     2012-09-08 17:06:11.040 | SPAM Index | 2015-01-01 |  1.0
                     2012-09-08 17:06:11.040 |  EGG Index | 2015-01-01 |  1.6
                     2012-10-08 17:06:11.040 | SPAM Index | 2015-01-01 |  2.0
                     2012-10-08 17:06:11.040 | SPAM Index | 2015-01-05 |  4.2
                     2012-10-08 17:06:11.040 |  EGG Index | 2015-01-01 |  2.1
                     2012-10-09 17:06:11.040 | SPAM Index | 2015-01-01 |  2.5
                     2012-10-09 17:06:11.040 |  EGG Index | 2015-01-01 |  2.6
                     2012-11-08 17:06:11.040 | SPAM Index | 2015-01-01 |  3.0""", num_index=3)
    expected = read_str_as_pandas("""  index 1 |    index 2 | near
                          2012-09-08 17:06:11.040 |  EGG Index |  1.6
                          2012-09-08 17:06:11.040 | SPAM Index |  1.0
                          2012-10-08 17:06:11.040 |  EGG Index |  2.1
                          2012-10-08 17:06:11.040 | SPAM Index |  4.2
                          2012-10-09 17:06:11.040 |  EGG Index |  2.6
                          2012-10-09 17:06:11.040 | SPAM Index |  2.5
                          2012-11-08 17:06:11.040 | SPAM Index |  3.0""", num_index=2)
    actual = groupby_asof(source, dt_col=['index 1', 'index 2'], asof_col='observed_dt')
    assert_frame_equal(expected, actual)
Пример #12
0
def test_fancy_group_by_multi_index():
    """Grouping as-of the latest observation flattens observed_dt out of a
    3-level index, keeping one row per (index 1, index 2) pair."""
    # (sample dt, symbol, observed dt, near) — row 4 revises row 3 on 2015-01-05.
    source_rows = [
        ('2012-09-08 17:06:11.040', 'SPAM Index', '2015-01-01', 1.0),
        ('2012-09-08 17:06:11.040', 'EGG Index', '2015-01-01', 1.6),
        ('2012-10-08 17:06:11.040', 'SPAM Index', '2015-01-01', 2.0),
        ('2012-10-08 17:06:11.040', 'SPAM Index', '2015-01-05', 4.2),
        ('2012-10-08 17:06:11.040', 'EGG Index', '2015-01-01', 2.1),
        ('2012-10-09 17:06:11.040', 'SPAM Index', '2015-01-01', 2.5),
        ('2012-10-09 17:06:11.040', 'EGG Index', '2015-01-01', 2.6),
        ('2012-11-08 17:06:11.040', 'SPAM Index', '2015-01-01', 3.0),
    ]
    dts, syms, observed, near = zip(*source_rows)
    ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2', 'observed_dt'),
        index_arrs=[list(dts), list(syms), list(observed)],
        data_dict={'near': list(near)}
    )

    # Expected output has no observed_dt level; the 4.2 revision survives.
    expected_rows = [
        ('2012-09-08 17:06:11.040', 'EGG Index', 1.6),
        ('2012-09-08 17:06:11.040', 'SPAM Index', 1.0),
        ('2012-10-08 17:06:11.040', 'EGG Index', 2.1),
        ('2012-10-08 17:06:11.040', 'SPAM Index', 4.2),
        ('2012-10-09 17:06:11.040', 'EGG Index', 2.6),
        ('2012-10-09 17:06:11.040', 'SPAM Index', 2.5),
        ('2012-11-08 17:06:11.040', 'SPAM Index', 3.0),
    ]
    exp_dts, exp_syms, exp_near = zip(*expected_rows)
    expected_ts = multi_index_df_from_arrs(
        index_headers=('index 1', 'index 2'),
        index_arrs=[list(exp_dts), list(exp_syms)],
        data_dict={'near': list(exp_near)}
    )

    assert_frame_equal(expected_ts,
                       groupby_asof(ts, dt_col=['index 1', 'index 2'], asof_col='observed_dt'))
Пример #13
0
def test_fancy_group_by_multi_index():
    """groupby_asof over a two-level sample key drops the observed_dt level
    and keeps the most recent observation for each pair."""
    bitemporal = read_str_as_pandas("""      index 1 |    index 2 | observed_dt | near
                     2012-09-08 17:06:11.040 | SPAM Index | 2015-01-01 |  1.0
                     2012-09-08 17:06:11.040 |  EGG Index | 2015-01-01 |  1.6
                     2012-10-08 17:06:11.040 | SPAM Index | 2015-01-01 |  2.0
                     2012-10-08 17:06:11.040 | SPAM Index | 2015-01-05 |  4.2
                     2012-10-08 17:06:11.040 |  EGG Index | 2015-01-01 |  2.1
                     2012-10-09 17:06:11.040 | SPAM Index | 2015-01-01 |  2.5
                     2012-10-09 17:06:11.040 |  EGG Index | 2015-01-01 |  2.6
                     2012-11-08 17:06:11.040 | SPAM Index | 2015-01-01 |  3.0""",
                            num_index=3)
    # Only the latest revision (4.2) of the duplicated SPAM row remains.
    flattened_expected = read_str_as_pandas("""  index 1 |    index 2 | near
                          2012-09-08 17:06:11.040 |  EGG Index |  1.6
                          2012-09-08 17:06:11.040 | SPAM Index |  1.0
                          2012-10-08 17:06:11.040 |  EGG Index |  2.1
                          2012-10-08 17:06:11.040 | SPAM Index |  4.2
                          2012-10-09 17:06:11.040 |  EGG Index |  2.6
                          2012-10-09 17:06:11.040 | SPAM Index |  2.5
                          2012-11-08 17:06:11.040 | SPAM Index |  3.0""",
                                     num_index=2)
    flattened = groupby_asof(bitemporal, dt_col=['index 1', 'index 2'],
                             asof_col='observed_dt')
    assert_frame_equal(flattened_expected, flattened)