예제 #1
0
    def _init_panels(self, sids):
        """
        Allocate the rolling panel(s) for the given ``sids``.

        Without downsampling, a single panel whose window is
        ``window_length * bars_in_day`` is stored on ``self.rolling_panel``.

        With downsampling, ``self.rolling_panel`` gets a window of
        ``bars_in_day`` and an additional panel with a window of
        ``window_length`` is stored on ``self.daily_rolling_panel``.
        """
        if not self.downsample:
            total_bars = self.window_length * self.bars_in_day
            self.rolling_panel = RollingPanel(
                total_bars, self.field_names, sids)
            return

        self.rolling_panel = RollingPanel(
            self.bars_in_day, self.field_names, sids)
        self.daily_rolling_panel = RollingPanel(
            self.window_length, self.field_names, sids)
예제 #2
0
    def test_basics(self):
        """
        Add 30 daily frames to a RollingPanel with a window of 10 and, after
        every insertion, compare ``get_current()`` with a panel rebuilt from
        the frames whose dates should still be inside the window.
        """
        items = ['foo', 'bar', 'baz']
        minor = ['A', 'B', 'C', 'D']
        window = 10

        rp = RollingPanel(window, items, minor, cap_multiple=2)
        dates = pd.date_range('2000-01-01', periods=30, tz='utc')

        # Dates expected to be in the current window, oldest first.
        live_dates = deque()
        # Every frame added so far, keyed by date; the expected panel picks
        # only the live dates out of it.
        seen_frames = {}

        for position, date in enumerate(dates):
            frame = pd.DataFrame(
                np.random.randn(3, 4),
                index=items,
                columns=minor,
            )
            rp.add_frame(date, frame)

            seen_frames[date] = frame
            live_dates.append(date)
            if position >= window:
                live_dates.popleft()

            result = rp.get_current()
            expected = pd.Panel(
                seen_frames,
                items=list(live_dates),
                major_axis=items,
                minor_axis=minor,
            )
            # The expected panel is keyed by date first, so swap the first
            # two axes before comparing.
            tm.assert_panel_equal(result, expected.swapaxes(0, 1))
예제 #3
0
    def _append_to_window(self, event):
        """
        Store ``event`` in the rolling panel and update the day bookkeeping.

        Refreshes ``self.field_names`` from the event, creates
        ``self.rolling_panel`` on first use, appends the event's data as a
        frame keyed by ``event.dt``, increments ``self.trading_days_total``
        when the event is at or past the market close, records
        ``self.last_dt`` and sets ``self.full`` once ``window_length``
        trading days have been counted.
        """
        self.field_names = self._get_field_names(event)

        # Fall back to the sids present on this event when no static set
        # has been configured.
        sids = (
            set(event.data.keys())
            if self.static_sids is None
            else self.static_sids
        )

        # Create the rolling panel lazily, on the first event.
        if self.rolling_panel is None:
            panel_length = self.window_length * self.bars_in_day
            self.rolling_panel = RollingPanel(
                panel_length, self.field_names, sids)

        # Store the event's data in the rolling panel.
        event_frame = pd.DataFrame(
            event.data, index=self.field_names, columns=sids)
        self.rolling_panel.add_frame(event.dt, event_frame)

        # Update the trading day counter.
        _open_dt, close_dt = trading.environment.get_open_and_close(event.dt)
        if self.bars == 'daily':
            # Daily bars have their dt set to midnight.
            close_dt = close_dt.replace(hour=0, minute=0, second=0)
        if event.dt >= close_dt:
            self.trading_days_total += 1

        self.last_dt = event.dt

        if self.trading_days_total >= self.window_length:
            self.full = True
예제 #4
0
def create_initial_day_panel(days_needed, fields, sids, dt):
    """
    Build a RollingPanel whose date index is pre-filled from
    ``days_index_at_dt(days_needed, dt)``.

    When more than one day is requested the last entry of that index is
    dropped; for a single day the index is used as-is.  The panel's
    ``index_buf`` is filled from the index and ``pos`` is set to the number
    of days written.
    """
    full_index = days_index_at_dt(days_needed, dt)
    # Use the original index in the 1-bar case.
    day_index = full_index if days_needed == 1 else full_index[:-1]

    n_days = len(day_index)
    panel = RollingPanel(n_days, fields, sids)
    for slot, day in enumerate(day_index):
        panel.index_buf[slot] = day
    panel.pos = n_days
    return panel
예제 #5
0
    def test_alignment(self, env):
        """
        Check that ``get_current()`` keeps data aligned with the date index.

        A RollingPanel with a window of 2 is seeded through
        ``initial_dates``, one frame is added, and the current view is
        compared with a hand-built panel.  The panel is then grown with
        ``extend_back`` and the comparison is repeated.
        """
        items = ('a', 'b')
        sids = (1, 2)

        # The expected panels below rely on this window holding four entries.
        dts = env.market_minute_window(
            env.open_and_closes.market_open[0],
            4,
        ).values
        rp = RollingPanel(2, items, sids, initial_dates=dts[1:-1])

        frame = pd.DataFrame(
            data=np.arange(4).reshape((2, 2)),
            columns=sids,
            index=items,
        )

        rp.add_frame(dts[-1], frame)

        # Only the newest date carries data; the older one is expected to
        # be NaN.
        cur = rp.get_current()
        data = np.array(
            (((np.nan, np.nan), (0, 1)), ((np.nan, np.nan), (2, 3))), float)
        expected = pd.Panel(
            data,
            major_axis=dts[2:],
            minor_axis=sids,
            items=items,
        )
        expected.major_axis = expected.major_axis.tz_localize('utc')
        tm.assert_panel_equal(
            cur,
            expected,
        )

        rp.extend_back(dts[:-2])

        # After extending back, all of ``dts`` is expected on the major
        # axis, still with data on the last date only.
        cur = rp.get_current()
        data = np.array(
            (((np.nan, np.nan), (np.nan, np.nan), (np.nan, np.nan), (0, 1)),
             ((np.nan, np.nan), (np.nan, np.nan), (np.nan, np.nan), (2, 3))),
            float)
        expected = pd.Panel(
            data,
            major_axis=dts,
            minor_axis=sids,
            items=items,
        )
        expected.major_axis = expected.major_axis.tz_localize('utc')
        tm.assert_panel_equal(
            cur,
            expected,
        )
예제 #6
0
 def create_buffer_panel(self, initial_dt):
     """
     Build the RollingPanel that holds enough minutes to serve every
     frequency in ``self.unique_frequencies``.

     The window is the largest ``max_minutes`` among those frequencies; the
     panel's items and sids come from ``self.fields`` and ``self.sids``.
     ``initial_dt`` is accepted but not used by this method.
     """
     widest_window = max(
         frequency.max_minutes for frequency in self.unique_frequencies
     )
     return RollingPanel(
         window=widest_window,
         items=self.fields,
         sids=self.sids,
     )
예제 #7
0
def f(option='clever', n=500, copy=False):
    """
    Drive ``n`` iterations of one of two rolling-window implementations.

    With ``option == 'clever'`` each frame is pushed into a RollingPanel and
    the current view is read back (and copied when ``copy`` is true).  With
    any other ``option`` a ``pd.Panel`` is rebuilt on every iteration from
    the frames still inside the window.  The per-iteration result is
    discarded and nothing is returned.
    """
    items = range(5)
    minor = range(20)
    window = 100

    dates = pd.date_range('2000-01-01', periods=n, tz='utc')
    window_frames = {}

    use_rolling_panel = option == 'clever'
    if use_rolling_panel:
        rp = RollingPanel(window, items, minor, cap_multiple=2)

    window_dates = deque()
    base_frame = pd.DataFrame(
        np.random.randn(len(items), len(minor)),
        index=items,
        columns=minor,
    )

    for step, date in enumerate(dates):
        frame = base_frame * (1 + 0.001 * step)

        if use_rolling_panel:
            rp.add_frame(date, frame)

        window_frames[date] = frame
        window_dates.append(date)

        # Evict the oldest frame once the window has filled up.
        if step >= window:
            del window_frames[window_dates.popleft()]

        if use_rolling_panel:
            result = rp.get_current()
            if copy:
                result = result.copy()
        else:
            result = pd.Panel(
                window_frames,
                items=list(window_dates),
                major_axis=items,
                minor_axis=minor,
            )
예제 #8
0
    def create_digest_panels(self, initial_sids, initial_dt):
        """
        Create one RollingPanel per frequency in ``self.frequency_groups``
        that needs digest storage.

        Each panel is sized from the spec with the highest bar count for its
        frequency.  This relies on group_by_frequency sorting each value
        list by ascending bar count, so that spec is the last one.

        Returns a tuple ``(panels, first_window_starts,
        first_window_closes)``, each a dict keyed by frequency.  Frequencies
        whose largest spec has a bar count of 1 get no panel but still get
        window start/close entries.
        """
        window_starts = {}
        window_closes = {}
        digest_panels = {}

        for freq, specs in iteritems(self.frequency_groups):
            # Last spec == largest bar count, given the sorting noted above.
            biggest = specs[-1]

            if biggest.bar_count != 1:
                initial_dates = index_at_dt(biggest, initial_dt)

                # The first digest roll is keyed to the close of the first
                # entry in the initial index.
                first_close = initial_dates[0]
                window_closes[freq] = first_close
                window_starts[freq] = freq.window_open(first_close)

                digest_panels[freq] = RollingPanel(
                    window=len(initial_dates) - 1,
                    items=self.fields,
                    sids=initial_sids,
                )
            else:
                # No digest panel needed: this frequency only ever uses
                # data drawn from self.buffer_panel.
                start = freq.window_open(initial_dt)
                window_starts[freq] = start
                window_closes[freq] = freq.window_close(start)

        return digest_panels, window_starts, window_closes
예제 #9
0
 def create_buffer_panel(self, initial_sids, initial_dt):
     """
     Build the RollingPanel that holds enough minutes to serve every
     frequency in ``self.unique_frequencies``.

     The window is the largest ``max_minutes`` among those frequencies, the
     items are ``self.fields`` and the sids are ``initial_sids``.
     ``initial_dt`` is accepted but not used by this method.
     """
     minute_counts = [freq.max_minutes for freq in self.unique_frequencies]
     buffer_panel = RollingPanel(
         max(minute_counts),
         self.fields,
         initial_sids,
     )
     return buffer_panel
예제 #10
0
    def _create_panel(self, dt, spec):
        """
        Build a RollingPanel for ``spec`` with a pre-aligned date buffer.

        ``dt`` is first normalized to the spec's data frequency.  The panel
        window is ``spec.bar_count - 1`` and its initial dates come from
        ``self._create_window_date_buf``.
        """
        frequency = spec.frequency
        aligned_dt = normalize_to_data_freq(frequency.data_frequency, dt)
        n_bars = spec.bar_count - 1

        initial_dates = self._create_window_date_buf(
            n_bars,
            frequency.unit_str,
            frequency.data_frequency,
            aligned_dt,
        )
        return RollingPanel(
            window=n_bars,
            items=self.fields,
            sids=self.sids,
            initial_dates=initial_dates,
        )
예제 #11
0
    def test_get_current_multiple_call_same_tick(self, env):
        """
        Check that repeated ``get_current()`` calls return independent data.

        The old get_current copied the data on every call, so mutating one
        result had no side effects on another.  To keep that API, both the
        default call and the ``raw=True`` call must hand back a fresh buffer
        each time.
        """
        def data_id(values):
            # Address/read-only info of the underlying buffer, used to tell
            # whether two arrays share memory.
            return values.__array_interface__['data']

        items = ('a', 'b')
        sids = (1, 2)

        dts = env.market_minute_window(
            env.open_and_closes.market_open[0],
            4,
        ).values
        rp = RollingPanel(2, items, sids, initial_dates=dts[1:-1])

        frame = pd.DataFrame(
            data=np.arange(4).reshape((2, 2)),
            columns=sids,
            index=items,
        )

        rp.add_frame(dts[-1], frame)

        # Each get_current call makes a copy.
        cur = rp.get_current()
        cur2 = rp.get_current()
        assert data_id(cur.values) != data_id(cur2.values)

        # Make sure raw follows the same logic.
        raw = rp.get_current(raw=True)
        raw2 = rp.get_current(raw=True)
        assert data_id(raw) != data_id(raw2)
예제 #12
0
    def test_adding_and_dropping_items(self,
                                       n_items=5,
                                       n_minor=10,
                                       window=10,
                                       periods=30):
        """
        Check that RollingPanel follows item/minor labels that change over
        time.

        After every ``add_frame`` the labels used for the next frame are
        rotated: the oldest label is dropped and a new one is appended on
        both the items and the minor axis.  On each iteration the current
        view is compared against the labels, the newest frame and the full
        panel expected to remain in the window.
        """
        np.random.seed(123)

        # Labels used to build the next frame; rotated at the end of each
        # iteration.
        items = deque(range(n_items))
        minor = deque(range(n_minor))

        # Labels expected to be visible in the panel's current view.
        expected_items = deque(range(n_items))
        expected_minor = deque(range(n_minor))

        first_non_existant = max(n_items, n_minor) + 1
        # We want to add new columns in random order.
        add_items = np.arange(first_non_existant, first_non_existant + periods)
        np.random.shuffle(add_items)

        rp = RollingPanel(window, items, minor, cap_multiple=2)

        dates = pd.date_range('2000-01-01', periods=periods, tz='utc')

        # date -> frame for the dates expected in the current view.
        frames = {}
        expected_dates = deque()

        for i, (date, add_item) in enumerate(zip(dates, add_items)):
            frame = pd.DataFrame(np.random.randn(n_items, n_minor),
                                 index=items,
                                 columns=minor)

            if i >= window:
                # Old labels and dates should start to get dropped at every
                # call
                del frames[expected_dates.popleft()]
                expected_minor.popleft()
                expected_items.popleft()

            expected_dates.append(date)

            rp.add_frame(date, frame)

            frames[date] = frame

            result = rp.get_current()
            np.testing.assert_array_equal(sorted(result.minor_axis.values),
                                          sorted(expected_minor))
            np.testing.assert_array_equal(sorted(result.items.values),
                                          sorted(expected_items))
            # The newest row of the view must match the frame just added.
            tm.assert_frame_equal(frame.T, result.ix[frame.index, -1,
                                                     frame.columns])
            expected_result = pd.Panel(frames).swapaxes(0, 1)
            tm.assert_panel_equal(expected_result, result)

            # Insert new items
            minor.popleft()
            minor.append(add_item)
            items.popleft()
            items.append(add_item)

            expected_minor.append(add_item)
            expected_items.append(add_item)
0
def run_history_implementations(option='clever',
                                n=500,
                                change_fields=False,
                                copy=False,
                                n_items=15,
                                n_minor=20,
                                change_freq=5,
                                window=100):
    """
    Drive ``n`` iterations of one of two rolling-window implementations.

    With ``option == 'clever'`` each frame is pushed into a RollingPanel and
    the current view is read back (and copied when ``copy`` is true).  When
    ``change_fields`` is true, every ``change_freq`` iterations the first
    item/minor label is dropped and a new one is appended before the frame
    is built.

    With any other ``option`` a ``pd.Panel`` is rebuilt on every iteration
    from the frames still inside the window; ``change_fields`` and ``copy``
    are not consulted on that path.

    The per-iteration result is discarded and nothing is returned.
    """
    # Materialise the labels as lists: the change_fields path below appends
    # to them, which a Python 3 ``range`` object does not support.
    items = list(range(n_items))
    minor = list(range(n_minor))
    periods = n

    dates = pd.date_range('2000-01-01', periods=periods, tz='utc')
    frames = {}

    if option == 'clever':
        rp = RollingPanel(window, items, minor, cap_multiple=2)
        major_deque = deque()

        for i in range(periods):
            # Add a new field and drop an old one every change_freq
            # iterations.
            if change_fields and (i % change_freq) == 0:
                minor = minor[1:]
                minor.append(minor[-1] + 1)
                items = items[1:]
                items.append(items[-1] + 1)

            dummy = pd.DataFrame(np.random.randn(len(items), len(minor)),
                                 index=items,
                                 columns=minor)

            frame = dummy * (1 + 0.001 * i)
            date = dates[i]

            rp.add_frame(date, frame)

            frames[date] = frame
            major_deque.append(date)

            # Evict the oldest frame once the window has filled up.
            if i >= window:
                del frames[major_deque.popleft()]

            result = rp.get_current()
            if copy:
                result = result.copy()
    else:
        major_deque = deque()
        dummy = pd.DataFrame(np.random.randn(len(items), len(minor)),
                             index=items,
                             columns=minor)

        for i in range(periods):
            frame = dummy * (1 + 0.001 * i)
            date = dates[i]
            frames[date] = frame
            major_deque.append(date)

            # Evict the oldest frame once the window has filled up.
            if i >= window:
                del frames[major_deque.popleft()]

            result = pd.Panel(frames,
                              items=list(major_deque),
                              major_axis=items,
                              minor_axis=minor)