def _init_panels(self, sids):
    """Create the rolling panel(s) used to buffer incoming bars.

    Without downsampling, a single panel sized
    ``window_length * bars_in_day`` is stored on ``self.rolling_panel``.
    With downsampling, ``self.rolling_panel`` is sized ``bars_in_day`` and a
    second panel sized ``window_length`` is stored on
    ``self.daily_rolling_panel``.

    Parameters
    ----------
    sids
        Identifiers passed through to ``RollingPanel`` as its third argument.
    """
    if not self.downsample:
        # One panel covers every bar of the whole window.
        total_bars = self.window_length * self.bars_in_day
        self.rolling_panel = RollingPanel(total_bars, self.field_names, sids)
        return

    self.rolling_panel = RollingPanel(
        self.bars_in_day, self.field_names, sids)
    self.daily_rolling_panel = RollingPanel(
        self.window_length, self.field_names, sids)
def test_basics(self):
    """Check ``get_current`` against a reference panel after every add.

    Thirty random frames are added to a ``RollingPanel`` with a window of
    10. After each add, the current panel must equal a ``pd.Panel`` built
    from the frames whose dates are still in the trailing window.
    """
    item_labels = ['foo', 'bar', 'baz']
    minor_labels = ['A', 'B', 'C', 'D']
    window = 10

    rp = RollingPanel(window, item_labels, minor_labels, cap_multiple=2)

    all_dates = pd.date_range('2000-01-01', periods=30, tz='utc')
    # Dates still expected to be inside the window, oldest first.
    live_dates = deque()
    frames_by_date = {}

    for step, date in enumerate(all_dates):
        frame = pd.DataFrame(
            np.random.randn(3, 4),
            index=item_labels,
            columns=minor_labels,
        )
        rp.add_frame(date, frame)

        frames_by_date[date] = frame
        live_dates.append(date)
        if step >= window:
            live_dates.popleft()

        result = rp.get_current()
        # Old frames stay in the dict; ``items=`` picks the live dates.
        expected = pd.Panel(
            frames_by_date,
            items=list(live_dates),
            major_axis=item_labels,
            minor_axis=minor_labels,
        )
        tm.assert_panel_equal(result, expected.swapaxes(0, 1))
def _append_to_window(self, event):
    """Add one event to the rolling panel and update day counters.

    The rolling panel is created on first use. ``self.trading_days_total``
    is incremented when the event's ``dt`` is at or past the market close
    for that ``dt``, and ``self.full`` is set once that count reaches
    ``self.window_length``.

    Parameters
    ----------
    event
        Object exposing ``dt`` and a dict-like ``data`` attribute.
    """
    self.field_names = self._get_field_names(event)

    sids = (
        set(event.data.keys())
        if self.static_sids is None
        else self.static_sids
    )

    # Lazily create the rolling panel the first time an event arrives.
    if self.rolling_panel is None:
        capacity = self.window_length * self.bars_in_day
        self.rolling_panel = RollingPanel(capacity, self.field_names, sids)

    # Store the event in the rolling panel.
    event_frame = pd.DataFrame(
        event.data, index=self.field_names, columns=sids)
    self.rolling_panel.add_frame(event.dt, event_frame)

    # Update the trading day counters.
    _, market_close = trading.environment.get_open_and_close(event.dt)
    if self.bars == 'daily':
        # Daily bars have their dt set to midnight.
        market_close = market_close.replace(hour=0, minute=0, second=0)

    if event.dt >= market_close:
        self.trading_days_total += 1

    self.last_dt = event.dt

    if self.trading_days_total >= self.window_length:
        self.full = True
def create_initial_day_panel(days_needed, fields, sids, dt):
    """Build a ``RollingPanel`` whose date buffer is pre-filled.

    The dates come from ``days_index_at_dt(days_needed, dt)``. Unless
    ``days_needed`` is 1, the final entry of that index is dropped. The
    panel's window equals the number of remaining dates, each date is
    written into ``index_buf``, and ``pos`` is set to the window length.

    Returns
    -------
    The populated ``RollingPanel``.
    """
    all_days = days_index_at_dt(days_needed, dt)
    # With a single bar the original index is used unchanged.
    days = all_days if days_needed == 1 else all_days[:-1]

    n_days = len(days)
    panel = RollingPanel(n_days, fields, sids)
    for slot, day in enumerate(days):
        panel.index_buf[slot] = day
    panel.pos = n_days
    return panel
def test_alignment(self, env):
    """Check date alignment of ``get_current`` before and after extending.

    A panel with a window of 2 is seeded with ``dts[1:-1]`` as initial
    dates, then one frame is added at ``dts[-1]``. The current panel must
    cover ``dts[2:]`` with NaN in the row no frame was added for. After
    ``extend_back(dts[:-2])`` it must cover all of ``dts``, with NaN in
    every row except the last.
    """
    items = ('a', 'b')
    sids = (1, 2)

    dts = env.market_minute_window(
        env.open_and_closes.market_open[0], 4,
    ).values
    rp = RollingPanel(2, items, sids, initial_dates=dts[1:-1])

    frame = pd.DataFrame(
        data=np.arange(4).reshape((2, 2)),
        columns=sids,
        index=items,
    )

    rp.add_frame(dts[-1], frame)

    cur = rp.get_current()
    data = np.array(
        (((np.nan, np.nan),
          (0, 1)),
         ((np.nan, np.nan),
          (2, 3))),
        float)
    expected = pd.Panel(
        data,
        major_axis=dts[2:],
        minor_axis=sids,
        items=items,
    )
    # ``dts`` was taken via ``.values``; localize before comparing.
    expected.major_axis = expected.major_axis.tz_localize('utc')
    tm.assert_panel_equal(
        cur,
        expected,
    )

    rp.extend_back(dts[:-2])

    cur = rp.get_current()
    data = np.array(
        (((np.nan, np.nan),
          (np.nan, np.nan),
          (np.nan, np.nan),
          (0, 1)),
         ((np.nan, np.nan),
          (np.nan, np.nan),
          (np.nan, np.nan),
          (2, 3))),
        float)
    expected = pd.Panel(
        data,
        major_axis=dts,
        minor_axis=sids,
        items=items,
    )
    expected.major_axis = expected.major_axis.tz_localize('utc')
    tm.assert_panel_equal(
        cur,
        expected,
    )
def create_buffer_panel(self, initial_dt):
    """
    Build the RollingPanel used as the buffer for all frequencies.

    The window is the largest ``max_minutes`` among
    ``self.unique_frequencies``. ``initial_dt`` is accepted but not used.
    """
    minutes_per_freq = [
        freq.max_minutes for freq in self.unique_frequencies
    ]
    return RollingPanel(
        window=max(minutes_per_freq),
        items=self.fields,
        sids=self.sids,
    )
def f(option='clever', n=500, copy=False):
    """Run one of two trailing-window implementations over ``n`` periods.

    With ``option == 'clever'`` frames are pushed into a ``RollingPanel``
    and the window is read back with ``get_current`` (copied when ``copy``
    is true). With any other ``option`` a ``pd.Panel`` is rebuilt from a
    dict of the frames still in the window. The result is discarded and
    nothing is returned.
    """
    items = range(5)
    minor = range(20)
    window = 100
    periods = n

    dates = pd.date_range('2000-01-01', periods=periods, tz='utc')
    frames = {}

    use_rolling_panel = option == 'clever'
    if use_rolling_panel:
        rp = RollingPanel(window, items, minor, cap_multiple=2)

    major_deque = deque()
    dummy = pd.DataFrame(
        np.random.randn(len(items), len(minor)),
        index=items,
        columns=minor,
    )

    for step, date in enumerate(dates):
        frame = dummy * (1 + 0.001 * step)

        if use_rolling_panel:
            rp.add_frame(date, frame)

        frames[date] = frame
        major_deque.append(date)

        # Evict the oldest frame once the window has been exceeded.
        if step >= window:
            del frames[major_deque.popleft()]

        if use_rolling_panel:
            result = rp.get_current()
            if copy:
                result = result.copy()
        else:
            result = pd.Panel(
                frames,
                items=list(major_deque),
                major_axis=items,
                minor_axis=minor,
            )
def create_digest_panels(self, initial_sids, initial_dt):
    """
    Build a RollingPanel for each frequency that needs one.

    For every entry of ``self.frequency_groups`` the last spec in the list
    is treated as the one with the largest bar count; this relies on
    group_by_frequency sorting the value lists by ascending bar count.

    Returns a 3-tuple ``(panels, first_window_starts, first_window_closes)``,
    each a dict keyed by frequency. ``panels`` has no entry for a frequency
    whose largest spec has a bar count of 1.
    """
    # frequency -> first/last minute of the next digest to be rolled.
    starts = {}
    closes = {}
    # frequency -> digest panel.
    digest_panels = {}

    for freq, specs in iteritems(self.frequency_groups):
        # The last spec requires the most bars (see docstring).
        biggest = specs[-1]

        if biggest.bar_count == 1:
            # No digest panel needed: this frequency only ever uses
            # data drawn from self.buffer_panel.
            window_start = freq.window_open(initial_dt)
            starts[freq] = window_start
            closes[freq] = freq.window_close(window_start)
        else:
            initial_dates = index_at_dt(biggest, initial_dt)

            # The first digest roll is keyed to the close of the first
            # entry in the initial index.
            first_close = initial_dates[0]
            closes[freq] = first_close
            starts[freq] = freq.window_open(first_close)

            digest_panels[freq] = RollingPanel(
                window=len(initial_dates) - 1,
                items=self.fields,
                sids=initial_sids,
            )

    return digest_panels, starts, closes
def create_buffer_panel(self, initial_sids, initial_dt):
    """
    Build the RollingPanel used as the buffer for all frequencies.

    The window is the largest ``max_minutes`` among
    ``self.unique_frequencies``; the panel is created for ``self.fields``
    and ``initial_sids``. ``initial_dt`` is accepted but not used.
    """
    minute_counts = [freq.max_minutes for freq in self.unique_frequencies]
    widest_window = max(minute_counts)
    return RollingPanel(widest_window, self.fields, initial_sids)
def _create_panel(self, dt, spec):
    """
    Build a RollingPanel for ``spec`` with a pre-computed date buffer.

    ``dt`` is first normalized to the spec's data frequency. The window is
    ``spec.bar_count - 1``, and the initial dates come from
    ``self._create_window_date_buf``.
    """
    frequency = spec.frequency
    data_frequency = frequency.data_frequency

    aligned_dt = normalize_to_data_freq(data_frequency, dt)
    n_bars = spec.bar_count - 1

    initial_dates = self._create_window_date_buf(
        n_bars,
        frequency.unit_str,
        data_frequency,
        aligned_dt,
    )

    return RollingPanel(
        window=n_bars,
        items=self.fields,
        sids=self.sids,
        initial_dates=initial_dates,
    )
def test_get_current_multiple_call_same_tick(self, env):
    """
    Check that repeated ``get_current`` calls return separate buffers.

    In the old get_current, each call would copy the data, so changing
    the returned object had no side effects. To keep the same API, make
    sure that the raw option returns a copy too.
    """
    def data_id(values):
        # Address/flag info of the underlying buffer; two arrays backed
        # by different memory give different values here.
        return values.__array_interface__['data']

    items = ('a', 'b')
    sids = (1, 2)

    dts = env.market_minute_window(
        env.open_and_closes.market_open[0], 4,
    ).values
    rp = RollingPanel(2, items, sids, initial_dates=dts[1:-1])

    frame = pd.DataFrame(
        data=np.arange(4).reshape((2, 2)),
        columns=sids,
        index=items,
    )

    rp.add_frame(dts[-1], frame)

    # Each get_current call makes a copy.
    cur = rp.get_current()
    cur2 = rp.get_current()
    assert data_id(cur.values) != data_id(cur2.values)

    # Make sure raw follows the same logic.
    raw = rp.get_current(raw=True)
    raw2 = rp.get_current(raw=True)
    assert data_id(raw) != data_id(raw2)
def test_adding_and_dropping_items(self, n_items=5, n_minor=10, window=10,
                                   periods=30):
    """Check the panel while item and minor labels change every step.

    Each iteration adds one frame, compares ``get_current`` against the
    expected labels and an expected ``pd.Panel``, then drops the oldest
    label from ``items`` and ``minor`` and appends one new label to both.

    Parameters
    ----------
    n_items, n_minor : int
        Number of initial item / minor labels (``range(n)`` each).
    window : int
        Window length passed to ``RollingPanel``.
    periods : int
        Number of dates (and therefore frames) fed to the panel.
    """
    # Fixed seed so the shuffled labels and random frames are repeatable.
    np.random.seed(123)

    items = deque(range(n_items))
    minor = deque(range(n_minor))

    expected_items = deque(range(n_items))
    expected_minor = deque(range(n_minor))

    # First label guaranteed not to clash with any initial label.
    first_non_existant = max(n_items, n_minor) + 1
    # We want to add new columns with random order
    add_items = np.arange(first_non_existant, first_non_existant + periods)
    np.random.shuffle(add_items)

    rp = RollingPanel(window, items, minor, cap_multiple=2)

    dates = pd.date_range('2000-01-01', periods=periods, tz='utc')

    frames = {}

    # NOTE(review): expected_frames is appended to below but never read.
    expected_frames = deque(maxlen=window)
    expected_dates = deque()

    for i, (date, add_item) in enumerate(zip(dates, add_items)):
        frame = pd.DataFrame(np.random.randn(n_items, n_minor),
                             index=items, columns=minor)

        if i >= window:
            # Old labels and dates should start to get dropped at every
            # call
            del frames[expected_dates.popleft()]
            expected_minor.popleft()
            expected_items.popleft()

        expected_frames.append(frame)
        expected_dates.append(date)

        rp.add_frame(date, frame)

        frames[date] = frame

        result = rp.get_current()
        # Label order is not compared, only the set of labels present.
        np.testing.assert_array_equal(sorted(result.minor_axis.values),
                                      sorted(expected_minor))
        np.testing.assert_array_equal(sorted(result.items.values),
                                      sorted(expected_items))
        # The newest major-axis entry must hold the frame just added.
        tm.assert_frame_equal(frame.T,
                              result.ix[frame.index, -1, frame.columns])
        expected_result = pd.Panel(frames).swapaxes(0, 1)
        tm.assert_panel_equal(expected_result,
                              result)

        # Insert new items
        minor.popleft()
        minor.append(add_item)
        items.popleft()
        items.append(add_item)

        expected_minor.append(add_item)
        expected_items.append(add_item)
def run_history_implementations(option='clever', n=500, change_fields=False,
                                copy=False, n_items=15, n_minor=20,
                                change_freq=5, window=100):
    """Run one of two trailing-window implementations over ``n`` periods.

    With ``option == 'clever'`` frames are pushed into a ``RollingPanel``
    and the window is read back with ``get_current``. With any other
    ``option`` a ``pd.Panel`` is rebuilt from a dict of the frames still in
    the window. The result is discarded and nothing is returned.

    Parameters
    ----------
    option : str
        ``'clever'`` selects the ``RollingPanel`` path.
    n : int
        Number of periods (frames) to process.
    change_fields : bool
        On the ``'clever'`` path only: every ``change_freq`` iterations,
        drop the first item and minor label and append a new one to each.
    copy : bool
        On the ``'clever'`` path only: copy the result of ``get_current``.
    n_items, n_minor : int
        Number of initial item / minor labels.
    change_freq : int
        How often labels rotate when ``change_fields`` is true.
    window : int
        Window length.
    """
    # Real lists are required: the change_fields path slices and then
    # appends, which a Python 3 ``range`` object does not support.
    items = list(range(n_items))
    minor = list(range(n_minor))
    periods = n

    dates = pd.date_range('2000-01-01', periods=periods, tz='utc')
    frames = {}

    if option == 'clever':
        rp = RollingPanel(window, items, minor, cap_multiple=2)
        major_deque = deque()

        for i in range(periods):
            # Add a new field and drop an old one every change_freq
            # iterations. Slicing makes a fresh list, so the lists
            # handed to earlier frames are left untouched.
            if change_fields and (i % change_freq) == 0:
                minor = minor[1:]
                minor.append(minor[-1] + 1)
                items = items[1:]
                items.append(items[-1] + 1)

            dummy = pd.DataFrame(np.random.randn(len(items), len(minor)),
                                 index=items, columns=minor)

            frame = dummy * (1 + 0.001 * i)
            date = dates[i]

            rp.add_frame(date, frame)

            frames[date] = frame
            major_deque.append(date)

            # Evict the oldest frame once the window has been exceeded.
            if i >= window:
                del frames[major_deque.popleft()]

            result = rp.get_current()
            if copy:
                result = result.copy()
    else:
        major_deque = deque()
        dummy = pd.DataFrame(np.random.randn(len(items), len(minor)),
                             index=items, columns=minor)

        for i in range(periods):
            frame = dummy * (1 + 0.001 * i)
            date = dates[i]

            frames[date] = frame
            major_deque.append(date)

            if i >= window:
                del frames[major_deque.popleft()]

            result = pd.Panel(frames, items=list(major_deque),
                              major_axis=items, minor_axis=minor)