def test_load_empty(self):
    """An empty raw-events frame must produce an all-missing result.

    Checks that every sid is represented, that the output has the
    expected (dates x assets) shape, and that each column is filled
    with its declared missing value.
    """
    raw_events = pd.DataFrame(
        columns=[
            "sid",
            "timestamp",
            "event_date",
            "float",
            "int",
            "datetime",
            "string",
        ]
    )
    # Both column maps share the same attribute-suffix -> raw-column
    # pairing, so build them from one table.
    suffix_to_raw_col = {
        "datetime": "datetime",
        "event_date": "event_date",
        "float": "float",
        "int": "int",
        "string": "string",
        "string_custom_missing": "string",
    }
    next_value_columns = {
        getattr(EventDataSet_US, "next_" + suffix): raw_col
        for suffix, raw_col in suffix_to_raw_col.items()
    }
    previous_value_columns = {
        getattr(EventDataSet_US, "previous_" + suffix): raw_col
        for suffix, raw_col in suffix_to_raw_col.items()
    }
    loader = EventsLoader(raw_events, next_value_columns, previous_value_columns)
    engine = SimplePipelineEngine(
        lambda _: loader,
        self.asset_finder,
    )
    results = engine.run_pipeline(
        Pipeline(
            {col.name: col.latest for col in EventDataSet_US.columns},
            domain=US_EQUITIES,
        ),
        start_date=self.trading_days[0],
        end_date=self.trading_days[-1],
    )
    dates = self.trading_days
    assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
    expected = self.frame_containing_all_missing_values(
        index=pd.MultiIndex.from_product([dates, assets]),
        columns=EventDataSet_US.columns,
    )
    assert_equal(results, expected)
def test_wrong_cols(self):
    # Requesting a value column absent from the raw events frame must
    # raise a ValueError with a descriptive message; present columns
    # are accepted on either the next or previous side.
    events = pd.DataFrame({
        'c': [5],
        SID_FIELD_NAME: [1],
        TS_FIELD_NAME: [pd.Timestamp('2014')],
        EVENT_DATE_FIELD_NAME: [pd.Timestamp('2014')],
    })

    # These succeed: 'c' exists in the frame.
    EventsLoader(events, {EventDataSet.next_float: 'c'}, {})
    EventsLoader(events, {}, {EventDataSet.previous_float: 'c'})

    expected = (
        "EventsLoader missing required columns ['d'].\n"
        "Got Columns: ['c', 'event_date', 'sid', 'timestamp']\n"
        "Expected Columns: ['d', 'event_date', 'sid', 'timestamp']")
    with self.assertRaises(ValueError) as e:
        EventsLoader(events, {EventDataSet.next_float: 'd'}, {})
    self.assertEqual(str(e.exception), expected)
def test_wrong_cols(self):
    # A next/previous value column naming a raw column that is not in
    # the events frame must raise a descriptive ValueError.
    events = pd.DataFrame(
        {
            "c": [5],
            SID_FIELD_NAME: [1],
            TS_FIELD_NAME: [pd.Timestamp("2014")],
            EVENT_DATE_FIELD_NAME: [pd.Timestamp("2014")],
        }
    )

    # Columns that do exist are accepted on either side.
    EventsLoader(events, {EventDataSet_US.next_float: "c"}, {})
    EventsLoader(events, {}, {EventDataSet_US.previous_float: "c"})

    err_msg = (
        "EventsLoader missing required columns ['d'].\n"
        "Got Columns: ['c', 'event_date', 'sid', 'timestamp']\n"
        "Expected Columns: ['d', 'event_date', 'sid', 'timestamp']"
    )
    with pytest.raises(ValueError, match=re.escape(err_msg)):
        EventsLoader(events, {EventDataSet_US.next_float: "d"}, {})
def load_adjusted_array(self, columns, dates, assets, mask):
    """Materialize raw event data for the requested window, then
    delegate adjusted-array construction to an in-memory EventsLoader.
    """
    raw = load_raw_data(
        assets,
        dates,
        self._data_query_time,
        self._data_query_tz,
        self._expr,
        self._odo_kwargs,
    )
    loader = EventsLoader(
        events=raw,
        next_value_columns=self._next_value_columns,
        previous_value_columns=self._previous_value_columns,
    )
    return loader.load_adjusted_array(columns, dates, assets, mask)
def load_adjusted_array(self, domain, columns, dates, sids, mask):
    """Load raw events at the domain's data-query cutoffs and delegate
    adjusted-array construction to an in-memory EventsLoader.
    """
    query_cutoffs = domain.data_query_cutoff_for_sessions(dates)
    raw = load_raw_data(
        sids,
        query_cutoffs,
        self._expr,
        self._odo_kwargs,
    )
    loader = EventsLoader(
        events=raw,
        next_value_columns=self._next_value_columns,
        previous_value_columns=self._previous_value_columns,
    )
    return loader.load_adjusted_array(domain, columns, dates, sids, mask)
def init_class_fixtures(cls):
    # This work deliberately happens *before* the super() call: the
    # sids handed to WithAssetFinder are derived from the events that
    # make_events generates.
    cls.raw_events = make_events(add_nulls=True)
    cls.raw_events_no_nulls = cls.raw_events[
        cls.raw_events['event_date'].notnull()
    ]

    # Both column maps share one attribute-suffix -> raw-column pairing.
    suffix_to_raw_col = {
        'datetime': 'datetime',
        'event_date': 'event_date',
        'float': 'float',
        'int': 'int',
        'string': 'string',
        'string_custom_missing': 'string',
    }
    cls.next_value_columns = {
        getattr(EventDataSet_US, 'next_' + suffix): raw_col
        for suffix, raw_col in suffix_to_raw_col.items()
    }
    cls.previous_value_columns = {
        getattr(EventDataSet_US, 'previous_' + suffix): raw_col
        for suffix, raw_col in suffix_to_raw_col.items()
    }
    cls.loader = EventsLoader(
        cls.raw_events,
        cls.next_value_columns,
        cls.previous_value_columns,
    )

    cls.ASSET_FINDER_EQUITY_SIDS = list(cls.raw_events['sid'].unique())
    cls.ASSET_FINDER_EQUITY_SYMBOLS = [
        's' + str(sid) for sid in cls.ASSET_FINDER_EQUITY_SIDS
    ]
    super(EventsLoaderTestCase, cls).init_class_fixtures()

    cls.engine = SimplePipelineEngine(
        lambda _: cls.loader,
        asset_finder=cls.asset_finder,
        default_domain=US_EQUITIES,
    )
def load_adjusted_array(self, columns, dates, assets, mask):
    """Query raw events for the requested date window, trim them to the
    requested assets (and data-query time, if configured), then delegate
    to an in-memory EventsLoader.
    """
    query_time = self._data_query_time
    query_tz = self._data_query_tz

    lower_dt, upper_dt = normalize_data_query_bounds(
        dates[0],
        dates[-1],
        query_time,
        query_tz,
    )
    raw = ffill_query_in_range(
        self._expr,
        lower_dt,
        upper_dt,
        self._odo_kwargs,
    )

    # Drop rows whose sid falls outside the requested universe, in place.
    sid_col = raw.loc[:, SID_FIELD_NAME]
    unwanted_rows = sid_col[~sid_col.isin(assets)].index
    raw.drop(unwanted_rows, inplace=True)

    if query_time is not None:
        # Shift timestamps so rows appear at the configured query time.
        normalize_timestamp_to_query_time(
            raw,
            query_time,
            query_tz,
            inplace=True,
            ts_field=TS_FIELD_NAME,
        )

    loader = EventsLoader(
        events=raw,
        next_value_columns=self._next_value_columns,
        previous_value_columns=self._previous_value_columns,
    )
    return loader.load_adjusted_array(columns, dates, assets, mask)
def make_loader(cls, events, next_value_columns, previous_value_columns):
    # Hook point: BlazeEventsLoaderTestCase overrides this to swap in
    # its own loader implementation.
    return EventsLoader(
        events,
        next_value_columns,
        previous_value_columns,
    )
def make_loader(cls, events, next_value_columns, previous_value_columns):
    # Hook point: EventsLoaderTestCases that exercise alternative
    # loaders override this to swap in their own implementation.
    return EventsLoader(
        events,
        next_value_columns,
        previous_value_columns,
    )