Пример #1
0
    def test_load_empty(self):
        """
        Run a pipeline backed by an events frame that has no rows.

        The result must still contain one row per (trading day, asset) pair
        and every column must hold its missing value.
        """
        # Frame with the full events schema but zero rows.
        event_fields = [
            "sid",
            "timestamp",
            "event_date",
            "float",
            "int",
            "datetime",
            "string",
        ]
        empty_events = pd.DataFrame(columns=event_fields)

        # (dataset column suffix, raw events field) pairs shared by the
        # next_* and previous_* mappings.  Both string columns read from the
        # same raw "string" field.
        field_by_suffix = [
            ("datetime", "datetime"),
            ("event_date", "event_date"),
            ("float", "float"),
            ("int", "int"),
            ("string", "string"),
            ("string_custom_missing", "string"),
        ]
        next_cols = {
            getattr(EventDataSet_US, "next_" + suffix): field
            for suffix, field in field_by_suffix
        }
        previous_cols = {
            getattr(EventDataSet_US, "previous_" + suffix): field
            for suffix, field in field_by_suffix
        }

        loader = EventsLoader(empty_events, next_cols, previous_cols)
        # Every pipeline column is served by the same loader.
        engine = SimplePipelineEngine(lambda column: loader, self.asset_finder)

        latest_of_each = {col.name: col.latest for col in EventDataSet_US.columns}
        pipeline = Pipeline(latest_of_each, domain=US_EQUITIES)
        results = engine.run_pipeline(
            pipeline,
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        full_index = pd.MultiIndex.from_product([self.trading_days, assets])
        expected = self.frame_containing_all_missing_values(
            index=full_index,
            columns=EventDataSet_US.columns,
        )

        assert_equal(results, expected)
Пример #2
0
    def test_wrong_cols(self):
        """EventsLoader raises ValueError when a mapped field is absent."""
        # One-row frame: the three required fields plus a value column 'c'.
        stamp = pd.Timestamp('2014')
        events = pd.DataFrame({
            'c': [5],
            SID_FIELD_NAME: [1],
            TS_FIELD_NAME: [stamp],
            EVENT_DATE_FIELD_NAME: [stamp],
        })

        # Mapping to the existing column 'c' is accepted on either side.
        EventsLoader(events, {EventDataSet.next_float: 'c'}, {})
        EventsLoader(events, {}, {EventDataSet.previous_float: 'c'})

        expected_message = (
            "EventsLoader missing required columns ['d'].\n"
            "Got Columns: ['c', 'event_date', 'sid', 'timestamp']\n"
            "Expected Columns: ['d', 'event_date', 'sid', 'timestamp']"
        )

        # Mapping to the absent column 'd' must fail with the exact message.
        with self.assertRaises(ValueError) as raised:
            EventsLoader(events, {EventDataSet.next_float: 'd'}, {})

        self.assertEqual(str(raised.exception), expected_message)
Пример #3
0
    def test_wrong_cols(self):
        """EventsLoader raises ValueError when a mapped field is absent."""
        # One-row frame: the three required fields plus a value column "c".
        stamp = pd.Timestamp("2014")
        frame_data = {
            "c": [5],
            SID_FIELD_NAME: [1],
            TS_FIELD_NAME: [stamp],
            EVENT_DATE_FIELD_NAME: [stamp],
        }
        events = pd.DataFrame(frame_data)

        # Mapping to the existing column "c" is accepted on either side.
        EventsLoader(events, {EventDataSet_US.next_float: "c"}, {})
        EventsLoader(events, {}, {EventDataSet_US.previous_float: "c"})

        expected = (
            "EventsLoader missing required columns ['d'].\n"
            "Got Columns: ['c', 'event_date', 'sid', 'timestamp']\n"
            "Expected Columns: ['d', 'event_date', 'sid', 'timestamp']"
        )
        # The message contains regex metacharacters, so escape it before
        # handing it to pytest as a match pattern.
        pattern = re.escape(expected)

        # Mapping to the absent column "d" must fail with that message.
        with pytest.raises(ValueError, match=pattern):
            EventsLoader(events, {EventDataSet_US.next_float: "d"}, {})
Пример #4
0
    def load_adjusted_array(self, columns, dates, assets, mask):
        """
        Fetch raw events with ``load_raw_data`` and delegate the actual
        loading to an ``EventsLoader`` built from them.

        The arguments are forwarded unchanged to
        ``EventsLoader.load_adjusted_array``, whose result is returned.
        """
        raw_events = load_raw_data(
            assets,
            dates,
            self._data_query_time,
            self._data_query_tz,
            self._expr,
            self._odo_kwargs,
        )

        loader = EventsLoader(
            events=raw_events,
            next_value_columns=self._next_value_columns,
            previous_value_columns=self._previous_value_columns,
        )
        return loader.load_adjusted_array(columns, dates, assets, mask)
Пример #5
0
    def load_adjusted_array(self, domain, columns, dates, sids, mask):
        """
        Fetch raw events with ``load_raw_data`` and delegate the actual
        loading to an ``EventsLoader`` built from them.

        The query cutoffs passed to ``load_raw_data`` come from
        ``domain.data_query_cutoff_for_sessions(dates)``.  All arguments are
        forwarded unchanged to ``EventsLoader.load_adjusted_array``, whose
        result is returned.
        """
        cutoffs = domain.data_query_cutoff_for_sessions(dates)
        raw_events = load_raw_data(sids, cutoffs, self._expr, self._odo_kwargs)

        loader = EventsLoader(
            events=raw_events,
            next_value_columns=self._next_value_columns,
            previous_value_columns=self._previous_value_columns,
        )
        return loader.load_adjusted_array(domain, columns, dates, sids, mask)
Пример #6
0
    def init_class_fixtures(cls):
        """
        Build the raw events, column mappings, loader, and pipeline engine
        shared by the tests in this class.
        """
        # Unusually, work has to happen **before** delegating to the parent
        # init_class_fixtures: the sids handed to the asset finder fixture
        # are derived from the events produced by make_events.
        cls.raw_events = make_events(add_nulls=True)
        has_event_date = cls.raw_events['event_date'].notnull()
        cls.raw_events_no_nulls = cls.raw_events[has_event_date]

        # (dataset column suffix, raw events field) pairs shared by the
        # next_* and previous_* mappings.  Both string columns read from the
        # same raw 'string' field.
        field_by_suffix = [
            ('datetime', 'datetime'),
            ('event_date', 'event_date'),
            ('float', 'float'),
            ('int', 'int'),
            ('string', 'string'),
            ('string_custom_missing', 'string'),
        ]
        cls.next_value_columns = {
            getattr(EventDataSet_US, 'next_' + suffix): field
            for suffix, field in field_by_suffix
        }
        cls.previous_value_columns = {
            getattr(EventDataSet_US, 'previous_' + suffix): field
            for suffix, field in field_by_suffix
        }

        cls.loader = EventsLoader(
            cls.raw_events,
            cls.next_value_columns,
            cls.previous_value_columns,
        )

        # One equity per distinct sid in the events, with symbol 's<sid>'.
        unique_sids = list(cls.raw_events['sid'].unique())
        cls.ASSET_FINDER_EQUITY_SIDS = unique_sids
        cls.ASSET_FINDER_EQUITY_SYMBOLS = ['s' + str(sid) for sid in unique_sids]

        super(EventsLoaderTestCase, cls).init_class_fixtures()

        # cls.asset_finder is read only after the parent fixtures have run.
        # Every pipeline column is served by the same loader.
        cls.engine = SimplePipelineEngine(
            lambda column: cls.loader,
            asset_finder=cls.asset_finder,
            default_domain=US_EQUITIES,
        )
Пример #7
0
    def load_adjusted_array(self, columns, dates, assets, mask):
        """
        Query raw events for the requested date range, restrict them to the
        requested assets, and delegate loading to an ``EventsLoader``.

        The query bounds come from ``normalize_data_query_bounds`` applied to
        the first and last entries of ``dates``.  Rows whose sid is not in
        ``assets`` are dropped from the queried frame in place.  When a data
        query time is configured, timestamps are additionally normalized in
        place via ``normalize_timestamp_to_query_time``.
        """
        query_time = self._data_query_time
        query_tz = self._data_query_tz

        first_session, last_session = dates[0], dates[-1]
        lower_bound, upper_bound = normalize_data_query_bounds(
            first_session,
            last_session,
            query_time,
            query_tz,
        )

        raw = ffill_query_in_range(
            self._expr, lower_bound, upper_bound, self._odo_kwargs
        )

        # Discard, in place, every row belonging to an asset we were not
        # asked about.
        sid_column = raw.loc[:, SID_FIELD_NAME]
        unrequested = sid_column[~sid_column.isin(assets)].index
        raw.drop(unrequested, inplace=True)

        if data_query_time_is_set := (query_time is not None):
            normalize_timestamp_to_query_time(
                raw,
                query_time,
                query_tz,
                inplace=True,
                ts_field=TS_FIELD_NAME,
            )

        loader = EventsLoader(
            events=raw,
            next_value_columns=self._next_value_columns,
            previous_value_columns=self._previous_value_columns,
        )
        return loader.load_adjusted_array(columns, dates, assets, mask)
Пример #8
0
 def make_loader(cls, events, next_value_columns, previous_value_columns):
     """
     Build the loader under test from raw events and column mappings.

     Kept as a separate hook so that BlazeEventsLoaderTestCase can override
     it.
     """
     return EventsLoader(
         events=events,
         next_value_columns=next_value_columns,
         previous_value_columns=previous_value_columns,
     )
Пример #9
0
 def make_loader(cls, events, next_value_columns, previous_value_columns):
     """
     Build the loader under test from raw events and column mappings.

     Kept as a separate hook so that EventsLoaderTestCases using alternative
     loaders can override it.
     """
     return EventsLoader(
         events=events,
         next_value_columns=next_value_columns,
         previous_value_columns=previous_value_columns,
     )