def test_missing_values_assertion(self):
    """Check that writing daily bars with one session removed fails.

    Bar data is generated for every session of the test calendar range
    except ``self.MISSING_DATA_DAY``, while the writer is constructed for
    the full range. ``writer.write`` is then expected to raise an
    ``AssertionError`` whose message matches the escaped text below.
    """
    calendar = self.trading_calendar
    all_sessions = calendar.sessions_in_range(
        TEST_CALENDAR_START, TEST_CALENDAR_STOP,
    )

    # Boolean mask that drops exactly the configured missing day.
    keep_mask = all_sessions != self.MISSING_DATA_DAY
    gapped_sessions = all_sessions[keep_mask]
    gapped_bars = make_bar_data(self.make_equity_info(), gapped_sessions)

    first_session = all_sessions[0]
    last_session = all_sessions[-1]
    daily_writer = BcolzDailyBarWriter(
        self.tmpdir.path, calendar, first_session, last_session,
    )

    # The asset spans 21 sessions between its start and end date; one of
    # them was removed above, leaving 20 rows.
    escaped_message = re.escape(
        "Got 20 rows for daily bars table with first day=2015-06-02, last "
        "day=2015-06-30, expected 21 rows.\n"
        "Missing sessions: "
        "[Timestamp('2015-06-15 00:00:00+0000', tz='UTC')]\n"
        "Extra sessions: []"
    )
    with self.assertRaisesRegexp(AssertionError, escaped_message):
        daily_writer.write(gapped_bars)
def make_equity_daily_bar_data(cls, country_code, sids):
    """Build daily bar data for ``sids`` with every country's holes merged.

    ``country_code`` is not consulted: the hole definitions in ``HOLES`` are
    merged across all of its values, so the data is created for all
    countries at once.
    """
    selected_info = EQUITY_INFO.loc[list(sids)]
    bar_days = cls.equity_daily_bar_days
    # Combine the per-key hole mappings into a single mapping.
    all_holes = merge(HOLES.values())
    return make_bar_data(selected_info, bar_days, holes=all_holes)
def test_missing_values_assertion(self):
    """Verify the daily bar writer rejects data with a missing session.

    The writer covers the whole test calendar range, but the bar data
    handed to it omits ``self.MISSING_DATA_DAY``; ``write`` must therefore
    raise an ``AssertionError`` with the message spelled out below.
    """
    full_range = self.trading_calendar.sessions_in_range(
        TEST_CALENDAR_START, TEST_CALENDAR_STOP,
    )
    without_missing_day = full_range[full_range != self.MISSING_DATA_DAY]
    equity_info = self.make_equity_info()
    incomplete_bars = make_bar_data(equity_info, without_missing_day)

    bar_writer = BcolzDailyBarWriter(
        self.tmpdir.path,
        self.trading_calendar,
        full_range[0],
        full_range[-1],
    )

    # 21 sessions lie between the asset's start and end date; excluding
    # one of them leaves the 20 rows mentioned in the message.
    expected_pattern = re.escape(
        "Got 20 rows for daily bars table with first day=2015-06-02, last "
        "day=2015-06-30, expected 21 rows.\n"
        "Missing sessions: "
        "[Timestamp('2015-06-15 00:00:00+0000', tz='UTC')]\n"
        "Extra sessions: []"
    )
    with self.assertRaisesRegexp(AssertionError, expected_pattern):
        bar_writer.write(incomplete_bars)
def make_equity_daily_bar_data(cls):
    """Return ``make_bar_data`` output for the class's equities and days."""
    info = cls.equity_info
    bar_days = cls.equity_daily_bar_days
    return make_bar_data(info, bar_days)
def test_ingest(self):
    """Ingest a small synthetic bundle and verify what is read back.

    Registers a bundle named 'bundle' whose ingest function writes three
    equities, their minute and daily bars, and two splits. After ingesting
    and loading it, the asset ids, the raw minute/daily bar arrays and the
    split adjustments are compared against expected values.
    """
    first_session = pd.Timestamp('2014-01-06', tz='utc')
    last_session = pd.Timestamp('2014-01-10', tz='utc')
    nyse = get_calendar('NYSE')
    session_index = nyse.sessions_in_range(first_session, last_session)
    minute_index = nyse.minutes_for_sessions_in_range(
        first_session, last_session,
    )

    asset_ids = tuple(range(3))
    equity_info = make_simple_equity_info(
        asset_ids, first_session, last_session,
    )
    daily_bars = make_bar_data(equity_info, session_index)
    minute_bars = make_bar_data(equity_info, minute_index)

    ratio_sid_0 = 0.5
    ratio_sid_1 = 0.1
    split_records = [
        {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': ratio_sid_0,
            'sid': 0,
        },
        {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': ratio_sid_1,
            'sid': 1,
        },
    ]
    split_frame = pd.DataFrame.from_records(split_records)

    register_bundle = self.register(
        'bundle',
        calendar=nyse,
        start_session=first_session,
        end_session=last_session,
    )

    @register_bundle
    def bundle_ingest(environ,
                      asset_db_writer,
                      minute_bar_writer,
                      daily_bar_writer,
                      adjustment_writer,
                      calendar,
                      start_session,
                      end_session,
                      cache,
                      show_progress,
                      output_dir):
        # The ingest machinery must hand over the same environ object.
        assert_is(environ, self.environ)

        asset_db_writer.write(equities=equity_info)
        minute_bar_writer.write(minute_bars)
        daily_bar_writer.write(daily_bars)
        adjustment_writer.write(splits=split_frame)

        assert_is_instance(calendar, TradingCalendar)
        assert_is_instance(cache, dataframe_cache)
        assert_is_instance(show_progress, bool)

    self.ingest('bundle', environ=self.environ)
    bundle = self.load('bundle', environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(asset_ids))

    fields = 'open', 'high', 'low', 'close', 'volume'

    def check_bars(raw_arrays, index):
        # One array is returned per requested field, in field order.
        for values, field in zip(raw_arrays, fields):
            assert_equal(
                values,
                expected_bar_values_2d(index, equity_info, field),
                msg=field,
            )

    def expected_splits(sid_0_value, sid_1_value):
        # Expected adjustment mapping for the two splits written above.
        return {
            2: [Float64Multiply(
                first_row=0,
                last_row=2,
                first_col=0,
                last_col=0,
                value=sid_0_value,
            )],
            3: [Float64Multiply(
                first_row=0,
                last_row=3,
                first_col=1,
                last_col=1,
                value=sid_1_value,
            )],
        }

    minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
        fields, minute_index[0], minute_index[-1], asset_ids,
    )
    check_bars(minute_arrays, minute_index)

    daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
        fields, first_session, last_session, asset_ids,
    )
    check_bars(daily_arrays, session_index)

    loaded_adjustments = bundle.adjustment_reader.load_adjustments(
        fields, session_index, pd.Index(asset_ids),
    )

    # Every field except the trailing `volume` uses the raw split ratio.
    price_adjustments = loaded_adjustments[:-1]
    for field, found in zip(fields, price_adjustments):
        assert_equal(
            found,
            expected_splits(ratio_sid_0, ratio_sid_1),
            msg=field,
        )

    # For volume the multiplier is the inverse of the ratio.
    assert_equal(
        loaded_adjustments[-1],
        expected_splits(1 / ratio_sid_0, 1 / ratio_sid_1),
        msg='volume',
    )
def make_daily_bar_data(cls):
    """Return ``make_bar_data`` output for ``EQUITY_INFO`` over the bcolz days."""
    bar_days = cls.bcolz_daily_bar_days
    return make_bar_data(EQUITY_INFO, bar_days)
def make_equity_daily_bar_data(cls, country_code, sids):
    """Return daily bar data for all of ``EQUITY_INFO``.

    Neither ``country_code`` nor ``sids`` is consulted here; the full
    ``EQUITY_INFO`` table and the class's daily bar days are always used.
    """
    bar_days = cls.equity_daily_bar_days
    return make_bar_data(EQUITY_INFO, bar_days)
def test_ingest(self):
    """Ingest a synthetic bundle and compare everything that is loaded back.

    A bundle named 'bundle' is registered whose ingest function writes three
    equities, minute and daily bars for them, and two splits. The loaded
    bundle's asset ids, raw bar arrays and pricing adjustments are then
    checked against expected values.
    """
    start_date = self.START_DATE
    end_date = self.END_DATE

    xnys = get_calendar('XNYS')
    session_index = xnys.sessions_in_range(start_date, end_date)
    minute_index = xnys.minutes_for_sessions_in_range(start_date, end_date)

    asset_ids = tuple(range(3))
    equity_info = make_simple_equity_info(asset_ids, start_date, end_date)
    daily_bars = make_bar_data(equity_info, session_index)
    minute_bars = make_bar_data(equity_info, minute_index)

    ratio_sid_0 = 0.5
    ratio_sid_1 = 0.1
    split_records = [
        {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': ratio_sid_0,
            'sid': 0,
        },
        {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': ratio_sid_1,
            'sid': 1,
        },
    ]
    split_frame = pd.DataFrame.from_records(split_records)

    register_bundle = self.register(
        'bundle',
        calendar_name='NYSE',
        start_session=start_date,
        end_session=end_date,
    )

    @register_bundle
    def bundle_ingest(environ,
                      asset_db_writer,
                      minute_bar_writer,
                      daily_bar_writer,
                      adjustment_writer,
                      calendar,
                      start_session,
                      end_session,
                      cache,
                      output_dir):
        # The ingest machinery must hand over the same environ object.
        assert_is(environ, self.environ)

        asset_db_writer.write(equities=equity_info)
        minute_bar_writer.write(minute_bars)
        daily_bar_writer.write(daily_bars)
        adjustment_writer.write(splits=split_frame)

        assert_is_instance(calendar, TradingCalendar)
        assert_is_instance(cache, dataframe_cache)

    self.ingest('bundle', environ=self.environ)
    bundle = self.load('bundle', environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(asset_ids))

    fields = 'open', 'high', 'low', 'close', 'volume'

    def check_bars(raw_arrays, index):
        # One array is returned per requested field, in field order.
        for values, field in zip(raw_arrays, fields):
            assert_equal(
                values,
                expected_bar_values_2d(index, asset_ids, equity_info, field),
                msg=field,
            )

    def expected_splits(sid_0_value, sid_1_value):
        # Expected adjustment mapping for the two splits written above.
        return {
            2: [Float64Multiply(
                first_row=0,
                last_row=2,
                first_col=0,
                last_col=0,
                value=sid_0_value,
            )],
            3: [Float64Multiply(
                first_row=0,
                last_row=3,
                first_col=1,
                last_col=1,
                value=sid_1_value,
            )],
        }

    minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
        fields, minute_index[0], minute_index[-1], asset_ids,
    )
    check_bars(minute_arrays, minute_index)

    daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
        fields, start_date, end_date, asset_ids,
    )
    check_bars(daily_arrays, session_index)

    loaded = bundle.adjustment_reader.load_pricing_adjustments(
        fields, session_index, pd.Index(asset_ids),
    )

    # Every field except the trailing `volume` uses the raw split ratio.
    for field, found in zip(fields, loaded[:-1]):
        assert_equal(
            found,
            expected_splits(ratio_sid_0, ratio_sid_1),
            msg=field,
        )

    # For volume the multiplier is the inverse of the ratio.
    assert_equal(
        loaded[-1],
        expected_splits(1 / ratio_sid_0, 1 / ratio_sid_1),
        msg='volume',
    )
def make_equity_daily_bar_data(cls):
    """Return ``make_bar_data`` output for ``EQUITY_INFO`` over the equity days."""
    bar_days = cls.equity_daily_bar_days
    return make_bar_data(EQUITY_INFO, bar_days)
def make_equity_daily_bar_data(cls, country_code, sids):
    """Build daily bar data for the rows of ``EQUITY_INFO`` named by ``sids``.

    ``HOLES`` is forwarded unchanged as the ``holes`` argument of
    ``make_bar_data``. ``country_code`` is not consulted.
    """
    selected_info = EQUITY_INFO.loc[list(sids)]
    bar_days = cls.equity_daily_bar_days
    return make_bar_data(selected_info, bar_days, holes=HOLES)
def test_ingest(self):
    """Ingest a synthetic bundle and verify the data that is loaded back.

    A bundle named "bundle" is registered whose ingest function writes three
    equities, their minute and daily bars, and two splits. The loaded
    bundle's asset ids, raw bar arrays and split adjustments are then
    compared against expected values.
    """
    start_date = self.START_DATE
    end_date = self.END_DATE

    nyse = get_calendar("NYSE")
    session_index = nyse.sessions_in_range(start_date, end_date)
    minute_index = nyse.minutes_for_sessions_in_range(start_date, end_date)

    asset_ids = tuple(range(3))
    equity_info = make_simple_equity_info(asset_ids, start_date, end_date)
    daily_bars = make_bar_data(equity_info, session_index)
    minute_bars = make_bar_data(equity_info, minute_index)

    ratio_sid_0 = 0.5
    ratio_sid_1 = 0.1
    split_records = [
        {
            "effective_date": str_to_seconds("2014-01-08"),
            "ratio": ratio_sid_0,
            "sid": 0,
        },
        {
            "effective_date": str_to_seconds("2014-01-09"),
            "ratio": ratio_sid_1,
            "sid": 1,
        },
    ]
    split_frame = pd.DataFrame.from_records(split_records)

    register_bundle = self.register(
        "bundle",
        calendar_name="NYSE",
        start_session=start_date,
        end_session=end_date,
    )

    @register_bundle
    def bundle_ingest(
        environ,
        asset_db_writer,
        minute_bar_writer,
        daily_bar_writer,
        adjustment_writer,
        calendar,
        start_session,
        end_session,
        cache,
        show_progress,
        output_dir,
    ):
        # The ingest machinery must hand over the same environ object.
        assert_is(environ, self.environ)

        asset_db_writer.write(equities=equity_info)
        minute_bar_writer.write(minute_bars)
        daily_bar_writer.write(daily_bars)
        adjustment_writer.write(splits=split_frame)

        assert_is_instance(calendar, TradingCalendar)
        assert_is_instance(cache, dataframe_cache)
        assert_is_instance(show_progress, bool)

    self.ingest("bundle", environ=self.environ)
    bundle = self.load("bundle", environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(asset_ids))

    fields = "open", "high", "low", "close", "volume"

    def check_bars(raw_arrays, index):
        # One array is returned per requested field, in field order.
        for values, field in zip(raw_arrays, fields):
            assert_equal(
                values,
                expected_bar_values_2d(index, equity_info, field),
                msg=field,
            )

    def expected_splits(sid_0_value, sid_1_value):
        # Expected adjustment mapping for the two splits written above.
        return {
            2: [
                Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=sid_0_value,
                )
            ],
            3: [
                Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=sid_1_value,
                )
            ],
        }

    minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
        fields, minute_index[0], minute_index[-1], asset_ids
    )
    check_bars(minute_arrays, minute_index)

    daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
        fields, start_date, end_date, asset_ids
    )
    check_bars(daily_arrays, session_index)

    loaded = bundle.adjustment_reader.load_adjustments(
        fields, session_index, pd.Index(asset_ids)
    )

    # Every field except the trailing `volume` uses the raw split ratio.
    for field, found in zip(fields, loaded[:-1]):
        assert_equal(
            found,
            expected_splits(ratio_sid_0, ratio_sid_1),
            msg=field,
        )

    # For volume the multiplier is the inverse of the ratio.
    assert_equal(
        loaded[-1],
        expected_splits(1 / ratio_sid_0, 1 / ratio_sid_1),
        msg="volume",
    )
def test_ingest(self):
    """Ingest a synthetic bundle and verify the data that is loaded back.

    The bundle is registered with an explicit index of trading days plus the
    matching slices of the NYSE schedule's opens and closes. Its ingest
    function writes three equities, their minute and daily bars, and two
    splits; the loaded bundle is then checked against expected values.
    """
    range_start = pd.Timestamp('2014-01-06', tz='utc')
    range_end = pd.Timestamp('2014-01-10', tz='utc')

    all_days = get_calendar('NYSE').all_trading_days
    day_index = all_days[all_days.slice_indexer(range_start, range_end)]
    first_day = day_index[0]
    last_day = day_index[-1]

    minute_index = get_calendar('NYSE').trading_minutes_for_days_in_range(
        first_day, last_day,
    )

    asset_ids = tuple(range(3))
    equity_info = make_simple_equity_info(asset_ids, first_day, last_day)
    daily_bars = make_bar_data(equity_info, day_index)
    minute_bars = make_bar_data(equity_info, minute_index)

    ratio_sid_0 = 0.5
    ratio_sid_1 = 0.1
    split_records = [
        {
            'effective_date': str_to_seconds('2014-01-08'),
            'ratio': ratio_sid_0,
            'sid': 0,
        },
        {
            'effective_date': str_to_seconds('2014-01-09'),
            'ratio': ratio_sid_1,
            'sid': 1,
        },
    ]
    split_frame = pd.DataFrame.from_records(split_records)

    nyse_schedule = get_calendar('NYSE').schedule
    register_bundle = self.register(
        'bundle',
        calendar=day_index,
        opens=nyse_schedule.market_open[first_day:last_day],
        closes=nyse_schedule.market_close[first_day:last_day],
    )

    @register_bundle
    def bundle_ingest(environ,
                      asset_db_writer,
                      minute_bar_writer,
                      daily_bar_writer,
                      adjustment_writer,
                      calendar,
                      cache,
                      show_progress,
                      output_dir):
        # The ingest machinery must hand over the same environ object.
        assert_is(environ, self.environ)

        asset_db_writer.write(equities=equity_info)
        minute_bar_writer.write(minute_bars)
        daily_bar_writer.write(daily_bars)
        adjustment_writer.write(splits=split_frame)

        assert_is_instance(calendar, pd.DatetimeIndex)
        assert_is_instance(cache, dataframe_cache)
        assert_is_instance(show_progress, bool)

    self.ingest('bundle', environ=self.environ)
    bundle = self.load('bundle', environ=self.environ)

    assert_equal(set(bundle.asset_finder.sids), set(asset_ids))

    fields = 'open', 'high', 'low', 'close', 'volume'

    def check_bars(raw_arrays, index):
        # One array is returned per requested field, in field order.
        for values, field in zip(raw_arrays, fields):
            assert_equal(
                values,
                expected_bar_values_2d(index, equity_info, field),
                msg=field,
            )

    def expected_splits(sid_0_value, sid_1_value):
        # Expected adjustment mapping for the two splits written above.
        return {
            2: [
                Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=sid_0_value,
                )
            ],
            3: [
                Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=sid_1_value,
                )
            ],
        }

    minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
        fields, minute_index[0], minute_index[-1], asset_ids,
    )
    check_bars(minute_arrays, minute_index)

    daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
        fields, first_day, last_day, asset_ids,
    )
    check_bars(daily_arrays, day_index)

    loaded = bundle.adjustment_reader.load_adjustments(
        fields, day_index, pd.Index(asset_ids),
    )

    # Every field except the trailing `volume` uses the raw split ratio.
    for field, found in zip(fields, loaded[:-1]):
        assert_equal(
            found,
            expected_splits(ratio_sid_0, ratio_sid_1),
            msg=field,
        )

    # For volume the multiplier is the inverse of the ratio.
    assert_equal(
        loaded[-1],
        expected_splits(1 / ratio_sid_0, 1 / ratio_sid_1),
        msg='volume',
    )
def make_equity_daily_bar_data(cls):
    """Pass the class's equity info and daily bar days to ``make_bar_data``."""
    equity_frame = cls.equity_info
    sessions = cls.equity_daily_bar_days
    return make_bar_data(equity_frame, sessions)
def test_ingest(self):
    """Ingest a synthetic bundle and verify the data that is loaded back.

    A bundle named "bundle" is registered whose ingest function writes three
    equities, their minute and daily bars, and two splits. The loaded
    bundle's asset ids, raw bar arrays and pricing adjustments are then
    compared against expected values.
    """
    start_date = self.START_DATE
    end_date = self.END_DATE

    xnys = get_calendar("XNYS")
    session_index = xnys.sessions_in_range(start_date, end_date)
    minute_index = xnys.minutes_for_sessions_in_range(start_date, end_date)

    asset_ids = tuple(range(3))
    equity_info = make_simple_equity_info(asset_ids, start_date, end_date)
    daily_bars = make_bar_data(equity_info, session_index)
    minute_bars = make_bar_data(equity_info, minute_index)

    ratio_sid_0 = 0.5
    ratio_sid_1 = 0.1
    split_records = [
        {
            "effective_date": str_to_seconds("2014-01-08"),
            "ratio": ratio_sid_0,
            "sid": 0,
        },
        {
            "effective_date": str_to_seconds("2014-01-09"),
            "ratio": ratio_sid_1,
            "sid": 1,
        },
    ]
    split_frame = pd.DataFrame.from_records(split_records)

    register_bundle = self.register(
        "bundle",
        calendar_name="NYSE",
        start_session=start_date,
        end_session=end_date,
    )

    @register_bundle
    def bundle_ingest(
        environ,
        asset_db_writer,
        minute_bar_writer,
        daily_bar_writer,
        adjustment_writer,
        calendar,
        start_session,
        end_session,
        cache,
        show_progress,
        output_dir,
    ):
        # The ingest machinery must hand over the same environ object.
        assert environ is self.environ

        asset_db_writer.write(equities=equity_info)
        minute_bar_writer.write(minute_bars)
        daily_bar_writer.write(daily_bars)
        adjustment_writer.write(splits=split_frame)

        assert isinstance(calendar, TradingCalendar)
        assert isinstance(cache, dataframe_cache)
        assert isinstance(show_progress, bool)

    self.ingest("bundle", environ=self.environ)
    bundle = self.load("bundle", environ=self.environ)

    assert set(bundle.asset_finder.sids) == set(asset_ids)

    fields = "open", "high", "low", "close", "volume"

    def check_bars(raw_arrays, index):
        # One array is returned per requested field, in field order.
        for values, field in zip(raw_arrays, fields):
            np.testing.assert_array_equal(
                values,
                expected_bar_values_2d(index, asset_ids, equity_info, field),
                err_msg=field,
            )

    def expected_splits(sid_0_value, sid_1_value):
        # Expected adjustment mapping for the two splits written above.
        return {
            2: [
                Float64Multiply(
                    first_row=0,
                    last_row=2,
                    first_col=0,
                    last_col=0,
                    value=sid_0_value,
                )
            ],
            3: [
                Float64Multiply(
                    first_row=0,
                    last_row=3,
                    first_col=1,
                    last_col=1,
                    value=sid_1_value,
                )
            ],
        }

    minute_arrays = bundle.equity_minute_bar_reader.load_raw_arrays(
        fields, minute_index[0], minute_index[-1], asset_ids,
    )
    check_bars(minute_arrays, minute_index)

    daily_arrays = bundle.equity_daily_bar_reader.load_raw_arrays(
        fields, start_date, end_date, asset_ids,
    )
    check_bars(daily_arrays, session_index)

    loaded = bundle.adjustment_reader.load_pricing_adjustments(
        fields, session_index, pd.Index(asset_ids),
    )

    # Every field except the trailing `volume` uses the raw split ratio.
    for field, found in zip(fields, loaded[:-1]):
        assert found == expected_splits(ratio_sid_0, ratio_sid_1), field

    # For volume the multiplier is the inverse of the ratio.
    volume_expected = expected_splits(1 / ratio_sid_0, 1 / ratio_sid_1)
    assert loaded[-1] == volume_expected, "volume"
def make_daily_bar_data(cls):
    """Pass the class's equity info and bcolz daily bar days to ``make_bar_data``."""
    equity_frame = cls.equity_info
    sessions = cls.bcolz_daily_bar_days
    return make_bar_data(equity_frame, sessions)