def test_unadjusted_minutes_early_close(self):
    """
    Test the unadjusted minute window across several sessions, ensuring
    that the minutes dropped by early closes are filtered out.
    """
    thanksgiving_eve = Timestamp('2015-11-25', tz='UTC')
    christmas_eve = Timestamp('2015-12-24', tz='UTC')
    post_xmas_session = Timestamp('2015-12-28', tz='UTC')

    # Two minutes shortly before a close and one just after a later open,
    # so the requested window spans more than one session.
    minutes = [
        self.market_closes[thanksgiving_eve] - Timedelta('2 min'),
        self.market_closes[christmas_eve] - Timedelta('1 min'),
        self.market_opens[post_xmas_session] + Timedelta('1 min'),
    ]
    sids = [1, 2]

    first_frame = DataFrame(
        data={
            'open': [15.0, 15.1, 15.2],
            'high': [17.0, 17.1, 17.2],
            'low': [11.0, 11.1, 11.3],
            'close': [14.0, 14.1, 14.2],
            'volume': [1000, 1001, 1002],
        },
        index=minutes,
    )
    second_frame = DataFrame(
        data={
            'open': [25.0, 25.1, 25.2],
            'high': [27.0, 27.1, 27.2],
            'low': [21.0, 21.1, 21.2],
            'close': [24.0, 24.1, 24.2],
            'volume': [2000, 2001, 2002],
        },
        index=minutes,
    )
    expected = {sids[0]: first_frame, sids[1]: second_frame}
    for sid in sids:
        self.writer.write_sid(sid, expected[sid])

    # Read through a fresh reader so the data comes back from disk.
    reader = BcolzMinuteBarReader(self.dest)
    columns = ['open', 'high', 'low', 'close', 'volume']
    raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
    arrays = [transpose(block) for block in raw]

    # Offsets of the written minutes relative to the start of the window,
    # measured in calendar minutes.
    all_minutes = self.trading_calendar.all_minutes
    window_start = all_minutes.get_loc(minutes[0])
    minute_locs = [all_minutes.get_loc(dt) - window_start for dt in minutes]

    for col_idx, col in enumerate(columns):
        for sid_idx, sid in enumerate(sids):
            assert_almost_equal(
                expected[sid].loc[minutes, col],
                arrays[col_idx][sid_idx][minute_locs],
            )
def test_minute_updates(self):
    """
    Test that minute updates staged in an H5 update file can be read back
    and written through the bcolz writer.
    """
    first_minute = self.market_opens[TEST_CALENDAR_START]
    minutes = [
        first_minute,
        first_minute + Timedelta('1 min'),
        first_minute + Timedelta('2 min'),
    ]
    sids = [1, 2]

    # The middle minute carries no trade: nan prices and zero volume.
    data_1 = DataFrame(
        data={
            'open': [15.0, nan, 15.1],
            'high': [17.0, nan, 17.1],
            'low': [11.0, nan, 11.1],
            'close': [14.0, nan, 14.1],
            'volume': [1000, 0, 1001]
        },
        index=minutes,
    )
    data_2 = DataFrame(
        data={
            'open': [25.0, nan, 25.1],
            'high': [27.0, nan, 27.1],
            'low': [21.0, nan, 21.1],
            'close': [24.0, nan, 24.1],
            'volume': [2000, 0, 2001]
        },
        index=minutes,
    )
    frames = {1: data_1, 2: data_2}

    # Stage the frames in an H5 update file, then feed them to the writer.
    update_path = self.instance_tmpdir.getpath('updates.h5')
    H5MinuteBarUpdateWriter(update_path).write(frames)
    update_reader = H5MinuteBarUpdateReader(update_path)
    self.writer.write(update_reader.read(minutes, sids))

    # Use a reader created after the write so it sees the written data.
    reader = BcolzMinuteBarReader(self.dest)
    columns = ['open', 'high', 'low', 'close', 'volume']
    raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
    arrays = [transpose(block) for block in raw]

    expected = {sids[0]: data_1, sids[1]: data_2}
    for col_idx, col in enumerate(columns):
        for sid_idx, sid in enumerate(sids):
            assert_almost_equal(expected[sid][col], arrays[col_idx][sid_idx])
def test_truncate_all_data_points(self):
    """
    Truncate to a session that precedes every written bar and check that
    both writer and reader report that session as the last one.
    """
    sessions = self.market_opens.index
    window = sessions.slice_indexer(
        start=self.test_calendar_start + 1,
        end=self.test_calendar_start + 3,
    )
    days = sessions[window]

    # One bar on each of the first two sessions after the calendar start.
    bar_minutes = DatetimeIndex(
        [
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ]
    )
    sid = 1
    bars = DataFrame(
        data={
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        },
        index=bar_minutes,
    )
    self.writer.write_sid(sid, bars)

    # Cut back to the first calendar session, which is one session earlier
    # than the first session holding minute data.
    self.writer.truncate(self.test_calendar_start)

    # Truncation rewrites the metadata, so build a new reader.
    self.reader = BcolzMinuteBarReader(self.dest)

    self.assertEqual(
        self.writer.last_date_in_output_for_sid(sid),
        self.test_calendar_start,
    )

    calendar = self.trading_calendar
    _, expected_last_dt = calendar.open_and_close_for_session(
        self.test_calendar_start
    )
    self.assertEqual(self.reader.last_available_dt, expected_last_dt)
def test_truncate_between_data_points(self):
    """
    Truncate to the first session that has data and check that its bar
    survives while the last-date bookkeeping moves back to that session.
    """
    tds = self.market_opens.index
    days = tds[tds.slice_indexer(
        start=self.test_calendar_start + 1,
        end=self.test_calendar_start + 3
    )]
    minutes = DatetimeIndex([
        self.market_opens[days[0]] + timedelta(minutes=60),
        self.market_opens[days[1]] + timedelta(minutes=120),
    ])
    sid = 1
    data = DataFrame(
        data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        },
        index=minutes)
    self.writer.write_sid(sid, data)

    # Open a new writer to cover the `open` method; truncating also only
    # applies to an existing directory.
    writer = BcolzMinuteBarWriter.open(self.dest)

    # Truncate to the first day with data.
    writer.truncate(days[0])

    # Refresh the reader, since truncating updates the metadata.
    self.reader = BcolzMinuteBarReader(self.dest)

    self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

    cal = self.trading_calendar
    _, last_close = cal.open_and_close_for_session(days[0])
    self.assertEqual(self.reader.last_available_dt, last_close)

    # The bar on the retained session must still be readable.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    minute = minutes[0]
    expected_bar = (
        ('open', 10.0),
        ('high', 20.0),
        ('low', 30.0),
        ('close', 40.0),
        ('volume', 50.0),
    )
    for field, expected in expected_bar:
        self.assertEqual(expected, self.reader.get_value(sid, minute, field))
def test_append_on_new_day(self):
    """
    Append a bar for a session past the original calendar end through a
    re-opened writer and check both the padding and the new bar.
    """
    sid = 1
    ohlcv = {
        'open': [2.0],
        'high': [3.0],
        'low': [1.0],
        'close': [2.0],
        'volume': [10.0]
    }
    price_cols = ('open', 'high', 'low', 'close')

    dt = self.market_opens[TEST_CALENDAR_STOP]
    self.writer.write_sid(sid, DataFrame(data=ohlcv, index=[dt]))

    # Re-open the writer to cover the `open` method; appending new days to
    # an existing directory is also the common usage.
    one_session = self.trading_calendar.schedule.index.freq
    extended_end = TEST_CALENDAR_STOP + one_session
    appending_writer = BcolzMinuteBarWriter.open(self.dest, extended_end)

    next_day_minute = dt + one_session
    appending_writer.write_sid(
        sid,
        DataFrame(data=ohlcv, index=[next_day_minute]),
    )

    # A new reader is needed to pick up the updated calendar.
    reader = BcolzMinuteBarReader(self.dest)

    # The minute after the first bar was never written explicitly: prices
    # are expected to read back as nan and volume as 0.
    second_minute = dt + Timedelta(minutes=1)
    for col in price_cols:
        assert_almost_equal(nan, reader.get_value(sid, second_minute, col))
    self.assertEqual(0, reader.get_value(sid, second_minute, 'volume'))

    # The minute written on the appended session must hold the bar.
    for col in price_cols + ('volume',):
        assert_almost_equal(
            ohlcv[col],
            reader.get_value(sid, next_day_minute, col),
        )
def init_instance_fixtures(self):
    """
    Create the per-test output directory plus a writer and a reader that
    both point at it.
    """
    super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

    self.dest = dest = self.instance_tmpdir.getpath("minute_bars")
    os.makedirs(dest)

    # The writer is created first; the reader is then opened on the same
    # directory.
    writer_args = (
        dest,
        self.trading_calendar,
        TEST_CALENDAR_START,
        TEST_CALENDAR_STOP,
        US_EQUITIES_MINUTES_PER_DAY,
    )
    self.writer = BcolzMinuteBarWriter(*writer_args)
    self.reader = BcolzMinuteBarReader(dest)
def test_unadjusted_minutes(self):
    """
    Test that the raw arrays for a short minute window match what was
    written for each sid.
    """
    first_minute = self.market_opens[TEST_CALENDAR_START]
    minutes = [
        first_minute,
        first_minute + Timedelta('1 min'),
        first_minute + Timedelta('2 min'),
    ]
    sids = [1, 2]

    # The middle minute carries no trade: nan prices and zero volume.
    data_1 = DataFrame(
        data={
            'open': [15.0, nan, 15.1],
            'high': [17.0, nan, 17.1],
            'low': [11.0, nan, 11.1],
            'close': [14.0, nan, 14.1],
            'volume': [1000, 0, 1001]
        },
        index=minutes,
    )
    data_2 = DataFrame(
        data={
            'open': [25.0, nan, 25.1],
            'high': [27.0, nan, 27.1],
            'low': [21.0, nan, 21.1],
            'close': [24.0, nan, 24.1],
            'volume': [2000, 0, 2001]
        },
        index=minutes,
    )
    expected = {sids[0]: data_1, sids[1]: data_2}
    for sid in sids:
        self.writer.write_sid(sid, expected[sid])

    # Read through a fresh reader so the data comes back from disk.
    reader = BcolzMinuteBarReader(self.dest)
    columns = ['open', 'high', 'low', 'close', 'volume']
    raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
    arrays = [transpose(block) for block in raw]

    for col_idx, col in enumerate(columns):
        for sid_idx, sid in enumerate(sids):
            assert_almost_equal(expected[sid][col], arrays[col_idx][sid_idx])
def setUp(self):
    """
    Create a temporary output directory and open a writer and a reader on
    it for the test about to run.
    """
    self.dir_ = scratch = TempDirectory()
    scratch.create()

    self.dest = dest = scratch.getpath("minute_bars")
    os.makedirs(dest)

    writer_args = (
        TEST_CALENDAR_START,
        dest,
        self.market_opens,
        US_EQUITIES_MINUTES_PER_DAY,
    )
    self.writer = BcolzMinuteBarWriter(*writer_args)
    self.reader = BcolzMinuteBarReader(dest)
def test_truncate_between_data_points(self):
    """
    Truncate to the first session that has data and check that its bar
    survives while the last-date bookkeeping moves back to that session.
    """
    tds = self.market_opens.index
    days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                 end=self.test_calendar_start + 3)]
    minutes = DatetimeIndex(
        [
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ]
    )
    sid = 1
    data = DataFrame(
        data={
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        },
        index=minutes,
    )
    self.writer.write_sid(sid, data)

    # Truncate to the first day with data.
    self.writer.truncate(days[0])

    # Refresh the reader, since truncating updates the metadata.
    self.reader = BcolzMinuteBarReader(self.dest)

    self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

    cal = self.trading_calendar
    _, last_close = cal.open_and_close_for_session(days[0])
    self.assertEqual(self.reader.last_available_dt, last_close)

    # The bar on the retained session must still be readable.
    # `assertEqual` replaces the deprecated `assertEquals` alias.
    minute = minutes[0]
    expected_bar = (
        ("open", 10.0),
        ("high", 20.0),
        ("low", 30.0),
        ("close", 40.0),
        ("volume", 50.0),
    )
    for field, expected in expected_bar:
        self.assertEqual(expected, self.reader.get_value(sid, minute, field))
def test_write_one_ohlcv_with_ratios(self):
    """
    Write a single bar through a writer configured with
    `ohlc_ratios_per_sid` and check that every field reads back unchanged.
    """
    minute = self.market_opens[self.test_calendar_start]
    sid = 1
    data = DataFrame(
        data={
            "open": [10.0],
            "high": [20.0],
            "low": [30.0],
            "close": [40.0],
            "volume": [50.0],
        },
        index=[minute],
    )

    # Create a new writer with `ohlc_ratios_per_sid` defined.
    writer_with_ratios = BcolzMinuteBarWriter(
        self.dest,
        self.trading_calendar,
        TEST_CALENDAR_START,
        TEST_CALENDAR_STOP,
        US_EQUITIES_MINUTES_PER_DAY,
        ohlc_ratios_per_sid={sid: 25},
    )
    writer_with_ratios.write_sid(sid, data)

    # Read through a reader created after the write.
    reader = BcolzMinuteBarReader(self.dest)

    # `assertEqual` replaces the deprecated `assertEquals` alias.
    expected_bar = (
        ("open", 10.0),
        ("high", 20.0),
        ("low", 30.0),
        ("close", 40.0),
        ("volume", 50.0),
    )
    for field, expected in expected_bar:
        self.assertEqual(expected, reader.get_value(sid, minute, field))
def init_instance_fixtures(self):
    """
    Create the per-test output directory plus a writer and a reader that
    both point at it.
    """
    super(BcolzMinuteBarTestCase, self).init_instance_fixtures()

    self.dest = dest = self.instance_tmpdir.getpath('minute_bars')
    os.makedirs(dest)

    # The writer is created first; the reader is then opened on the same
    # directory.
    writer_args = (
        TEST_CALENDAR_START,
        dest,
        self.market_opens,
        self.market_closes,
        US_EQUITIES_MINUTES_PER_DAY,
    )
    self.writer = BcolzMinuteBarWriter(*writer_args)
    self.reader = BcolzMinuteBarReader(dest)
def test_write_one_ohlcv_with_ratios(self):
    """
    Write a single bar through a writer configured with
    `ohlc_ratios_per_sid` and check that every field reads back unchanged.
    """
    sid = 1
    minute = self.market_opens[self.test_calendar_start]
    bar = pd.DataFrame(
        data={
            "open": [10.0],
            "high": [20.0],
            "low": [30.0],
            "close": [40.0],
            "volume": [50.0],
        },
        index=[minute],
    )

    # A dedicated writer is needed because `ohlc_ratios_per_sid` is passed
    # at construction time.
    ratio_writer = BcolzMinuteBarWriter(
        self.dest,
        self.trading_calendar,
        TEST_CALENDAR_START,
        TEST_CALENDAR_STOP,
        US_EQUITIES_MINUTES_PER_DAY,
        ohlc_ratios_per_sid={sid: 25},
    )
    ratio_writer.write_sid(sid, bar)

    reader = BcolzMinuteBarReader(self.dest)

    expected_bar = (
        ("open", 10.0),
        ("high", 20.0),
        ("low", 30.0),
        ("close", 40.0),
        ("volume", 50.0),
    )
    for field, expected in expected_bar:
        actual = reader.get_value(sid, minute, field)
        assert expected == actual
def test_write_one_ohlcv_with_ratios(self):
    """
    Write a single bar through a writer configured with
    `ohlc_ratios_per_sid` and check that every field reads back unchanged.
    """
    sid = 1
    minute = self.market_opens[self.test_calendar_start]
    bar = DataFrame(
        data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0],
        },
        index=[minute],
    )

    # A dedicated writer is needed because `ohlc_ratios_per_sid` is passed
    # at construction time.
    ratio_writer = BcolzMinuteBarWriter(
        self.dest,
        self.trading_calendar,
        TEST_CALENDAR_START,
        TEST_CALENDAR_STOP,
        US_EQUITIES_MINUTES_PER_DAY,
        ohlc_ratios_per_sid={sid: 25},
    )
    ratio_writer.write_sid(sid, bar)

    reader = BcolzMinuteBarReader(self.dest)

    expected_bar = (
        ('open', 10.0),
        ('high', 20.0),
        ('low', 30.0),
        ('close', 40.0),
        ('volume', 50.0),
    )
    for field, expected in expected_bar:
        actual = reader.get_value(sid, minute, field)
        self.assertEqual(expected, actual)
class BcolzMinuteBarTestCase(TestCase):
    """
    Tests for writing minute bars with ``BcolzMinuteBarWriter`` and reading
    them back with ``BcolzMinuteBarReader``.

    ``setUp`` gives each test a fresh temporary directory with a writer and
    a reader opened on it; ``tearDown`` removes the directory.
    """

    # Field order shared by the read-back helper and the window tests.
    _FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Slice the market opens/closes down to the test calendar range."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        all_market_closes = cls.env.open_and_closes.market_close
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.market_closes = all_market_closes[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create the output directory and open a writer and reader on it."""
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        self.dir_.cleanup()

    def _assert_bar(self, sid, minute, expected):
        """
        Assert that ``self.reader`` returns ``expected`` for ``sid`` at
        ``minute``.

        ``expected`` holds the open, high, low, close and volume values, in
        that order. Uses ``assertEqual`` rather than the deprecated
        ``assertEquals`` alias.
        """
        for field, value in zip(self._FIELDS, expected):
            self.assertEqual(value, self.reader.get_value(sid, minute, field))

    def test_write_one_ohlcv(self):
        """A single bar written at the first minute reads back unchanged."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Two consecutive minutes written together both read back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """A bar whose first write lands on the second session reads back."""
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as nan prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        volume_price = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        """Bars on two different sessions written in one call read back."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)

        self._assert_bar(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises an overlap error."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each
        sid does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does
        not show up in the test itself, but is exercised by the act of
        attempting to write two consecutive sids, which would be written to
        the same containing directory, `00/00/000001.bcolz` and
        `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute])
        self.writer.write(sids[1], data)

        self._assert_bar(sids[0], minute, (15.0, 17.0, 11.0, 15.0, 100.0))
        self._assert_bar(sids[1], minute, (25.0, 27.0, 21.0, 25.0, 200.0))

    def test_pad_data(self):
        """
        Test padding a sid up to a session, then writing on the next one.
        """
        sid = 1
        # Nothing has been written for the sid yet.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (15.0, 17.0, 11.0, 15.0, 100.0))

    def test_nans(self):
        """
        Test writing bars whose prices are all nan and whose volume is 0.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        # Pass the DatetimeIndex itself as the index: wrapping it in a list
        # can be interpreted by pandas as a list of index arrays
        # (MultiIndex) rather than a plain index of minutes.
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=minutes)
        self.writer.write(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        # All exponent bits set plus a non-zero mantissa is a nan; varying
        # the mantissa gives nans with different bit patterns.
        nan_exponent = 0b11111111111 << 52
        # As in `test_nans`, the DatetimeIndex is passed directly rather
        # than wrapped in a list.
        data = DataFrame(
            data={
                'open': (nan_exponent + arange(1, 10, dtype=int64)).
                view(float64),
                'high': (nan_exponent + arange(11, 20, dtype=int64)).
                view(float64),
                'low': (nan_exponent + arange(21, 30, dtype=int64)).
                view(float64),
                'close': (nan_exponent + arange(31, 40, dtype=int64)).
                view(float64),
                'volume': full(9, 0),
            },
            index=minutes)
        self.writer.write(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_cols(self):
        """Bars written column-wise via `write_cols` read back unchanged."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write(sids[1], data_2)

        # Read through a fresh reader created after both writes.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
def test_unadjusted_minutes_market_breaks(self):
    """
    Test unadjusted minute window, ensuring that market breaks are filtered
    out.

    Uses its own writer built on the "XTKS" calendar and places bars on
    both sides of that calendar's break for the sample session.
    """
    MINUTES_PER_DAY = 360
    trading_calendar = get_calendar("XTKS")
    writer = BcolzMinuteBarWriter(
        self.dest,
        trading_calendar,
        TEST_CALENDAR_START,
        TEST_CALENDAR_STOP,
        MINUTES_PER_DAY,
    )

    sample_date = Timestamp('2015-11-25', tz='UTC')
    minutes = [
        # before break
        trading_calendar.break_starts[sample_date] - Timedelta('1 min'),
        # after break
        trading_calendar.break_ends[sample_date] + Timedelta('1 min'),
        trading_calendar.break_ends[sample_date] + Timedelta('2 min')
    ]
    sids = [1, 2]
    data_1 = DataFrame(
        data={
            'open': [15.0, 15.1, 15.2],
            'high': [17.0, 17.1, 17.2],
            'low': [11.0, 11.1, 11.3],
            'close': [14.0, 14.1, 14.2],
            'volume': [1000, 1001, 1002],
        },
        index=minutes)
    writer.write_sid(sids[0], data_1)

    data_2 = DataFrame(
        data={
            'open': [25.0, 25.1, 25.2],
            'high': [27.0, 27.1, 27.2],
            'low': [21.0, 21.1, 21.2],
            'close': [24.0, 24.1, 24.2],
            'volume': [2000, 2001, 2002],
        },
        index=minutes)
    writer.write_sid(sids[1], data_2)

    # Read through a fresh reader created after both writes.
    reader = BcolzMinuteBarReader(self.dest)

    columns = ['open', 'high', 'low', 'close', 'volume']
    arrays = list(map(transpose, reader.load_raw_arrays(
        columns, minutes[0], minutes[-1], sids,
    )))

    data = {sids[0]: data_1, sids[1]: data_2}

    # Offsets of the written minutes relative to the start of the window,
    # measured in the XTKS calendar's minutes.
    all_minutes = trading_calendar.all_minutes
    start_minute_loc = all_minutes.get_loc(minutes[0])
    minute_locs = [
        all_minutes.get_loc(minute) - start_minute_loc
        for minute in minutes
    ]

    for i, col in enumerate(columns):
        for j, sid in enumerate(sids):
            expected = data[sid].loc[minutes, col]
            actual = arrays[i][j][minute_locs]
            assert_almost_equal(expected, actual)
class BcolzMinuteBarTestCase(TestCase):
    """
    Tests that bars written with BcolzMinuteBarWriter can be read back
    through BcolzMinuteBarReader.
    """

    # Field names, in the order every bar assertion below reads them.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Slice the environment's market opens down to the test range."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP)
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create a fresh output directory, writer and reader per test."""
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        self.dir_.cleanup()

    @staticmethod
    def _ohlcv_frame(index, opens, highs, lows, closes, volumes):
        """Build a bar DataFrame over ``index`` from per-field value lists."""
        return DataFrame(
            data={
                'open': opens,
                'high': highs,
                'low': lows,
                'close': closes,
                'volume': volumes,
            },
            index=index,
        )

    def _assert_bar(self, sid, minute, expected):
        """
        Assert the values ``self.reader`` returns for ``sid`` at ``minute``.

        ``expected`` holds five values in open, high, low, close, volume
        order; each field is read with ``get_value`` and compared in turn.
        """
        for field, value in zip(self._OHLCV_FIELDS, expected):
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

    def test_write_one_ohlcv(self):
        """Write a single bar at the first market open and read it back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = self._ohlcv_frame(
            [minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Write two consecutive minutes in one frame and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = self._ohlcv_frame(
            [minute_0, minute_1],
            [10.0, 11.0],
            [20.0, 21.0],
            [30.0, 31.0],
            [40.0, 41.0],
            [50.0, 51.0],
        )
        self.writer.write(sid, data)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """Write a single bar whose minute is not on the first session."""
        # NOTE(review): integer arithmetic on a Timestamp is rejected by
        # newer pandas releases -- confirm the pandas version this targets.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = self._ohlcv_frame(
            [minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as nan prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = self._ohlcv_frame([minute], [0], [0], [0], [0], [0])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Write one frame whose two bars fall on different sessions."""
        tds = self.market_opens.index
        # NOTE(review): same integer-on-Timestamp arithmetic as in
        # test_write_on_second_day -- confirm the supported pandas version.
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = self._ohlcv_frame(
            minutes,
            [10.0, 11.0],
            [20.0, 21.0],
            [30.0, 31.0],
            [40.0, 41.0],
            [50.0, 51.0],
        )
        self.writer.write(sid, data)

        self._assert_bar(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = self._ohlcv_frame(
            [minute], [10.0], [20.0], [30.0], [40.0], [50.0])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each
        sid does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does
        not show up in the test itself, but is exercised by the act of
        attempting to write two consecutive sids, which would be written to
        the same containing directory, `00/00/000001.bcolz` and
        `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]

        data = self._ohlcv_frame(
            [minute], [15.0], [17.0], [11.0], [15.0], [100.0])
        self.writer.write(sids[0], data)

        data = self._ohlcv_frame(
            [minute], [25.0], [27.0], [21.0], [25.0], [200.0])
        self.writer.write(sids[1], data)

        self._assert_bar(sids[0], minute, (15.0, 17.0, 11.0, 15.0, 100.0))
        self._assert_bar(sids[1], minute, (25.0, 27.0, 21.0, 25.0, 200.0))

    def test_pad_data(self):
        """
        Test padding a sid and then writing after the padded session.

        A sid with no output reports ``NaT`` as its last date; after
        ``pad`` it reports the padded date, and a bar at the market open of
        ``TEST_CALENDAR_START + freq`` can then be written and read back.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = self._ohlcv_frame(
            [minute], [15.0], [17.0], [11.0], [15.0], [100.0])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (15.0, 17.0, 11.0, 15.0, 100.0))

    def test_write_cols(self):
        """Write two bars through ``write_cols`` and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.

        Writes three minutes for two sids, the middle one being nan prices
        with zero volume, and compares ``unadjusted_window`` output with
        the written frames column by column.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]

        data_1 = self._ohlcv_frame(
            minutes,
            [15.0, nan, 15.1],
            [17.0, nan, 17.1],
            [11.0, nan, 11.1],
            [14.0, nan, 14.1],
            [1000, 0, 1001],
        )
        self.writer.write(sids[0], data_1)

        data_2 = self._ohlcv_frame(
            minutes,
            [25.0, nan, 25.1],
            [27.0, nan, 27.1],
            [21.0, nan, 21.1],
            [24.0, nan, 24.1],
            [2000, 0, 2001],
        )
        self.writer.write(sids[1], data_2)

        # Use a reader opened after both sids were written.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        # The result is indexed as arrays[column][sid] in the loop below.
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
def test_volume_share_slippage(self):
    """
    Test VolumeShareSlippage against a single written minute bar.

    One bar with volume 200 is written for sid 133 at ``self.minutes[0]``.
    Simulating an open order for 100 shares at that minute must yield
    exactly one transaction (amount 5 at price 3.0001875); simulating the
    same order at ``self.minutes[1]``, where no bar was written, must
    yield none.
    """
    def make_open_orders():
        # A fresh, unfilled order is needed for each simulation pass.
        return [
            Order(
                dt=datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                amount=100,
                filled=0,
                sid=self.ASSET133
            )
        ]

    tempdir = TempDirectory()

    try:
        assets = {
            133: pd.DataFrame({
                "open": [3.00],
                "high": [3.15],
                "low": [2.85],
                "close": [3.00],
                "volume": [200],
                "dt": [self.minutes[0]]
            }).set_index("dt")
        }

        write_bcolz_minute_data(
            self.env,
            pd.date_range(
                start=normalize_date(self.minutes[0]),
                end=normalize_date(self.minutes[-1])
            ),
            tempdir.path,
            assets
        )

        equity_minute_reader = BcolzMinuteBarReader(tempdir.path)

        data_portal = DataPortal(
            self.env,
            equity_minute_reader=equity_minute_reader,
        )

        slippage_model = VolumeShareSlippage()

        open_orders = make_open_orders()

        bar_data = BarData(data_portal,
                           lambda: self.minutes[0],
                           'minute')

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEqual(len(orders_txns), 1)
        _, txn = orders_txns[0]

        expected_txn = {
            'price': 3.0001875,
            'dt': datetime.datetime(
                2006, 1, 5, 14, 31, tzinfo=pytz.utc),
            'amount': 5,
            'sid': 133,
            'commission': None,
            'type': DATASOURCE_TYPE.TRANSACTION,
            'order_id': open_orders[0].id
        }

        self.assertIsNotNone(txn)

        # TODO: Make expected_txn an Transaction object and ensure there
        # is a __eq__ for that class.
        self.assertEqual(expected_txn, txn.__dict__)

        open_orders = make_open_orders()

        # Set bar_data to be a minute ahead of last trade.
        # Volume share slippage should not execute when there is no trade.
        bar_data = BarData(data_portal,
                           lambda: self.minutes[1],
                           'minute')

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEqual(len(orders_txns), 0)

    finally:
        # Remove the bcolz output even when an assertion above fails.
        tempdir.cleanup()
def create_data_portal_from_trade_history(env, tempdir, sim_params,
                                          trades_by_sid):
    """
    Write a trade history to bcolz under ``tempdir`` and wrap it in a
    DataPortal.

    Parameters
    ----------
    env
        Passed to ``DataPortal`` and, in minute mode, queried for the
        minutes and days between ``sim_params.first_open`` and
        ``sim_params.last_close``.
    tempdir
        Object whose ``path`` attribute is the output directory.
    sim_params
        Supplies ``data_frequency`` plus either ``trading_days`` (daily
        mode) or ``first_open`` / ``last_close`` (any other frequency).
    trades_by_sid
        Mapping of sid to an iterable of trades.  Daily trades are read by
        key (``trade["open_price"]``); minute trades by attribute
        (``trade.open_price``).

    Returns
    -------
    DataPortal
        Built with an ``equity_daily_reader`` when the data frequency is
        "daily", otherwise with an ``equity_minute_reader``.
    """
    if sim_params.data_frequency != "daily":
        all_minutes = env.minutes_for_days_in_range(
            sim_params.first_open,
            sim_params.last_close
        )
        bar_count = len(all_minutes)

        minute_frames = {}
        for sid, sid_trades in iteritems(trades_by_sid):
            # Minutes with no trade keep the zero they are initialised to.
            open_col = np.zeros(bar_count)
            high_col = np.zeros(bar_count)
            low_col = np.zeros(bar_count)
            close_col = np.zeros(bar_count)
            volume_col = np.zeros(bar_count)

            for bar in sid_trades:
                # Locate the slot of this trade's minute in the full range.
                slot = all_minutes.searchsorted(bar.dt)

                # Prices are scaled by 1000 before writing; volume is not.
                open_col[slot] = bar.open_price * 1000
                high_col[slot] = bar.high * 1000
                low_col[slot] = bar.low * 1000
                close_col[slot] = bar.close_price * 1000
                volume_col[slot] = bar.volume

            frame = pd.DataFrame({
                "open": open_col,
                "high": high_col,
                "low": low_col,
                "close": close_col,
                "volume": volume_col,
                "dt": all_minutes
            })
            minute_frames[sid] = frame.set_index("dt")

        write_bcolz_minute_data(
            env,
            env.days_in_range(
                sim_params.first_open,
                sim_params.last_close
            ),
            tempdir.path,
            minute_frames
        )

        return DataPortal(
            env,
            equity_minute_reader=BcolzMinuteBarReader(tempdir.path),
        )

    daily_path = os.path.join(tempdir.path, "testdaily.bcolz")

    daily_frames = {}
    for sid, sid_trades in iteritems(trades_by_sid):
        # Walk the trades exactly once, capturing one row per trade.
        rows = [
            (
                bar["open_price"],
                bar["high"],
                bar["low"],
                bar["close_price"],
                bar["volume"],
            )
            for bar in sid_trades
        ]

        daily_frames[sid] = pd.DataFrame({
            "open": np.array([row[0] for row in rows]),
            "high": np.array([row[1] for row in rows]),
            "low": np.array([row[2] for row in rows]),
            "close": np.array([row[3] for row in rows]),
            "volume": np.array([row[4] for row in rows]),
            "day": [day.value for day in sim_params.trading_days]
        }, index=sim_params.trading_days)

    DailyBarWriterFromDataFrames(daily_frames).write(
        daily_path,
        sim_params.trading_days,
        daily_frames
    )

    return DataPortal(
        env,
        equity_daily_reader=BcolzDailyBarReader(daily_path),
    )
class BcolzMinuteBarTestCase(WithTradingCalendars, WithAssetFinder, WithInstanceTmpDir, ZiplineTestCase): ASSET_FINDER_EQUITY_SIDS = 1, 2 @classmethod def init_class_fixtures(cls): super(BcolzMinuteBarTestCase, cls).init_class_fixtures() cal = cls.trading_calendar.schedule.loc[ TEST_CALENDAR_START:TEST_CALENDAR_STOP] cls.market_opens = cal.market_open.dt.tz_localize("UTC") cls.market_closes = cal.market_close.dt.tz_localize("UTC") cls.test_calendar_start = cls.market_opens.index[0] cls.test_calendar_stop = cls.market_opens.index[-1] def init_instance_fixtures(self): super(BcolzMinuteBarTestCase, self).init_instance_fixtures() self.dest = self.instance_tmpdir.getpath("minute_bars") os.makedirs(self.dest) self.writer = BcolzMinuteBarWriter( self.dest, self.trading_calendar, TEST_CALENDAR_START, TEST_CALENDAR_STOP, US_EQUITIES_MINUTES_PER_DAY, ) self.reader = BcolzMinuteBarReader(self.dest) def test_version(self): metadata = self.reader._get_metadata() self.assertEquals( metadata.version, BcolzMinuteBarMetadata.FORMAT_VERSION, ) def test_no_minute_bars_for_sid(self): minute = self.market_opens[self.test_calendar_start] with self.assertRaises(NoDataForSid): self.reader.get_value(1337, minute, "close") def test_write_one_ohlcv(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ "open": [10.0], "high": [20.0], "low": [30.0], "close": [40.0], "volume": [50.0], }, index=[minute], ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(50.0, volume_price) def test_precision_after_scaling(self): """For numbers that 
don't have an exact float representation, assert that scaling the value does not cause a loss in precision. """ minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ "open": [130.23], "high": [130.23], "low": [130.23], "close": [130.23], "volume": [1000], }, index=[minute], ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(130.23, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(130.23, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(130.23, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(130.23, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(1000, volume_price) def test_write_one_ohlcv_with_ratios(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ "open": [10.0], "high": [20.0], "low": [30.0], "close": [40.0], "volume": [50.0], }, index=[minute], ) # Create a new writer with `ohlc_ratios_per_sid` defined. 
writer_with_ratios = BcolzMinuteBarWriter( self.dest, self.trading_calendar, TEST_CALENDAR_START, TEST_CALENDAR_STOP, US_EQUITIES_MINUTES_PER_DAY, ohlc_ratios_per_sid={sid: 25}, ) writer_with_ratios.write_sid(sid, data) reader = BcolzMinuteBarReader(self.dest) open_price = reader.get_value(sid, minute, "open") self.assertEquals(10.0, open_price) high_price = reader.get_value(sid, minute, "high") self.assertEquals(20.0, high_price) low_price = reader.get_value(sid, minute, "low") self.assertEquals(30.0, low_price) close_price = reader.get_value(sid, minute, "close") self.assertEquals(40.0, close_price) volume_price = reader.get_value(sid, minute, "volume") self.assertEquals(50.0, volume_price) def test_write_two_bars(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 data = DataFrame( data={ "open": [10.0, 11.0], "high": [20.0, 21.0], "low": [30.0, 31.0], "close": [40.0, 41.0], "volume": [50.0, 51.0], }, index=[minute_0, minute_1], ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute_0, "open") self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, "high") self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, "low") self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, "close") self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, "volume") self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, "open") self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, "high") self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, "low") self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, "close") self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, "volume") self.assertEquals(51.0, volume_price) def 
test_write_on_second_day(self): second_day = self.test_calendar_start + timedelta(days=1) minute = self.market_opens[second_day] sid = 1 data = DataFrame( data={ "open": [10.0], "high": [20.0], "low": [30.0], "close": [40.0], "volume": [50.0], }, index=[minute], ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(50.0, volume_price) def test_write_empty(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ "open": [0], "high": [0], "low": [0], "close": [0], "volume": [0] }, index=[minute], ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, "open") assert_almost_equal(nan, open_price) high_price = self.reader.get_value(sid, minute, "high") assert_almost_equal(nan, high_price) low_price = self.reader.get_value(sid, minute, "low") assert_almost_equal(nan, low_price) close_price = self.reader.get_value(sid, minute, "close") assert_almost_equal(nan, close_price) volume_price = self.reader.get_value(sid, minute, "volume") assert_almost_equal(0, volume_price) def test_write_on_multiple_days(self): tds = self.market_opens.index days = tds[tds.slice_indexer( start=self.test_calendar_start + timedelta(days=1), end=self.test_calendar_start + timedelta(days=3), )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ "open": [10.0, 11.0], "high": [20.0, 21.0], "low": [30.0, 31.0], "close": [40.0, 41.0], "volume": [50.0, 51.0], }, index=minutes, ) self.writer.write_sid(sid, 
data) minute = minutes[0] open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(50.0, volume_price) minute = minutes[1] open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(51.0, volume_price) def test_no_overwrite(self): minute = self.market_opens[TEST_CALENDAR_START] sid = 1 data = DataFrame( data={ "open": [10.0], "high": [20.0], "low": [30.0], "close": [40.0], "volume": [50.0], }, index=[minute], ) self.writer.write_sid(sid, data) with self.assertRaises(BcolzMinuteOverlappingData): self.writer.write_sid(sid, data) def test_append_to_same_day(self): """ Test writing data with the same date as existing data in our file. 
""" sid = 1 first_minute = self.market_opens[TEST_CALENDAR_START] data = DataFrame( data={ "open": [10.0], "high": [20.0], "low": [30.0], "close": [40.0], "volume": [50.0], }, index=[first_minute], ) self.writer.write_sid(sid, data) # Write data in the same day as the previous minute second_minute = first_minute + Timedelta(minutes=1) new_data = DataFrame( data={ "open": [5.0], "high": [10.0], "low": [3.0], "close": [7.0], "volume": [10.0], }, index=[second_minute], ) self.writer.write_sid(sid, new_data) open_price = self.reader.get_value(sid, second_minute, "open") self.assertEquals(5.0, open_price) high_price = self.reader.get_value(sid, second_minute, "high") self.assertEquals(10.0, high_price) low_price = self.reader.get_value(sid, second_minute, "low") self.assertEquals(3.0, low_price) close_price = self.reader.get_value(sid, second_minute, "close") self.assertEquals(7.0, close_price) volume_price = self.reader.get_value(sid, second_minute, "volume") self.assertEquals(10.0, volume_price) def test_append_on_new_day(self): sid = 1 ohlcv = { "open": [2.0], "high": [3.0], "low": [1.0], "close": [2.0], "volume": [10.0], } dt = self.market_opens[TEST_CALENDAR_STOP] data = DataFrame(data=ohlcv, index=[dt]) self.writer.write_sid(sid, data) # Open a new writer to cover `open` method, also a common usage # of appending new days will be writing to an existing directory. cday = self.trading_calendar.schedule.index.freq new_end_session = TEST_CALENDAR_STOP + cday writer = BcolzMinuteBarWriter.open(self.dest, new_end_session) next_day_minute = dt + cday new_data = DataFrame(data=ohlcv, index=[next_day_minute]) writer.write_sid(sid, new_data) # Get a new reader to test updated calendar. 
reader = BcolzMinuteBarReader(self.dest) second_minute = dt + Timedelta(minutes=1) # The second minute should have been padded with zeros for col in ("open", "high", "low", "close"): assert_almost_equal(nan, reader.get_value(sid, second_minute, col)) self.assertEqual(0, reader.get_value(sid, second_minute, "volume")) # The next day minute should have data. for col in ("open", "high", "low", "close", "volume"): assert_almost_equal(ohlcv[col], reader.get_value(sid, next_day_minute, col)) def test_write_multiple_sids(self): """ Test writing multiple sids. Tests both that the data is written to the correct sid, as well as ensuring that the logic for creating the subdirectory path to each sid does not cause issues from attempts to recreate existing paths. (Calling out this coverage, because an assertion of that logic does not show up in the test itself, but is exercised by the act of attempting to write two consecutive sids, which would be written to the same containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz) Before applying a check to make sure the path writing did not re-attempt directory creation an OSError like the following would occur: ``` OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00' ``` """ minute = self.market_opens[TEST_CALENDAR_START] sids = [1, 2] data = DataFrame( data={ "open": [15.0], "high": [17.0], "low": [11.0], "close": [15.0], "volume": [100.0], }, index=[minute], ) self.writer.write_sid(sids[0], data) data = DataFrame( data={ "open": [25.0], "high": [27.0], "low": [21.0], "close": [25.0], "volume": [200.0], }, index=[minute], ) self.writer.write_sid(sids[1], data) sid = sids[0] open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, "close") 
self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(100.0, volume_price) sid = sids[1] open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(25.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(27.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(21.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(25.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(200.0, volume_price) def test_pad_data(self): """ Test writing empty data. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq day = TEST_CALENDAR_START + freq minute = self.market_opens[day] data = DataFrame( data={ "open": [15.0], "high": [17.0], "low": [11.0], "close": [15.0], "volume": [100.0], }, index=[minute], ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(100.0, volume_price) # Check that if we then pad the rest of this day, we end up with # 2 days worth of minutes. self.writer.pad(sid, day) self.assertEqual( len(self.writer._ensure_ctable(sid)), self.writer._minutes_per_day * 2, ) def test_nans(self): """ Test writing empty data. 
""" sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] minutes = date_range(minute, periods=9, freq="min") data = DataFrame( data={ "open": full(9, nan), "high": full(9, nan), "low": full(9, nan), "close": full(9, nan), "volume": full(9, 0.0), }, index=minutes, ) self.writer.write_sid(sid, data) fields = ["open", "high", "low", "close", "volume"] ohlcv_window = list( map( transpose, self.reader.load_raw_arrays( fields, minutes[0], minutes[-1], [sid], ), )) for i, field in enumerate(fields): if field != "volume": assert_array_equal(full(9, nan), ohlcv_window[i][0]) else: assert_array_equal(zeros(9), ohlcv_window[i][0]) def test_differing_nans(self): """ Also test nans of differing values/construction. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] minutes = date_range(minute, periods=9, freq="min") data = DataFrame( data={ "open": ((0b11111111111 << 52) + arange(1, 10, dtype=int64)).view(float64), "high": ((0b11111111111 << 52) + arange(11, 20, dtype=int64)).view(float64), "low": ((0b11111111111 << 52) + arange(21, 30, dtype=int64)).view(float64), "close": ((0b11111111111 << 52) + arange(31, 40, dtype=int64)).view(float64), "volume": full(9, 0.0), }, index=minutes, ) self.writer.write_sid(sid, data) fields = ["open", "high", "low", "close", "volume"] ohlcv_window = list( map( transpose, self.reader.load_raw_arrays( fields, minutes[0], minutes[-1], [sid], ), )) for i, field in enumerate(fields): if field 
!= "volume": assert_array_equal(full(9, nan), ohlcv_window[i][0]) else: assert_array_equal(zeros(9), ohlcv_window[i][0]) def test_write_cols(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 cols = { "open": array([10.0, 11.0]), "high": array([20.0, 21.0]), "low": array([30.0, 31.0]), "close": array([40.0, 41.0]), "volume": array([50.0, 51.0]), } dts = array([minute_0, minute_1], dtype="datetime64[s]") self.writer.write_cols(sid, dts, cols) open_price = self.reader.get_value(sid, minute_0, "open") self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, "high") self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, "low") self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, "close") self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, "volume") self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, "open") self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, "high") self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, "low") self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, "close") self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, "volume") self.assertEquals(51.0, volume_price) def test_write_cols_mismatch_length(self): dts = date_range(self.market_opens[self.test_calendar_start], periods=2, freq="min").asi8.astype("datetime64[s]") sid = 1 cols = { "open": array([10.0, 11.0, 12.0]), "high": array([20.0, 21.0]), "low": array([30.0, 31.0, 33.0, 34.0]), "close": array([40.0, 41.0]), "volume": array([50.0, 51.0, 52.0]), } with self.assertRaises(BcolzMinuteWriterColumnMismatch): self.writer.write_cols(sid, dts, cols) def test_unadjusted_minutes(self): """ Test unadjusted minutes. 
""" start_minute = self.market_opens[TEST_CALENDAR_START] minutes = [ start_minute, start_minute + Timedelta("1 min"), start_minute + Timedelta("2 min"), ] sids = [1, 2] data_1 = DataFrame( data={ "open": [15.0, nan, 15.1], "high": [17.0, nan, 17.1], "low": [11.0, nan, 11.1], "close": [14.0, nan, 14.1], "volume": [1000, 0, 1001], }, index=minutes, ) self.writer.write_sid(sids[0], data_1) data_2 = DataFrame( data={ "open": [25.0, nan, 25.1], "high": [27.0, nan, 27.1], "low": [21.0, nan, 21.1], "close": [24.0, nan, 24.1], "volume": [2000, 0, 2001], }, index=minutes, ) self.writer.write_sid(sids[1], data_2) reader = BcolzMinuteBarReader(self.dest) columns = ["open", "high", "low", "close", "volume"] sids = [sids[0], sids[1]] arrays = list( map( transpose, reader.load_raw_arrays( columns, minutes[0], minutes[-1], sids, ), )) data = {sids[0]: data_1, sids[1]: data_2} for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid][col], arrays[i][j]) def test_unadjusted_minutes_early_close(self): """ Test unadjusted minute window, ensuring that early closes are filtered out. 
""" day_before_thanksgiving = Timestamp("2015-11-25", tz="UTC") xmas_eve = Timestamp("2015-12-24", tz="UTC") market_day_after_xmas = Timestamp("2015-12-28", tz="UTC") minutes = [ self.market_closes[day_before_thanksgiving] - Timedelta("2 min"), self.market_closes[xmas_eve] - Timedelta("1 min"), self.market_opens[market_day_after_xmas] + Timedelta("1 min"), ] sids = [1, 2] data_1 = DataFrame( data={ "open": [15.0, 15.1, 15.2], "high": [17.0, 17.1, 17.2], "low": [11.0, 11.1, 11.3], "close": [14.0, 14.1, 14.2], "volume": [1000, 1001, 1002], }, index=minutes, ) self.writer.write_sid(sids[0], data_1) data_2 = DataFrame( data={ "open": [25.0, 25.1, 25.2], "high": [27.0, 27.1, 27.2], "low": [21.0, 21.1, 21.2], "close": [24.0, 24.1, 24.2], "volume": [2000, 2001, 2002], }, index=minutes, ) self.writer.write_sid(sids[1], data_2) reader = BcolzMinuteBarReader(self.dest) columns = ["open", "high", "low", "close", "volume"] sids = [sids[0], sids[1]] arrays = list( map( transpose, reader.load_raw_arrays( columns, minutes[0], minutes[-1], sids, ), )) data = {sids[0]: data_1, sids[1]: data_2} start_minute_loc = self.trading_calendar.all_minutes.get_loc( minutes[0]) minute_locs = [ self.trading_calendar.all_minutes.get_loc(minute) - start_minute_loc for minute in minutes ] for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid].loc[minutes, col], arrays[i][j][minute_locs]) def test_adjust_non_trading_minutes(self): start_day = Timestamp("2015-06-01", tz="UTC") end_day = Timestamp("2015-06-02", tz="UTC") sid = 1 cols = { "open": arange(1, 781), "high": arange(1, 781), "low": arange(1, 781), "close": arange(1, 781), "volume": arange(1, 781), } dts = array( self.trading_calendar.minutes_for_sessions_in_range( self.trading_calendar.minute_to_session_label(start_day), self.trading_calendar.minute_to_session_label(end_day), )) self.writer.write_cols(sid, dts, cols) self.assertEqual( self.reader.get_value(sid, Timestamp("2015-06-01 20:00:00", 
tz="UTC"), "open"), 390, ) self.assertEqual( self.reader.get_value(sid, Timestamp("2015-06-02 20:00:00", tz="UTC"), "open"), 780, ) with self.assertRaises(NoDataOnDate): self.reader.get_value(sid, Timestamp("2015-06-02", tz="UTC"), "open") with self.assertRaises(NoDataOnDate): self.reader.get_value(sid, Timestamp("2015-06-02 20:01:00", tz="UTC"), "open") def test_adjust_non_trading_minutes_half_days(self): # half day start_day = Timestamp("2015-11-27", tz="UTC") end_day = Timestamp("2015-11-30", tz="UTC") sid = 1 cols = { "open": arange(1, 601), "high": arange(1, 601), "low": arange(1, 601), "close": arange(1, 601), "volume": arange(1, 601), } dts = array( self.trading_calendar.minutes_for_sessions_in_range( self.trading_calendar.minute_to_session_label(start_day), self.trading_calendar.minute_to_session_label(end_day), )) self.writer.write_cols(sid, dts, cols) self.assertEqual( self.reader.get_value(sid, Timestamp("2015-11-27 18:00:00", tz="UTC"), "open"), 210, ) self.assertEqual( self.reader.get_value(sid, Timestamp("2015-11-30 21:00:00", tz="UTC"), "open"), 600, ) self.assertEqual( self.reader.get_value(sid, Timestamp("2015-11-27 18:01:00", tz="UTC"), "open"), 210, ) with self.assertRaises(NoDataOnDate): self.reader.get_value(sid, Timestamp("2015-11-30", tz="UTC"), "open") with self.assertRaises(NoDataOnDate): self.reader.get_value(sid, Timestamp("2015-11-30 21:01:00", tz="UTC"), "open") def test_set_sid_attrs(self): """Confirm that we can set the attributes of a sid's file correctly.""" sid = 1 start_day = Timestamp("2015-11-27", tz="UTC") end_day = Timestamp("2015-06-02", tz="UTC") attrs = { "start_day": start_day.value / int(1e9), "end_day": end_day.value / int(1e9), "factor": 100, } # Write the attributes self.writer.set_sid_attrs(sid, **attrs) # Read the attributes for k, v in attrs.items(): self.assertEqual(self.reader.get_sid_attr(sid, k), v) def test_truncate_between_data_points(self): tds = self.market_opens.index days = tds[tds.slice_indexer( 
start=self.test_calendar_start + timedelta(days=1), end=self.test_calendar_start + timedelta(days=3), )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ "open": [10.0, 11.0], "high": [20.0, 21.0], "low": [30.0, 31.0], "close": [40.0, 41.0], "volume": [50.0, 51.0], }, index=minutes, ) self.writer.write_sid(sid, data) # Open a new writer to cover `open` method, also truncating only # applies to an existing directory. writer = BcolzMinuteBarWriter.open(self.dest) # Truncate to first day with data. writer.truncate(days[0]) # Refresh the reader since truncate update the metadata. self.reader = BcolzMinuteBarReader(self.dest) self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0]) cal = self.trading_calendar _, last_close = cal.open_and_close_for_session(days[0]) self.assertEqual(self.reader.last_available_dt, last_close) minute = minutes[0] open_price = self.reader.get_value(sid, minute, "open") self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, "high") self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, "low") self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, "close") self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, "volume") self.assertEquals(50.0, volume_price) def test_truncate_all_data_points(self): tds = self.market_opens.index days = tds[tds.slice_indexer( start=self.test_calendar_start + timedelta(days=1), end=self.test_calendar_start + timedelta(days=3), )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ "open": [10.0, 11.0], "high": [20.0, 21.0], "low": [30.0, 31.0], "close": [40.0, 41.0], "volume": [50.0, 51.0], }, index=minutes, ) self.writer.write_sid(sid, data) # Truncate 
to first day in the calendar, a day before the first # day with minute data. self.writer.truncate(self.test_calendar_start) # Refresh the reader since truncate update the metadata. self.reader = BcolzMinuteBarReader(self.dest) self.assertEqual( self.writer.last_date_in_output_for_sid(sid), self.test_calendar_start, ) cal = self.trading_calendar _, last_close = cal.open_and_close_for_session( self.test_calendar_start) self.assertEqual(self.reader.last_available_dt, last_close) def test_early_market_close(self): # Date to test is 2015-11-30 9:31 # Early close is 2015-11-27 18:00 friday_after_tday = Timestamp("2015-11-27", tz="UTC") friday_after_tday_close = self.market_closes[friday_after_tday] before_early_close = friday_after_tday_close - timedelta(minutes=8) after_early_close = friday_after_tday_close + timedelta(minutes=8) monday_after_tday = Timestamp("2015-11-30", tz="UTC") minute = self.market_opens[monday_after_tday] # Test condition where there is data written after the market # close (ideally, this should not occur in datasets, but guards # against consumers of the minute bar writer, which do not filter # out after close minutes. 
minutes = [before_early_close, after_early_close, minute] sid = 1 data = DataFrame( data={ "open": [10.0, 11.0, nan], "high": [20.0, 21.0, nan], "low": [30.0, 31.0, nan], "close": [40.0, 41.0, nan], "volume": [50, 51, 0], }, index=minutes, ) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, "open") assert_almost_equal(nan, open_price) high_price = self.reader.get_value(sid, minute, "high") assert_almost_equal(nan, high_price) low_price = self.reader.get_value(sid, minute, "low") assert_almost_equal(nan, low_price) close_price = self.reader.get_value(sid, minute, "close") assert_almost_equal(nan, close_price) volume = self.reader.get_value(sid, minute, "volume") self.assertEquals(0, volume) asset = self.asset_finder.retrieve_asset(sid) last_traded_dt = self.reader.get_last_traded_dt(asset, minute) self.assertEquals( last_traded_dt, before_early_close, "The last traded dt should be before the early " "close, even when data is written between the early " "close and the next open.", ) def test_minute_updates(self): """ Test minute updates. """ start_minute = self.market_opens[TEST_CALENDAR_START] minutes = [ start_minute, start_minute + Timedelta("1 min"), start_minute + Timedelta("2 min"), ] sids = [1, 2] data_1 = DataFrame( data={ "open": [15.0, nan, 15.1], "high": [17.0, nan, 17.1], "low": [11.0, nan, 11.1], "close": [14.0, nan, 14.1], "volume": [1000, 0, 1001], }, index=minutes, ) data_2 = DataFrame( data={ "open": [25.0, nan, 25.1], "high": [27.0, nan, 27.1], "low": [21.0, nan, 21.1], "close": [24.0, nan, 24.1], "volume": [2000, 0, 2001], }, index=minutes, ) frames = {1: data_1, 2: data_2} update_path = self.instance_tmpdir.getpath("updates.h5") update_writer = H5MinuteBarUpdateWriter(update_path) update_writer.write(frames) update_reader = H5MinuteBarUpdateReader(update_path) self.writer.write(update_reader.read(minutes, sids)) # Refresh the reader since truncate update the metadata. 
reader = BcolzMinuteBarReader(self.dest) columns = ["open", "high", "low", "close", "volume"] sids = [sids[0], sids[1]] arrays = list( map( transpose, reader.load_raw_arrays( columns, minutes[0], minutes[-1], sids, ), )) data = {sids[0]: data_1, sids[1]: data_2} for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid][col], arrays[i][j])
def fills2reader(self, tempdir, minutes, fills, orders):
    """Write ``fills`` as minute OHLCV bars under ``tempdir`` and return a reader.

    While the bars are written, every frame in ``fills`` temporarily gets
    ``open``/``high``/``low`` columns copied from ``close`` and has its
    ``volume`` replaced by its absolute value.  Afterwards the temporary
    columns are dropped and the original volume signs restored; this
    restore also runs when building the writer or writing fails, so the
    caller's frames are not left in the temporary state.

    Parameters
    ----------
    tempdir
        Object whose ``path`` attribute names the directory to write into.
    minutes
        Sized, indexable sequence of minutes.  Its first and last entries
        select the range of sessions the writer covers, and its first
        entry timestamps the placeholder bars.
    fills : dict
        Maps sid -> DataFrame with at least ``close`` and ``volume``
        columns.  Mutated in place: besides the temporary changes
        described above, a placeholder frame is added (and stays in the
        dict) for every sid in ``orders`` that has no entry yet.
    orders
        Iterable yielding sids.

    Returns
    -------
    BcolzMinuteBarReader or None
        A reader opened on ``tempdir.path``, or None when ``minutes`` is
        empty (in which case nothing is written or modified).
    """
    if len(minutes) == 0:
        return None

    # Resolve the session range before touching `fills`, so a failed
    # calendar lookup leaves the caller's frames unmodified.
    calendar = self.trading_calendar
    first_session = calendar.minute_to_session_label(minutes[0])
    last_session = calendar.minute_to_session_label(minutes[-1])
    days = calendar.sessions_in_range(first_session, last_session)
    path = tempdir.path

    try:
        for fill in fills.values():
            fill["open"] = fill["close"]
            fill["high"] = fill["close"]
            fill["low"] = fill["close"]
            # abs() below overwrites the caller's frame, so remember which
            # rows were negative in order to restore them afterwards.
            fill["is_neg"] = fill["volume"] < 0
            # Negative volumes are not written: per the original author's
            # note they are already split upstream and are not supported
            # as OHLC volumes.
            fill["volume"] = abs(fill["volume"])

        # Sids that have orders but no fills (yet) get a single dummy bar
        # with volume 0 so as not to affect orders.
        empty = {
            "open": [0],
            "high": [0],
            "low": [0],
            "close": [0],
            "volume": [0],
            "dt": [minutes[0]],
            "is_neg": [False],
        }
        for sid in orders:
            if sid not in fills:
                fills[sid] = pd.DataFrame(empty).set_index("dt")

        writer = BcolzMinuteBarWriter(
            rootdir=path,
            calendar=self.trading_calendar,
            start_session=days[0],
            end_session=days[-1],
            minutes_per_day=1440,  # 1440 == 24 * 60
        )
        writer.write(iteritems(fills))
    finally:
        # Undo the temporary changes whether or not the write succeeded.
        for fill in fills.values():
            if "is_neg" not in fill:
                # Frame was never prepared (failure happened earlier).
                continue
            del fill["open"]
            del fill["high"]
            del fill["low"]
            if fill["is_neg"].any():
                fill.loc[fill["is_neg"], "volume"] = -1 * fill["volume"]
            del fill["is_neg"]

    return BcolzMinuteBarReader(path)
class BcolzMinuteBarTestCase(TestCase): @classmethod def setUpClass(cls): cls.env = TradingEnvironment() all_market_opens = cls.env.open_and_closes.market_open all_market_closes = cls.env.open_and_closes.market_close indexer = all_market_opens.index.slice_indexer( start=TEST_CALENDAR_START, end=TEST_CALENDAR_STOP) cls.market_opens = all_market_opens[indexer] cls.market_closes = all_market_closes[indexer] cls.test_calendar_start = cls.market_opens.index[0] cls.test_calendar_stop = cls.market_opens.index[-1] def setUp(self): self.dir_ = TempDirectory() self.dir_.create() self.dest = self.dir_.getpath('minute_bars') os.makedirs(self.dest) self.writer = BcolzMinuteBarWriter( TEST_CALENDAR_START, self.dest, self.market_opens, self.market_closes, US_EQUITIES_MINUTES_PER_DAY, ) self.reader = BcolzMinuteBarReader(self.dest) def tearDown(self): self.dir_.cleanup() def test_write_one_ohlcv(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame(data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_write_two_bars(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 data = DataFrame(data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=[minute_0, minute_1]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute_0, 'open') self.assertEquals(10.0, 
open_price) high_price = self.reader.get_value(sid, minute_0, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, 'volume') self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) def test_write_on_second_day(self): second_day = self.test_calendar_start + 1 minute = self.market_opens[second_day] sid = 1 data = DataFrame(data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_write_empty(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame(data={ 'open': [0], 'high': [0], 'low': [0], 'close': [0], 'volume': [0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') assert_almost_equal(nan, open_price) high_price = self.reader.get_value(sid, minute, 'high') 
assert_almost_equal(nan, high_price) low_price = self.reader.get_value(sid, minute, 'low') assert_almost_equal(nan, low_price) close_price = self.reader.get_value(sid, minute, 'close') assert_almost_equal(nan, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') assert_almost_equal(0, volume_price) def test_write_on_multiple_days(self): tds = self.market_opens.index days = tds[tds.slice_indexer(start=self.test_calendar_start + 1, end=self.test_calendar_start + 3)] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame(data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=minutes) self.writer.write(sid, data) minute = minutes[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) minute = minutes[1] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(51.0, volume_price) def test_no_overwrite(self): minute = self.market_opens[TEST_CALENDAR_START] sid = 1 data = DataFrame(data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) 
self.writer.write(sid, data) with self.assertRaises(BcolzMinuteOverlappingData): self.writer.write(sid, data) def test_write_multiple_sids(self): """ Test writing multiple sids. Tests both that the data is written to the correct sid, as well as ensuring that the logic for creating the subdirectory path to each sid does not cause issues from attempts to recreate existing paths. (Calling out this coverage, because an assertion of that logic does not show up in the test itself, but is exercised by the act of attempting to write two consecutive sids, which would be written to the same containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz) Before applying a check to make sure the path writing did not re-attempt directory creation an OSError like the following would occur: ``` OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00' ``` """ minute = self.market_opens[TEST_CALENDAR_START] sids = [1, 2] data = DataFrame(data={ 'open': [15.0], 'high': [17.0], 'low': [11.0], 'close': [15.0], 'volume': [100.0] }, index=[minute]) self.writer.write(sids[0], data) data = DataFrame(data={ 'open': [25.0], 'high': [27.0], 'low': [21.0], 'close': [25.0], 'volume': [200.0] }, index=[minute]) self.writer.write(sids[1], data) sid = sids[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(100.0, volume_price) sid = sids[1] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(25.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(27.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') 
self.assertEquals(21.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(25.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(200.0, volume_price) def test_pad_data(self): """ Test writing empty data. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] data = DataFrame(data={ 'open': [15.0], 'high': [17.0], 'low': [11.0], 'close': [15.0], 'volume': [100.0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(100.0, volume_price) def test_nans(self): """ Test writing empty data. 
""" sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] minutes = date_range(minute, periods=9, freq='min') data = DataFrame(data={ 'open': full(9, nan), 'high': full(9, nan), 'low': full(9, nan), 'close': full(9, nan), 'volume': full(9, 0), }, index=[minutes]) self.writer.write(sid, data) fields = ['open', 'high', 'low', 'close', 'volume'] ohlcv_window = self.reader.unadjusted_window(fields, minutes[0], minutes[-1], [sid]) for i, field in enumerate(fields): if field != 'volume': assert_array_equal(full(9, nan), ohlcv_window[i][0]) else: assert_array_equal(zeros(9), ohlcv_window[i][0]) def test_differing_nans(self): """ Also test nans of differing values/construction. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] minutes = date_range(minute, periods=9, freq='min') data = DataFrame(data={ 'open': ((0b11111111111 << 52) + arange(1, 10, dtype=int64)).view(float64), 'high': ((0b11111111111 << 52) + arange(11, 20, dtype=int64)).view(float64), 'low': ((0b11111111111 << 52) + arange(21, 30, dtype=int64)).view(float64), 'close': ((0b11111111111 << 52) + arange(31, 40, dtype=int64)).view(float64), 'volume': full(9, 0), }, index=[minutes]) self.writer.write(sid, data) fields = ['open', 'high', 'low', 'close', 'volume'] ohlcv_window = self.reader.unadjusted_window(fields, minutes[0], minutes[-1], [sid]) for i, field in enumerate(fields): if field != 'volume': assert_array_equal(full(9, nan), ohlcv_window[i][0]) 
else: assert_array_equal(zeros(9), ohlcv_window[i][0]) def test_write_cols(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 cols = { 'open': array([10.0, 11.0]), 'high': array([20.0, 21.0]), 'low': array([30.0, 31.0]), 'close': array([40.0, 41.0]), 'volume': array([50.0, 51.0]) } dts = array([minute_0, minute_1], dtype='datetime64[s]') self.writer.write_cols(sid, dts, cols) open_price = self.reader.get_value(sid, minute_0, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, 'volume') self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) def test_write_cols_mismatch_length(self): dts = date_range(self.market_opens[self.test_calendar_start], periods=2, freq='min').asi8.astype('datetime64[s]') sid = 1 cols = { 'open': array([10.0, 11.0, 12.0]), 'high': array([20.0, 21.0]), 'low': array([30.0, 31.0, 33.0, 34.0]), 'close': array([40.0, 41.0]), 'volume': array([50.0, 51.0, 52.0]) } with self.assertRaises(BcolzMinuteWriterColumnMismatch): self.writer.write_cols(sid, dts, cols) def test_unadjusted_minutes(self): """ Test unadjusted minutes. 
""" start_minute = self.market_opens[TEST_CALENDAR_START] minutes = [ start_minute, start_minute + Timedelta('1 min'), start_minute + Timedelta('2 min') ] sids = [1, 2] data_1 = DataFrame(data={ 'open': [15.0, nan, 15.1], 'high': [17.0, nan, 17.1], 'low': [11.0, nan, 11.1], 'close': [14.0, nan, 14.1], 'volume': [1000, 0, 1001] }, index=minutes) self.writer.write(sids[0], data_1) data_2 = DataFrame(data={ 'open': [25.0, nan, 25.1], 'high': [27.0, nan, 27.1], 'low': [21.0, nan, 21.1], 'close': [24.0, nan, 24.1], 'volume': [2000, 0, 2001] }, index=minutes) self.writer.write(sids[1], data_2) reader = BcolzMinuteBarReader(self.dest) columns = ['open', 'high', 'low', 'close', 'volume'] sids = [sids[0], sids[1]] arrays = reader.unadjusted_window(columns, minutes[0], minutes[-1], sids) data = {sids[0]: data_1, sids[1]: data_2} for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid][col], arrays[i][j]) def test_unadjusted_minutes_early_close(self): """ Test unadjusted minute window, ensuring that early closes are filtered out. 
""" day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC') xmas_eve = Timestamp('2015-12-24', tz='UTC') market_day_after_xmas = Timestamp('2015-12-28', tz='UTC') minutes = [ self.market_closes[day_before_thanksgiving] - Timedelta('2 min'), self.market_closes[xmas_eve] - Timedelta('1 min'), self.market_opens[market_day_after_xmas] + Timedelta('1 min') ] sids = [1, 2] data_1 = DataFrame(data={ 'open': [15.0, 15.1, 15.2], 'high': [17.0, 17.1, 17.2], 'low': [11.0, 11.1, 11.3], 'close': [14.0, 14.1, 14.2], 'volume': [1000, 1001, 1002], }, index=minutes) self.writer.write(sids[0], data_1) data_2 = DataFrame(data={ 'open': [25.0, 25.1, 25.2], 'high': [27.0, 27.1, 27.2], 'low': [21.0, 21.1, 21.2], 'close': [24.0, 24.1, 24.2], 'volume': [2000, 2001, 2002], }, index=minutes) self.writer.write(sids[1], data_2) reader = BcolzMinuteBarReader(self.dest) columns = ['open', 'high', 'low', 'close', 'volume'] sids = [sids[0], sids[1]] arrays = reader.unadjusted_window(columns, minutes[0], minutes[-1], sids) data = {sids[0]: data_1, sids[1]: data_2} start_minute_loc = self.env.market_minutes.get_loc(minutes[0]) minute_locs = [ self.env.market_minutes.get_loc(minute) - start_minute_loc for minute in minutes ] for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid].loc[minutes, col], arrays[i][j][minute_locs]) def test_adjust_non_trading_minutes(self): start_day = Timestamp('2015-06-01', tz='UTC') end_day = Timestamp('2015-06-02', tz='UTC') sid = 1 cols = { 'open': arange(1, 781), 'high': arange(1, 781), 'low': arange(1, 781), 'close': arange(1, 781), 'volume': arange(1, 781) } dts = array(self.env.minutes_for_days_in_range(start_day, end_day)) self.writer.write_cols(sid, dts, cols) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-06-01 20:00:00', tz='UTC'), 'open'), 390) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-06-02 20:00:00', tz='UTC'), 'open'), 780) self.assertEqual( self.reader.get_value(sid, 
Timestamp('2015-06-02', tz='UTC'), 'open'), 390) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-06-02 20:01:00', tz='UTC'), 'open'), 780) def test_adjust_non_trading_minutes_half_days(self): # half day start_day = Timestamp('2015-11-27', tz='UTC') end_day = Timestamp('2015-11-30', tz='UTC') sid = 1 cols = { 'open': arange(1, 601), 'high': arange(1, 601), 'low': arange(1, 601), 'close': arange(1, 601), 'volume': arange(1, 601) } dts = array(self.env.minutes_for_days_in_range(start_day, end_day)) self.writer.write_cols(sid, dts, cols) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-11-27 18:00:00', tz='UTC'), 'open'), 210) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-11-30 21:00:00', tz='UTC'), 'open'), 600) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-11-27 18:01:00', tz='UTC'), 'open'), 210) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-11-30', tz='UTC'), 'open'), 210) self.assertEqual( self.reader.get_value(sid, Timestamp('2015-11-30 21:01:00', tz='UTC'), 'open'), 600)
def test_minute_updates(self):
    """
    Check that bars routed through an HDF5 update file read back unchanged.

    Two sids each get three consecutive minutes starting at the open of
    TEST_CALENDAR_START, the middle bar having nan prices and zero volume.
    The frames are written with H5MinuteBarUpdateWriter, read back with
    H5MinuteBarUpdateReader, handed to the bcolz writer, and finally
    compared with the raw arrays loaded by a new BcolzMinuteBarReader.
    """
    first_minute = self.market_opens[TEST_CALENDAR_START]
    minutes = [first_minute] + [
        first_minute + Timedelta(offset) for offset in ("1 min", "2 min")
    ]
    sids = [1, 2]
    columns = ["open", "high", "low", "close", "volume"]

    data_1 = DataFrame(
        index=minutes,
        data={
            "open": [15.0, nan, 15.1],
            "high": [17.0, nan, 17.1],
            "low": [11.0, nan, 11.1],
            "close": [14.0, nan, 14.1],
            "volume": [1000, 0, 1001],
        },
    )
    data_2 = DataFrame(
        index=minutes,
        data={
            "open": [25.0, nan, 25.1],
            "high": [27.0, nan, 27.1],
            "low": [21.0, nan, 21.1],
            "close": [24.0, nan, 24.1],
            "volume": [2000, 0, 2001],
        },
    )

    # Push the frames through the HDF5 update file and into the bcolz store.
    update_path = self.instance_tmpdir.getpath("updates.h5")
    h5_writer = H5MinuteBarUpdateWriter(update_path)
    h5_writer.write({1: data_1, 2: data_2})
    h5_reader = H5MinuteBarUpdateReader(update_path)
    self.writer.write(h5_reader.read(minutes, sids))

    # Open a new reader on the destination after the write.
    reader = BcolzMinuteBarReader(self.dest)
    sids = list(sids)
    raw_arrays = reader.load_raw_arrays(
        columns,
        minutes[0],
        minutes[-1],
        sids,
    )
    arrays = [transpose(raw) for raw in raw_arrays]

    expected = dict(zip(sids, (data_1, data_2)))

    for col_idx, column in enumerate(columns):
        column_block = arrays[col_idx]
        for sid_idx, sid in enumerate(sids):
            assert_almost_equal(expected[sid][column], column_block[sid_idx])
def transaction_sim(self, **params):
    """Simulate order placement over synthetic bars and check the fills.

    Required keys in ``params``: ``trade_count``, ``trade_interval``,
    ``order_count``, ``order_amount``, ``order_interval``,
    ``expected_txn_count`` and ``expected_txn_volume``.

    Optional keys:

    * ``alternate`` -- if truthy, successive orders flip sign.
    * ``complete_fill`` -- if truthy, each transaction amount must match
      the amount of the order at the same position.
    * ``default_slippage`` -- if truthy, the blotter is built with
      ``None`` as its slippage argument instead of ``FixedSlippage()``.

    Minute bars are used when ``trade_interval`` is shorter than one day,
    daily bars otherwise. Every bar has price 10.1 and volume 100.
    """
    tempdir = TempDirectory()
    try:
        trade_count = params['trade_count']
        trade_interval = params['trade_interval']
        order_count = params['order_count']
        order_amount = params['order_amount']
        order_interval = params['order_interval']
        expected_txn_count = params['expected_txn_count']
        expected_txn_volume = params['expected_txn_volume']

        # Optional switches; both default to None when absent.
        alternate = params.get('alternate')
        complete_fill = params.get('complete_fill')

        env = TradingEnvironment()
        sid = 1

        if trade_interval < timedelta(days=1):
            sim_params = factory.create_simulation_parameters(
                data_frequency="minute")
            window_length = int(
                (trade_interval.total_seconds() / 60) * trade_count) + 100
            minutes = env.market_minute_window(
                sim_params.first_open, window_length)

            price_data = np.array([10.1] * len(minutes))
            minute_frame = pd.DataFrame({
                "open": price_data,
                "high": price_data,
                "low": price_data,
                "close": price_data,
                "volume": np.array([100] * len(minutes)),
                "dt": minutes
            }).set_index("dt")
            assets = {sid: minute_frame}

            write_bcolz_minute_data(
                env,
                env.days_in_range(minutes[0], minutes[-1]),
                tempdir.path,
                assets)
            equity_minute_reader = BcolzMinuteBarReader(tempdir.path)
            data_portal = DataPortal(
                env,
                equity_minute_reader=equity_minute_reader,
            )
        else:
            sim_params = factory.create_simulation_parameters(
                data_frequency="daily")
            days = sim_params.trading_days
            n_days = len(days)

            daily_frame = pd.DataFrame(
                {
                    "open": [10.1] * n_days,
                    "high": [10.1] * n_days,
                    "low": [10.1] * n_days,
                    "close": [10.1] * n_days,
                    "volume": [100] * n_days,
                    "day": [day.value for day in days]
                },
                index=days)
            assets = {1: daily_frame}

            path = os.path.join(tempdir.path, "testdata.bcolz")
            DailyBarWriterFromDataFrames(assets).write(path, days, assets)
            equity_daily_reader = BcolzDailyBarReader(path)
            data_portal = DataPortal(
                env,
                equity_daily_reader=equity_daily_reader,
            )

        # A missing or falsy "default_slippage" selects FixedSlippage.
        slippage_func = (
            None if params.get("default_slippage") else FixedSlippage())
        blotter = Blotter(sim_params.data_frequency,
                          self.env.asset_finder,
                          slippage_func)

        env.write_data(
            equities_data={
                sid: {
                    "start_date": sim_params.trading_days[0],
                    "end_date": sim_params.trading_days[-1]
                }
            })

        start_date = sim_params.first_open
        alternator = -1 if alternate else 1
        tracker = PerformanceTracker(sim_params, self.env)

        # Walk every minute or day of the simulation; each tick either
        # places an order or processes the currently open ones.
        ticks = minutes if sim_params.data_frequency == "minute" else days

        transactions = []
        order_list = []
        order_date = start_date
        for tick in ticks:
            blotter.current_dt = tick

            if not (tick >= order_date and len(order_list) < order_count):
                bar_data = BarData(data_portal, lambda: tick,
                                   sim_params.data_frequency)
                txns, _ = blotter.get_transactions(bar_data)
                for txn in txns:
                    tracker.process_transaction(txn)
                    transactions.append(txn)
                continue

            # Place an order; sign is alternator ** (orders placed so far).
            direction = alternator**len(order_list)
            order_id = blotter.order(
                blotter.asset_finder.retrieve_asset(sid),
                order_amount * direction,
                MarketOrder())
            order_list.append(blotter.orders[order_id])
            order_date = order_date + order_interval

            # An order time at or past hour 21 is moved to 14:30 on the
            # following calendar day.
            if order_date.hour >= 21 and order_date.minute >= 0:
                order_date = (order_date + timedelta(days=1)).replace(
                    hour=14, minute=30)

        for position in range(order_count):
            placed = order_list[position]
            self.assertEqual(placed.sid, sid)
            self.assertEqual(placed.amount,
                             order_amount * alternator**position)

        if complete_fill:
            self.assertEqual(len(transactions), len(order_list))

        total_volume = 0
        for position, txn in enumerate(transactions):
            total_volume += txn.amount
            if complete_fill:
                placed = order_list[position]
                self.assertEqual(placed.amount, txn.amount)

        self.assertEqual(total_volume, expected_txn_volume)
        self.assertEqual(len(transactions), expected_txn_count)

        cumulative_pos = tracker.position_tracker.positions[sid]
        if total_volume == 0:
            self.assertIsNone(cumulative_pos)
        else:
            self.assertEqual(total_volume, cumulative_pos.amount)

        # The open orders should not contain sid.
        oo = blotter.open_orders
        self.assertNotIn(sid, oo, "Entry is removed when no open orders")
    finally:
        tempdir.cleanup()
def test_unadjusted_minutes_early_close(self):
    """
    Test unadjusted minute window, ensuring that early closes are filtered
    out.

    Bars are written at one minute shortly before the close on 2015-11-25,
    one shortly before the close on 2015-12-24, and one just after the open
    on 2015-12-28. The raw window spanning those minutes is then indexed at
    each minute's offset in ``self.trading_calendar.all_minutes`` and
    compared with what was written.
    """
    day_before_thanksgiving = pd.Timestamp("2015-11-25", tz="UTC")
    xmas_eve = pd.Timestamp("2015-12-24", tz="UTC")
    market_day_after_xmas = pd.Timestamp("2015-12-28", tz="UTC")

    closes = self.market_closes
    minutes = [
        closes[day_before_thanksgiving] - pd.Timedelta("2 min"),
        closes[xmas_eve] - pd.Timedelta("1 min"),
        self.market_opens[market_day_after_xmas] + pd.Timedelta("1 min"),
    ]
    sids = [1, 2]

    frame_one = pd.DataFrame(
        data={
            "open": [15.0, 15.1, 15.2],
            "high": [17.0, 17.1, 17.2],
            "low": [11.0, 11.1, 11.3],
            "close": [14.0, 14.1, 14.2],
            "volume": [1000, 1001, 1002],
        },
        index=minutes,
    )
    self.writer.write_sid(sids[0], frame_one)

    frame_two = pd.DataFrame(
        data={
            "open": [25.0, 25.1, 25.2],
            "high": [27.0, 27.1, 27.2],
            "low": [21.0, 21.1, 21.2],
            "close": [24.0, 24.1, 24.2],
            "volume": [2000, 2001, 2002],
        },
        index=minutes,
    )
    self.writer.write_sid(sids[1], frame_two)

    reader = BcolzMinuteBarReader(self.dest)
    columns = ["open", "high", "low", "close", "volume"]
    sids = list(sids)
    raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
    arrays = [np.transpose(block) for block in raw]

    expected = {sids[0]: frame_one, sids[1]: frame_two}

    # Offsets of the written minutes relative to the start of the window.
    all_minutes = self.trading_calendar.all_minutes
    window_start = all_minutes.get_loc(minutes[0])
    minute_locs = [all_minutes.get_loc(m) - window_start for m in minutes]

    for col_pos, col in enumerate(columns):
        for sid_pos, sid in enumerate(sids):
            wanted = expected[sid].loc[minutes, col]
            got = arrays[col_pos][sid_pos][minute_locs]
            assert_almost_equal(wanted, got)
class BcolzMinuteBarTestCase(TestCase):
    """Write/read round-trip tests for the bcolz minute bar writer and reader.

    Each test writes bars for a sid through ``self.writer`` into a per-test
    temporary directory and reads individual values back with
    ``self.reader.get_value``.
    """

    # Order in which fields are read back and compared by the helpers.
    _PRICE_FIELDS = ('open', 'high', 'low', 'close')
    _OHLCV_FIELDS = _PRICE_FIELDS + ('volume',)

    @classmethod
    def setUpClass(cls):
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        # Restrict the market opens to the test calendar range.
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _assert_ohlcv(self, sid, minute, expected):
        """Assert the stored open/high/low/close/volume for one bar.

        ``expected`` holds the five values in open, high, low, close, volume
        order. Fields are read and compared one at a time, in that order.
        """
        for field, value in zip(self._OHLCV_FIELDS, expected):
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

    def test_write_one_ohlcv(self):
        """A single bar written at the first market open reads back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Two consecutive minutes written together both read back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """A bar written at the market open one step after the start reads
        back."""
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in self._PRICE_FIELDS:
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        volume_price = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        """Bars written at minutes on two different days both read back."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        # One bar 60 minutes into the first selected day, one 120 minutes
        # into the second.
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same bar twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
def test_unadjusted_minutes_early_close(self):
    """
    Test unadjusted minute window, ensuring that early closes are filtered
    out.

    Writes three bars per sid (near the close on 2015-11-25 and 2015-12-24,
    and just after the open on 2015-12-28), loads the raw arrays between the
    first and last of those minutes, and compares the entries found at each
    minute's offset within ``self.trading_calendar.all_minutes``.
    """
    day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
    xmas_eve = Timestamp('2015-12-24', tz='UTC')
    market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')

    thanksgiving_minute = (
        self.market_closes[day_before_thanksgiving] - Timedelta('2 min'))
    xmas_eve_minute = self.market_closes[xmas_eve] - Timedelta('1 min')
    after_xmas_minute = (
        self.market_opens[market_day_after_xmas] + Timedelta('1 min'))
    minutes = [thanksgiving_minute, xmas_eve_minute, after_xmas_minute]

    sids = [1, 2]

    data_1 = DataFrame(
        data={
            'open': [15.0, 15.1, 15.2],
            'high': [17.0, 17.1, 17.2],
            'low': [11.0, 11.1, 11.3],
            'close': [14.0, 14.1, 14.2],
            'volume': [1000, 1001, 1002],
        },
        index=minutes,
    )
    self.writer.write_sid(sids[0], data_1)

    data_2 = DataFrame(
        data={
            'open': [25.0, 25.1, 25.2],
            'high': [27.0, 27.1, 27.2],
            'low': [21.0, 21.1, 21.2],
            'close': [24.0, 24.1, 24.2],
            'volume': [2000, 2001, 2002],
        },
        index=minutes,
    )
    self.writer.write_sid(sids[1], data_2)

    reader = BcolzMinuteBarReader(self.dest)
    columns = ['open', 'high', 'low', 'close', 'volume']
    sids = [sids[0], sids[1]]

    loaded = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
    arrays = [transpose(per_column) for per_column in loaded]

    written = {sids[0]: data_1, sids[1]: data_2}

    # Position of each written minute, counted from the window's first
    # minute.
    calendar_minutes = self.trading_calendar.all_minutes
    first_loc = calendar_minutes.get_loc(minutes[0])
    minute_locs = [
        calendar_minutes.get_loc(minute) - first_loc for minute in minutes
    ]

    for i, col in enumerate(columns):
        column_block = arrays[i]
        for j, sid in enumerate(sids):
            assert_almost_equal(written[sid].loc[minutes, col],
                                column_block[j][minute_locs])
def transaction_sim(self, **params):
    """Simulate order placement over synthetic bars and check the fills.

    Required keys in ``params``: ``trade_count``, ``trade_interval``,
    ``order_count``, ``order_amount``, ``order_interval``,
    ``expected_txn_count`` and ``expected_txn_volume``.

    Optional keys:

    * ``alternate`` -- if truthy, successive orders flip sign.
    * ``complete_fill`` -- if truthy, each transaction amount must match
      the amount of the order at the same position.
    * ``default_slippage`` -- if truthy, the blotter is built with ``None``
      instead of ``FixedBasisPointsSlippage()``.

    Minute bars are used when ``trade_interval`` is shorter than one day,
    daily bars otherwise. Every bar has price 10.1 and volume 100.
    """
    trade_count = params["trade_count"]
    trade_interval = params["trade_interval"]
    order_count = params["order_count"]
    order_amount = params["order_amount"]
    order_interval = params["order_interval"]
    expected_txn_count = params["expected_txn_count"]
    expected_txn_volume = params["expected_txn_volume"]

    # Optional switches; both default to None when absent.
    alternate = params.get("alternate")
    complete_fill = params.get("complete_fill")

    asset1 = self.asset_finder.retrieve_asset(1)
    calendar = self.trading_calendar

    with TempDirectory() as tempdir:
        if trade_interval < timedelta(days=1):
            sim_params = factory.create_simulation_parameters(
                start=self.start, end=self.end, data_frequency="minute")
            window_length = int(
                (trade_interval.total_seconds() / 60) * trade_count) + 100
            minutes = calendar.minutes_window(
                sim_params.first_open, window_length)

            price_data = np.array([10.1] * len(minutes))
            minute_frame = pd.DataFrame({
                "open": price_data,
                "high": price_data,
                "low": price_data,
                "close": price_data,
                "volume": np.array([100] * len(minutes)),
                "dt": minutes,
            }).set_index("dt")
            assets = {asset1.sid: minute_frame}

            first_session = calendar.minute_to_session_label(minutes[0])
            last_session = calendar.minute_to_session_label(minutes[-1])
            write_bcolz_minute_data(
                calendar,
                calendar.sessions_in_range(first_session, last_session),
                tempdir.path,
                assets.items(),
            )

            equity_minute_reader = BcolzMinuteBarReader(tempdir.path)
            data_portal = DataPortal(
                self.asset_finder,
                calendar,
                first_trading_day=equity_minute_reader.first_trading_day,
                equity_minute_reader=equity_minute_reader,
            )
        else:
            sim_params = factory.create_simulation_parameters(
                data_frequency="daily")
            days = sim_params.sessions
            n_days = len(days)

            daily_frame = pd.DataFrame(
                {
                    "open": [10.1] * n_days,
                    "high": [10.1] * n_days,
                    "low": [10.1] * n_days,
                    "close": [10.1] * n_days,
                    "volume": [100] * n_days,
                    "day": [day.value for day in days],
                },
                index=days,
            )
            assets = {1: daily_frame}

            path = os.path.join(tempdir.path, "testdata.bcolz")
            daily_writer = BcolzDailyBarWriter(
                path, calendar, days[0], days[-1])
            daily_writer.write(assets.items())

            equity_daily_reader = BcolzDailyBarReader(path)
            data_portal = DataPortal(
                self.asset_finder,
                calendar,
                first_trading_day=equity_daily_reader.first_trading_day,
                equity_daily_reader=equity_daily_reader,
            )

        # A missing or falsy "default_slippage" selects the basis-points
        # model.
        slippage_func = (
            None if params.get("default_slippage")
            else FixedBasisPointsSlippage())
        blotter = SimulationBlotter(slippage_func)

        start_date = sim_params.first_open
        alternator = -1 if alternate else 1

        tracker = MetricsTracker(
            trading_calendar=calendar,
            first_session=sim_params.start_session,
            last_session=sim_params.end_session,
            capital_base=sim_params.capital_base,
            emission_rate=sim_params.emission_rate,
            data_frequency=sim_params.data_frequency,
            asset_finder=self.asset_finder,
            metrics=load_metrics_set("none"),
        )

        # Walk every minute or day of the simulation; each tick either
        # places an order or processes the currently open ones.
        ticks = minutes if sim_params.data_frequency == "minute" else days

        transactions = []
        order_list = []
        order_date = start_date
        for tick in ticks:
            blotter.current_dt = tick

            if not (tick >= order_date and len(order_list) < order_count):
                bar_data = BarData(
                    data_portal=data_portal,
                    simulation_dt_func=lambda: tick,
                    data_frequency=sim_params.data_frequency,
                    trading_calendar=calendar,
                    restrictions=NoRestrictions(),
                )
                txns, _, closed_orders = blotter.get_transactions(bar_data)
                for txn in txns:
                    tracker.process_transaction(txn)
                    transactions.append(txn)

                blotter.prune_orders(closed_orders)
                continue

            # Place an order; sign is alternator ** (orders placed so far).
            direction = alternator**len(order_list)
            order_id = blotter.order(
                asset1,
                order_amount * direction,
                MarketOrder(),
            )
            order_list.append(blotter.orders[order_id])
            order_date = order_date + order_interval

            # An order time at or past hour 21 is moved to 14:30 on the
            # following calendar day.
            if order_date.hour >= 21 and order_date.minute >= 0:
                order_date = (order_date + timedelta(days=1)).replace(
                    hour=14, minute=30)

        for position in range(order_count):
            placed = order_list[position]
            assert placed.asset == asset1
            assert placed.amount == order_amount * alternator**position

        if complete_fill:
            assert len(transactions) == len(order_list)

        total_volume = 0
        for position, txn in enumerate(transactions):
            total_volume += txn.amount
            if complete_fill:
                placed = order_list[position]
                assert placed.amount == txn.amount

        assert total_volume == expected_txn_volume
        assert len(transactions) == expected_txn_count

        if total_volume == 0:
            # With no net volume the tracker is expected to hold no
            # position entry for the asset.
            with pytest.raises(KeyError):
                tracker.positions[asset1]
        else:
            cumulative_pos = tracker.positions[asset1]
            assert total_volume == cumulative_pos.amount

        # The open orders should not contain the asset.
        oo = blotter.open_orders
        assert asset1 not in oo, "Entry is removed when no open orders"
class BcolzMinuteBarTestCase(WithTradingCalendars, WithInstanceTmpDir, ZiplineTestCase): @classmethod def init_class_fixtures(cls): super(BcolzMinuteBarTestCase, cls).init_class_fixtures() cal = cls.trading_calendar.schedule.loc[ TEST_CALENDAR_START:TEST_CALENDAR_STOP ] cls.market_opens = cal.market_open cls.market_closes = cal.market_close cls.test_calendar_start = cls.market_opens.index[0] cls.test_calendar_stop = cls.market_opens.index[-1] def init_instance_fixtures(self): super(BcolzMinuteBarTestCase, self).init_instance_fixtures() self.dest = self.instance_tmpdir.getpath('minute_bars') os.makedirs(self.dest) self.writer = BcolzMinuteBarWriter( self.dest, self.trading_calendar, TEST_CALENDAR_START, TEST_CALENDAR_STOP, US_EQUITIES_MINUTES_PER_DAY, ) self.reader = BcolzMinuteBarReader(self.dest) def test_version(self): metadata = self.reader._get_metadata() self.assertEquals( metadata.version, BcolzMinuteBarMetadata.FORMAT_VERSION, ) def test_write_one_ohlcv(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_write_two_bars(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 data = DataFrame( data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=[minute_0, minute_1]) self.writer.write_sid(sid, 
data) open_price = self.reader.get_value(sid, minute_0, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, 'volume') self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) def test_write_on_second_day(self): second_day = self.test_calendar_start + 1 minute = self.market_opens[second_day] sid = 1 data = DataFrame( data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_write_empty(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ 'open': [0], 'high': [0], 'low': [0], 'close': [0], 'volume': [0] }, index=[minute]) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, 'open') 
assert_almost_equal(nan, open_price) high_price = self.reader.get_value(sid, minute, 'high') assert_almost_equal(nan, high_price) low_price = self.reader.get_value(sid, minute, 'low') assert_almost_equal(nan, low_price) close_price = self.reader.get_value(sid, minute, 'close') assert_almost_equal(nan, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') assert_almost_equal(0, volume_price) def test_write_on_multiple_days(self): tds = self.market_opens.index days = tds[tds.slice_indexer( start=self.test_calendar_start + 1, end=self.test_calendar_start + 3 )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=minutes) self.writer.write_sid(sid, data) minute = minutes[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) minute = minutes[1] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(51.0, volume_price) def test_no_overwrite(self): minute = self.market_opens[TEST_CALENDAR_START] sid = 1 data = DataFrame( data={ 
'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write_sid(sid, data) with self.assertRaises(BcolzMinuteOverlappingData): self.writer.write_sid(sid, data) def test_append_to_same_day(self): """ Test writing data with the same date as existing data in our file. """ sid = 1 first_minute = self.market_opens[TEST_CALENDAR_START] data = DataFrame( data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[first_minute]) self.writer.write_sid(sid, data) # Write data in the same day as the previous minute second_minute = first_minute + Timedelta(minutes=1) new_data = DataFrame( data={ 'open': [5.0], 'high': [10.0], 'low': [3.0], 'close': [7.0], 'volume': [10.0] }, index=[second_minute]) self.writer.write_sid(sid, new_data) open_price = self.reader.get_value(sid, second_minute, 'open') self.assertEquals(5.0, open_price) high_price = self.reader.get_value(sid, second_minute, 'high') self.assertEquals(10.0, high_price) low_price = self.reader.get_value(sid, second_minute, 'low') self.assertEquals(3.0, low_price) close_price = self.reader.get_value(sid, second_minute, 'close') self.assertEquals(7.0, close_price) volume_price = self.reader.get_value(sid, second_minute, 'volume') self.assertEquals(10.0, volume_price) def test_append_on_new_day(self): sid = 1 ohlcv = { 'open': [2.0], 'high': [3.0], 'low': [1.0], 'close': [2.0], 'volume': [10.0] } first_minute = self.market_opens[TEST_CALENDAR_START] data = DataFrame( data=ohlcv, index=[first_minute]) self.writer.write_sid(sid, data) next_day_minute = first_minute + Timedelta(days=1) new_data = DataFrame( data=ohlcv, index=[next_day_minute]) self.writer.write_sid(sid, new_data) second_minute = first_minute + Timedelta(minutes=1) # The second minute should have been padded with zeros for col in ('open', 'high', 'low', 'close'): assert_almost_equal( nan, self.reader.get_value(sid, second_minute, col) ) self.assertEqual( 0, 
self.reader.get_value(sid, second_minute, 'volume') ) # The first day should contain US_EQUITIES_MINUTES_PER_DAY rows. # The second day should contain a single row. self.assertEqual( len(self.writer._ensure_ctable(sid)), US_EQUITIES_MINUTES_PER_DAY + 1, ) def test_write_multiple_sids(self): """ Test writing multiple sids. Tests both that the data is written to the correct sid, as well as ensuring that the logic for creating the subdirectory path to each sid does not cause issues from attempts to recreate existing paths. (Calling out this coverage, because an assertion of that logic does not show up in the test itself, but is exercised by the act of attempting to write two consecutive sids, which would be written to the same containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz) Before applying a check to make sure the path writing did not re-attempt directory creation an OSError like the following would occur: ``` OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00' ``` """ minute = self.market_opens[TEST_CALENDAR_START] sids = [1, 2] data = DataFrame( data={ 'open': [15.0], 'high': [17.0], 'low': [11.0], 'close': [15.0], 'volume': [100.0] }, index=[minute]) self.writer.write_sid(sids[0], data) data = DataFrame( data={ 'open': [25.0], 'high': [27.0], 'low': [21.0], 'close': [25.0], 'volume': [200.0] }, index=[minute]) self.writer.write_sid(sids[1], data) sid = sids[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(100.0, volume_price) sid = sids[1] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(25.0, open_price) 
high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(27.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(21.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(25.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(200.0, volume_price) def test_pad_data(self): """ Test writing empty data. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq day = TEST_CALENDAR_START + freq minute = self.market_opens[day] data = DataFrame( data={ 'open': [15.0], 'high': [17.0], 'low': [11.0], 'close': [15.0], 'volume': [100.0] }, index=[minute]) self.writer.write_sid(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(100.0, volume_price) # Check that if we then pad the rest of this day, we end up with # 2 days worth of minutes. self.writer.pad(sid, day) self.assertEqual( len(self.writer._ensure_ctable(sid)), self.writer._minutes_per_day * 2, ) def test_nans(self): """ Test writing empty data. 
""" sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] minutes = date_range(minute, periods=9, freq='min') data = DataFrame( data={ 'open': full(9, nan), 'high': full(9, nan), 'low': full(9, nan), 'close': full(9, nan), 'volume': full(9, 0.0), }, index=[minutes]) self.writer.write_sid(sid, data) fields = ['open', 'high', 'low', 'close', 'volume'] ohlcv_window = list(map(transpose, self.reader.load_raw_arrays( fields, minutes[0], minutes[-1], [sid], ))) for i, field in enumerate(fields): if field != 'volume': assert_array_equal(full(9, nan), ohlcv_window[i][0]) else: assert_array_equal(zeros(9), ohlcv_window[i][0]) def test_differing_nans(self): """ Also test nans of differing values/construction. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] minutes = date_range(minute, periods=9, freq='min') data = DataFrame( data={ 'open': ((0b11111111111 << 52) + arange(1, 10, dtype=int64)). view(float64), 'high': ((0b11111111111 << 52) + arange(11, 20, dtype=int64)). view(float64), 'low': ((0b11111111111 << 52) + arange(21, 30, dtype=int64)). view(float64), 'close': ((0b11111111111 << 52) + arange(31, 40, dtype=int64)). 
view(float64), 'volume': full(9, 0.0), }, index=[minutes]) self.writer.write_sid(sid, data) fields = ['open', 'high', 'low', 'close', 'volume'] ohlcv_window = list(map(transpose, self.reader.load_raw_arrays( fields, minutes[0], minutes[-1], [sid], ))) for i, field in enumerate(fields): if field != 'volume': assert_array_equal(full(9, nan), ohlcv_window[i][0]) else: assert_array_equal(zeros(9), ohlcv_window[i][0]) def test_write_cols(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 cols = { 'open': array([10.0, 11.0]), 'high': array([20.0, 21.0]), 'low': array([30.0, 31.0]), 'close': array([40.0, 41.0]), 'volume': array([50.0, 51.0]) } dts = array([minute_0, minute_1], dtype='datetime64[s]') self.writer.write_cols(sid, dts, cols) open_price = self.reader.get_value(sid, minute_0, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, 'volume') self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) def test_write_cols_mismatch_length(self): dts = date_range(self.market_opens[self.test_calendar_start], periods=2, freq='min').asi8.astype('datetime64[s]') sid = 1 cols = { 'open': array([10.0, 11.0, 12.0]), 'high': array([20.0, 21.0]), 'low': 
array([30.0, 31.0, 33.0, 34.0]), 'close': array([40.0, 41.0]), 'volume': array([50.0, 51.0, 52.0]) } with self.assertRaises(BcolzMinuteWriterColumnMismatch): self.writer.write_cols(sid, dts, cols) def test_unadjusted_minutes(self): """ Test unadjusted minutes. """ start_minute = self.market_opens[TEST_CALENDAR_START] minutes = [start_minute, start_minute + Timedelta('1 min'), start_minute + Timedelta('2 min')] sids = [1, 2] data_1 = DataFrame( data={ 'open': [15.0, nan, 15.1], 'high': [17.0, nan, 17.1], 'low': [11.0, nan, 11.1], 'close': [14.0, nan, 14.1], 'volume': [1000, 0, 1001] }, index=minutes) self.writer.write_sid(sids[0], data_1) data_2 = DataFrame( data={ 'open': [25.0, nan, 25.1], 'high': [27.0, nan, 27.1], 'low': [21.0, nan, 21.1], 'close': [24.0, nan, 24.1], 'volume': [2000, 0, 2001] }, index=minutes) self.writer.write_sid(sids[1], data_2) reader = BcolzMinuteBarReader(self.dest) columns = ['open', 'high', 'low', 'close', 'volume'] sids = [sids[0], sids[1]] arrays = list(map(transpose, reader.load_raw_arrays( columns, minutes[0], minutes[-1], sids, ))) data = {sids[0]: data_1, sids[1]: data_2} for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid][col], arrays[i][j]) def test_unadjusted_minutes_early_close(self): """ Test unadjusted minute window, ensuring that early closes are filtered out. 
""" day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC') xmas_eve = Timestamp('2015-12-24', tz='UTC') market_day_after_xmas = Timestamp('2015-12-28', tz='UTC') minutes = [self.market_closes[day_before_thanksgiving] - Timedelta('2 min'), self.market_closes[xmas_eve] - Timedelta('1 min'), self.market_opens[market_day_after_xmas] + Timedelta('1 min')] sids = [1, 2] data_1 = DataFrame( data={ 'open': [ 15.0, 15.1, 15.2], 'high': [17.0, 17.1, 17.2], 'low': [11.0, 11.1, 11.3], 'close': [14.0, 14.1, 14.2], 'volume': [1000, 1001, 1002], }, index=minutes) self.writer.write_sid(sids[0], data_1) data_2 = DataFrame( data={ 'open': [25.0, 25.1, 25.2], 'high': [27.0, 27.1, 27.2], 'low': [21.0, 21.1, 21.2], 'close': [24.0, 24.1, 24.2], 'volume': [2000, 2001, 2002], }, index=minutes) self.writer.write_sid(sids[1], data_2) reader = BcolzMinuteBarReader(self.dest) columns = ['open', 'high', 'low', 'close', 'volume'] sids = [sids[0], sids[1]] arrays = list(map(transpose, reader.load_raw_arrays( columns, minutes[0], minutes[-1], sids, ))) data = {sids[0]: data_1, sids[1]: data_2} start_minute_loc = \ self.trading_calendar.all_minutes.get_loc(minutes[0]) minute_locs = [ self.trading_calendar.all_minutes.get_loc(minute) - start_minute_loc for minute in minutes ] for i, col in enumerate(columns): for j, sid in enumerate(sids): assert_almost_equal(data[sid].loc[minutes, col], arrays[i][j][minute_locs]) def test_adjust_non_trading_minutes(self): start_day = Timestamp('2015-06-01', tz='UTC') end_day = Timestamp('2015-06-02', tz='UTC') sid = 1 cols = { 'open': arange(1, 781), 'high': arange(1, 781), 'low': arange(1, 781), 'close': arange(1, 781), 'volume': arange(1, 781) } dts = array(self.trading_calendar.minutes_for_sessions_in_range( self.trading_calendar.minute_to_session_label(start_day), self.trading_calendar.minute_to_session_label(end_day) )) self.writer.write_cols(sid, dts, cols) self.assertEqual( self.reader.get_value( sid, Timestamp('2015-06-01 20:00:00', tz='UTC'), 
'open'), 390) self.assertEqual( self.reader.get_value( sid, Timestamp('2015-06-02 20:00:00', tz='UTC'), 'open'), 780) with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-06-02', tz='UTC'), 'open' ) with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-06-02 20:01:00', tz='UTC'), 'open' ) def test_adjust_non_trading_minutes_half_days(self): # half day start_day = Timestamp('2015-11-27', tz='UTC') end_day = Timestamp('2015-11-30', tz='UTC') sid = 1 cols = { 'open': arange(1, 601), 'high': arange(1, 601), 'low': arange(1, 601), 'close': arange(1, 601), 'volume': arange(1, 601) } dts = array( self.trading_calendar.minutes_for_sessions_in_range( self.trading_calendar.minute_to_session_label(start_day), self.trading_calendar.minute_to_session_label(end_day) ) ) self.writer.write_cols(sid, dts, cols) self.assertEqual( self.reader.get_value( sid, Timestamp('2015-11-27 18:00:00', tz='UTC'), 'open'), 210) self.assertEqual( self.reader.get_value( sid, Timestamp('2015-11-30 21:00:00', tz='UTC'), 'open'), 600) self.assertEqual( self.reader.get_value( sid, Timestamp('2015-11-27 18:01:00', tz='UTC'), 'open'), 210) with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-11-30', tz='UTC'), 'open' ) with self.assertRaises(NoDataOnDate): self.reader.get_value( sid, Timestamp('2015-11-30 21:01:00', tz='UTC'), 'open' ) def test_set_sid_attrs(self): """Confirm that we can set the attributes of a sid's file correctly. 
""" sid = 1 start_day = Timestamp('2015-11-27', tz='UTC') end_day = Timestamp('2015-06-02', tz='UTC') attrs = { 'start_day': start_day.value / int(1e9), 'end_day': end_day.value / int(1e9), 'factor': 100, } # Write the attributes self.writer.set_sid_attrs(sid, **attrs) # Read the attributes for k, v in attrs.items(): self.assertEqual(self.reader.get_sid_attr(sid, k), v) def test_truncate_between_data_points(self): tds = self.market_opens.index days = tds[tds.slice_indexer( start=self.test_calendar_start + 1, end=self.test_calendar_start + 3 )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=minutes) self.writer.write_sid(sid, data) # Truncate to first day with data. self.writer.truncate(days[0]) # Refresh the reader since truncate update the metadata. self.reader = BcolzMinuteBarReader(self.dest) self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0]) cal = self.trading_calendar _, last_close = cal.open_and_close_for_session(days[0]) self.assertEqual(self.reader.last_available_dt, last_close) minute = minutes[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_truncate_all_data_points(self): tds = self.market_opens.index days = tds[tds.slice_indexer( start=self.test_calendar_start + 1, end=self.test_calendar_start + 3 )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), 
self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=minutes) self.writer.write_sid(sid, data) # Truncate to first day in the calendar, a day before the first # day with minute data. self.writer.truncate(self.test_calendar_start) # Refresh the reader since truncate update the metadata. self.reader = BcolzMinuteBarReader(self.dest) self.assertEqual( self.writer.last_date_in_output_for_sid(sid), self.test_calendar_start, ) cal = self.trading_calendar _, last_close = cal.open_and_close_for_session( self.test_calendar_start) self.assertEqual(self.reader.last_available_dt, last_close)
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for ``BcolzMinuteBarWriter`` / ``BcolzMinuteBarReader``.

    Each test writes a handful of minute bars into a fresh temporary
    directory and reads them back through ``self.reader``.
    """

    # Field order used by ``_assert_ohlcv`` for its ``expected`` argument.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        # Restrict the market opens to the test calendar window.
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _assert_ohlcv(self, sid, minute, expected):
        """Assert the bar read back for ``sid`` at ``minute``.

        ``expected`` holds five values in open, high, low, close, volume
        order; each is compared with ``self.reader.get_value``.
        """
        for field, value in zip(self._OHLCV_FIELDS, expected):
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field), msg=field)

    def test_write_one_ohlcv(self):
        """Write a single bar at the first market open and read it back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Write two consecutive minutes and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """Write a bar whose first minute falls on the second calendar day."""
        # NOTE(review): ``Timestamp + int`` relies on freq-aware arithmetic
        # that newer pandas releases removed -- confirm the pinned version.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Write one bar on each of two different days and read both back."""
        tds = self.market_opens.index
        # NOTE(review): ``Timestamp + int`` relies on freq-aware arithmetic
        # that newer pandas releases removed -- confirm the pinned version.
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises ``BcolzMinuteOverlappingData``."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute])
        self.writer.write(sids[1], data)

        self._assert_ohlcv(sids[0], minute, (15.0, 17.0, 11.0, 15.0, 100.0))
        self._assert_ohlcv(sids[1], minute, (25.0, 27.0, 21.0, 25.0, 200.0))

    def test_pad_data(self):
        """
        Test writing empty data.
        """
        sid = 1
        # Nothing has been written for this sid yet.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        # Real data written one ``freq`` step after the padded day must
        # still be readable.
        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (15.0, 17.0, 11.0, 15.0, 100.0))

    def test_write_cols(self):
        """Write two consecutive minutes with ``write_cols`` and read them back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._assert_ohlcv(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))
def setUp(self):
    """Build the ``DataPortal`` used by the tests.

    The minute reader is opened on ``self.tempdir.path``, the daily
    reader comes from ``self.build_daily_data()`` and adjustments come
    from ``self.adj_reader``.
    """
    env = self.env
    minute_reader = BcolzMinuteBarReader(self.tempdir.path)
    daily_reader = self.build_daily_data()
    self.data_portal = DataPortal(
        env,
        equity_minute_reader=minute_reader,
        equity_daily_reader=daily_reader,
        adjustment_reader=self.adj_reader,
    )
def test_truncate_between_data_points(self):
    """Truncate to the first day with data through a re-opened writer.

    Two bars are written on consecutive days, the output is truncated to
    the first of those days, and the bar on that day must remain readable.
    """
    tds = self.market_opens.index
    days = tds[tds.slice_indexer(
        start=self.test_calendar_start + timedelta(days=1),
        end=self.test_calendar_start + timedelta(days=3),
    )]
    minutes = DatetimeIndex([
        self.market_opens[days[0]] + timedelta(minutes=60),
        self.market_opens[days[1]] + timedelta(minutes=120),
    ])
    sid = 1
    data = DataFrame(
        data={
            "open": [10.0, 11.0],
            "high": [20.0, 21.0],
            "low": [30.0, 31.0],
            "close": [40.0, 41.0],
            "volume": [50.0, 51.0],
        },
        index=minutes,
    )
    self.writer.write_sid(sid, data)

    # Open a new writer to cover the `open` method; truncating also only
    # applies to an existing directory.
    writer = BcolzMinuteBarWriter.open(self.dest)

    # Truncate to first day with data.
    writer.truncate(days[0])

    # Refresh the reader since truncate updates the metadata.
    self.reader = BcolzMinuteBarReader(self.dest)

    self.assertEqual(self.writer.last_date_in_output_for_sid(sid), days[0])

    cal = self.trading_calendar
    _, last_close = cal.open_and_close_for_session(days[0])
    self.assertEqual(self.reader.last_available_dt, last_close)

    # The bar on the retained day must still be readable.
    minute = minutes[0]
    expected = (
        ("open", 10.0),
        ("high", 20.0),
        ("low", 30.0),
        ("close", 40.0),
        ("volume", 50.0),
    )
    for field, value in expected:
        self.assertEqual(
            value, self.reader.get_value(sid, minute, field), msg=field
        )
def create_data_portal_from_trade_history(asset_finder, trading_calendar,
                                          tempdir, sim_params, trades_by_sid):
    """Write ``trades_by_sid`` to bcolz under ``tempdir`` and wrap it in a
    ``DataPortal``.

    When ``sim_params.data_frequency`` is ``"daily"`` the trades are
    converted with ``trades_by_sid_to_dfs`` and written as daily bars to
    ``testdaily.bcolz``. Otherwise one minute-bar frame per sid is built
    over every minute between ``sim_params.first_open`` and
    ``sim_params.last_close``: minutes without a trade stay zero and
    prices are multiplied by 1000 before being written.

    Returns the ``DataPortal`` reading from the data just written.
    """
    if sim_params.data_frequency == "daily":
        daily_path = os.path.join(tempdir.path, "testdaily.bcolz")
        daily_writer = BcolzDailyBarWriter(
            daily_path,
            trading_calendar,
            sim_params.start_session,
            sim_params.end_session,
        )
        daily_frames = trades_by_sid_to_dfs(trades_by_sid, sim_params.sessions)
        daily_writer.write(daily_frames)

        daily_reader = BcolzDailyBarReader(daily_path)
        return DataPortal(
            asset_finder,
            trading_calendar,
            first_trading_day=daily_reader.first_trading_day,
            equity_daily_reader=daily_reader,
        )

    # Minute mode: one zero-filled row per calendar minute in the range.
    all_minutes = trading_calendar.minutes_in_range(
        sim_params.first_open,
        sim_params.last_close,
    )
    n_minutes = len(all_minutes)

    frames_by_sid = {}
    for sidint, trades in iteritems(trades_by_sid):
        bars = {
            name: np.zeros(n_minutes)
            for name in ("open", "high", "low", "close", "volume")
        }

        for trade in trades:
            # Drop each trade into the slot of its minute.
            slot = all_minutes.searchsorted(trade.dt)

            bars["open"][slot] = trade.open_price * 1000
            bars["high"][slot] = trade.high * 1000
            bars["low"][slot] = trade.low * 1000
            bars["close"][slot] = trade.close_price * 1000
            bars["volume"][slot] = trade.volume

        bars["dt"] = all_minutes
        frames_by_sid[sidint] = pd.DataFrame(bars).set_index("dt")

    write_bcolz_minute_data(
        trading_calendar,
        sim_params.sessions,
        tempdir.path,
        frames_by_sid,
    )

    minute_reader = BcolzMinuteBarReader(tempdir.path)
    return DataPortal(
        asset_finder,
        trading_calendar,
        first_trading_day=minute_reader.first_trading_day,
        equity_minute_reader=minute_reader,
    )
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for ``BcolzMinuteBarWriter`` / ``BcolzMinuteBarReader``.

    Each test writes a handful of minute bars into a fresh temporary
    directory and reads them back through ``self.reader``.
    """

    # Field order used by ``_assert_ohlcv`` for its ``expected`` argument.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        # Restrict the market opens to the test calendar window.
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START, end=TEST_CALENDAR_STOP)
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _assert_ohlcv(self, sid, minute, expected):
        """Assert the bar read back for ``sid`` at ``minute``.

        ``expected`` holds five values in open, high, low, close, volume
        order; each is compared with ``self.reader.get_value``.
        """
        for field, value in zip(self._OHLCV_FIELDS, expected):
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field), msg=field)

    def test_write_one_ohlcv(self):
        """Write a single bar at the first market open and read it back."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }, index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Write two consecutive minutes and read both back."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }, index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """Write a bar whose first minute falls on the second calendar day."""
        # NOTE(review): ``Timestamp + int`` relies on freq-aware arithmetic
        # that newer pandas releases removed -- confirm the pinned version.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }, index=[minute])
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(data={
            'open': [0],
            'high': [0],
            'low': [0],
            'close': [0],
            'volume': [0]
        }, index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Write one bar on each of two different days and read both back."""
        tds = self.market_opens.index
        # NOTE(review): ``Timestamp + int`` relies on freq-aware arithmetic
        # that newer pandas releases removed -- confirm the pinned version.
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }, index=minutes)
        self.writer.write(sid, data)

        self._assert_ohlcv(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_ohlcv(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises ``BcolzMinuteOverlappingData``."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }, index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)