def test_v5_to_v4_selects_most_recent_ticker(self): T = pd.Timestamp AssetDBWriter(self.engine).write( equities=pd.DataFrame( [['A', 'A', T('2014-01-01'), T('2014-01-02')], ['B', 'B', T('2014-01-01'), T('2014-01-02')], # these two are both ticker sid 2 ['B', 'C', T('2014-01-03'), T('2014-01-04')], ['C', 'C', T('2014-01-01'), T('2014-01-02')]], index=[0, 1, 2, 2], columns=['symbol', 'asset_name', 'start_date', 'end_date'], ), ) downgrade(self.engine, 4) metadata = sa.MetaData(self.engine) metadata.reflect() def select_fields(r): return r.sid, r.symbol, r.asset_name, r.start_date, r.end_date expected_data = { (0, 'A', 'A', T('2014-01-01').value, T('2014-01-02').value), (1, 'B', 'B', T('2014-01-01').value, T('2014-01-02').value), (2, 'B', 'C', T('2014-01-01').value, T('2014-01-04').value), } actual_data = set(map( select_fields, sa.select(metadata.tables['equities'].c).execute(), )) assert_equal(expected_data, actual_data)
def test_downgrade(self): # Attempt to downgrade a current assets db all the way down to v0 conn = self.engine.connect() # first downgrade to v3 downgrade(self.engine, 3) metadata = sa.MetaData(conn) metadata.reflect() check_version_info(conn, metadata.tables['version_info'], 3) self.assertFalse('exchange_full' in metadata.tables) # now go all the way to v0 downgrade(self.engine, 0) # Verify that the db version is now 0 metadata = sa.MetaData(conn) metadata.reflect() version_table = metadata.tables['version_info'] check_version_info(conn, version_table, 0) # Check some of the v1-to-v0 downgrades self.assertTrue('futures_contracts' in metadata.tables) self.assertTrue('version_info' in metadata.tables) self.assertFalse('tick_size' in metadata.tables['futures_contracts'].columns) self.assertTrue('contract_multiplier' in metadata.tables['futures_contracts'].columns)
def test_impossible_downgrade(self): # Attempt to downgrade a current assets db to a # higher-than-current version with self.assertRaises(AssetDBImpossibleDowngrade): downgrade(self.engine, ASSET_DB_VERSION + 5)
def ingest(name, environ=os.environ, timestamp=None, assets_versions=(), show_progress=False): """Ingest data for a given bundle. Parameters ---------- name : str The name of the bundle. environ : mapping, optional The environment variables. By default this is os.environ. timestamp : datetime, optional The timestamp to use for the load. By default this is the current time. assets_versions : Iterable[int], optional Versions of the assets db to which to downgrade. show_progress : bool, optional Tell the ingest function to display the progress where possible. """ try: bundle = bundles[name] except KeyError: raise UnknownBundle(name) calendar = get_calendar(bundle.calendar_name) start_session = bundle.start_session end_session = bundle.end_session if start_session is None or start_session < calendar.first_session: start_session = calendar.first_session if end_session is None or end_session > calendar.last_session: end_session = calendar.last_session if timestamp is None: timestamp = pd.Timestamp.utcnow() timestamp = timestamp.tz_convert('utc').tz_localize(None) timestr = to_bundle_ingest_dirname(timestamp) cachepath = cache_path(name, environ=environ) pth.ensure_directory(pth.data_path([name, timestr], environ=environ)) pth.ensure_directory(cachepath) with dataframe_cache(cachepath, clean_on_failure=False) as cache, \ ExitStack() as stack: # we use `cleanup_on_failure=False` so that we don't purge the # cache directory if the load fails in the middle if bundle.create_writers: wd = stack.enter_context( working_dir(pth.data_path([], environ=environ))) daily_bars_path = wd.ensure_dir( *daily_equity_relative(name, timestr)) daily_bar_writer = BcolzDailyBarWriter( daily_bars_path, calendar, start_session, end_session, ) # Do an empty write to ensure that the daily ctables exist # when we create the SQLiteAdjustmentWriter below. The # SQLiteAdjustmentWriter needs to open the daily ctables so # that it can compute the adjustment ratios for the dividends. daily_bar_writer.write(()) minute_bar_writer = BcolzMinuteBarWriter( wd.ensure_dir(*minute_equity_relative(name, timestr)), calendar, start_session, end_session, minutes_per_day=bundle.minutes_per_day, ) assets_db_path = wd.getpath(*asset_db_relative(name, timestr)) asset_db_writer = AssetDBWriter(assets_db_path) adjustment_db_writer = stack.enter_context( SQLiteAdjustmentWriter( wd.getpath(*adjustment_db_relative(name, timestr)), BcolzDailyBarReader(daily_bars_path), overwrite=True, )) else: daily_bar_writer = None minute_bar_writer = None asset_db_writer = None adjustment_db_writer = None if assets_versions: raise ValueError('Need to ingest a bundle that creates ' 'writers in order to downgrade the assets' ' db.') log.info("Ingesting {}.", name) bundle.ingest( environ, asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_db_writer, calendar, start_session, end_session, cache, show_progress, pth.data_path([name, timestr], environ=environ), ) for version in sorted(set(assets_versions), reverse=True): version_path = wd.getpath(*asset_db_relative( name, timestr, db_version=version, )) with working_file(version_path) as wf: shutil.copy2(assets_db_path, wf.path) downgrade(wf.path, version)
def ingest(name, environ=os.environ, timestamp=None, assets_versions=(), show_progress=False): """Ingest data for a given bundle. Parameters ---------- name : str The name of the bundle. environ : mapping, optional The environment variables. By default this is os.environ. timestamp : datetime, optional The timestamp to use for the load. By default this is the current time. assets_versions : Iterable[int], optional Versions of the assets db to which to downgrade. show_progress : bool, optional Tell the ingest function to display the progress where possible. """ try: bundle = bundles[name] except KeyError: raise UnknownBundle(name) calendar = get_calendar(bundle.calendar_name) start_session = bundle.start_session end_session = bundle.end_session if start_session is None or start_session < calendar.first_session: start_session = calendar.first_session if end_session is None or end_session > calendar.last_session: end_session = calendar.last_session if timestamp is None: timestamp = pd.Timestamp.utcnow() timestamp = timestamp.tz_convert('utc').tz_localize(None) timestr = to_bundle_ingest_dirname(timestamp) cachepath = cache_path(name, environ=environ) pth.ensure_directory(pth.data_path([name, timestr], environ=environ)) pth.ensure_directory(cachepath) with dataframe_cache(cachepath, clean_on_failure=False) as cache, \ ExitStack() as stack: # we use `cleanup_on_failure=False` so that we don't purge the # cache directory if the load fails in the middle if bundle.create_writers: wd = stack.enter_context(working_dir( pth.data_path([], environ=environ)) ) daily_bars_path = wd.ensure_dir( *daily_equity_relative( name, timestr, environ=environ, ) ) daily_bar_writer = BcolzDailyBarWriter( daily_bars_path, calendar, start_session, end_session, ) # Do an empty write to ensure that the daily ctables exist # when we create the SQLiteAdjustmentWriter below. The # SQLiteAdjustmentWriter needs to open the daily ctables so # that it can compute the adjustment ratios for the dividends. daily_bar_writer.write(()) minute_bar_writer = BcolzMinuteBarWriter( wd.ensure_dir(*minute_equity_relative( name, timestr, environ=environ) ), calendar, start_session, end_session, minutes_per_day=bundle.minutes_per_day, ) assets_db_path = wd.getpath(*asset_db_relative( name, timestr, environ=environ, )) asset_db_writer = AssetDBWriter(assets_db_path) adjustment_db_writer = stack.enter_context( SQLiteAdjustmentWriter( wd.getpath(*adjustment_db_relative( name, timestr, environ=environ)), BcolzDailyBarReader(daily_bars_path), calendar.all_sessions, overwrite=True, ) ) else: daily_bar_writer = None minute_bar_writer = None asset_db_writer = None adjustment_db_writer = None if assets_versions: raise ValueError('Need to ingest a bundle that creates ' 'writers in order to downgrade the assets' ' db.') bundle.ingest( environ, asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_db_writer, calendar, start_session, end_session, cache, show_progress, pth.data_path([name, timestr], environ=environ), ) for version in sorted(set(assets_versions), reverse=True): version_path = wd.getpath(*asset_db_relative( name, timestr, environ=environ, db_version=version, )) with working_file(version_path) as wf: shutil.copy2(assets_db_path, wf.path) downgrade(wf.path, version)