def test_bundle(self):
    # takes under 3 seconds
    ingest(TEST_BUNDLE_NAME)
    bundle = load(TEST_BUNDLE_NAME)
    sids = TEST_SIDS
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.start_date, 'bfill')],
        sessions[sessions.get_loc(self.end_date, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )
def load_data():
    print('Loading data...')

    from zipline.data.bundles import register
    from estimize.zipline.data.bundles.yahoo import yahoo_bundle

    tickers = {
        'SPY',
    }
    register(
        'yahoo',
        yahoo_bundle(tickers),
    )
    bundles_module.ingest(
        'yahoo',
        os.environ,
        pd.Timestamp.utcnow(),
        [],
        True,
    )
    bundles_module.ingest(
        'quantopian-quandl',
        os.environ,
        pd.Timestamp.utcnow(),
        [],
        True,
    )
def ingest_data(bundle_name):
    ingest_start_time = time.time()
    log.info('preparing "{}" dataset ...'.format(bundle_name))
    bundles_module.ingest(bundle_name, show_progress=True)
    duration = format(time.time() - ingest_start_time, '0.2f')
    log.info('{} ingested in {} seconds'.format(bundle_name, duration))
def test_bundle(self):
    environ = {
        'CSVDIR': test_resource_path('csvdir_samples', 'csvdir')
    }

    ingest('csvdir', environ=environ)
    bundle = load('csvdir', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    assert_equal([sorted(adj.keys()) for adj in adjs_for_cols],
                 expected_adjustments)
def test_bundle(self):
    with open(test_resource_path(
            'quandl_samples',
            'QUANDL_ARCHIVE.zip'), 'rb') as quandl_response:
        self.responses.add(
            self.responses.GET,
            'https://file_url.mock.quandl',
            body=quandl_response.read(),
            content_type='application/zip',
            status=200,
        )

    url_map = {
        format_metadata_url(self.api_key): test_resource_path(
            'quandl_samples',
            'metadata.csv.gz',
        )
    }

    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    with patch_read_csv(url_map):
        ingest('quandl', environ=environ)

    bundle = load('quandl', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.start_date, 'bfill')],
        sessions[sessions.get_loc(self.end_date, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjs_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )
def test_bundle(self):
    with open(
        join(TEST_RESOURCE_PATH, "quandl_samples", "QUANDL_ARCHIVE.zip"),
        "rb",
    ) as quandl_response:
        self.responses.add(
            self.responses.GET,
            "https://file_url.mock.quandl",
            body=quandl_response.read(),
            content_type="application/zip",
            status=200,
        )

    url_map = {
        format_metadata_url(self.api_key): join(
            TEST_RESOURCE_PATH,
            "quandl_samples",
            "metadata.csv.gz",
        )
    }

    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        "ZIPLINE_ROOT": zipline_root,
        "QUANDL_API_KEY": self.api_key,
    }

    with patch_read_csv(url_map):
        ingest("quandl", environ=environ)

    bundle = load("quandl", environ=environ)
    sids = 0, 1, 2, 3
    assert set(bundle.asset_finder.sids) == set(sids)

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.start_date, "bfill")],
        sessions[sessions.get_loc(self.end_date, "ffill")],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    np.testing.assert_array_almost_equal(actual, expected_pricing, decimal=2)

    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(
        self.columns, adjs_for_cols, expected_adjustments
    ):
        assert adjustments == expected, column
def __register_bundle__(self):
    from zipline.data.bundles import register
    from zipline.data.bundles import ingest

    register(
        self.bundle_name,  # name this whatever you like
        self.tradea_bundle(self.symbol_list),
    )
    ingest(self.bundle_name)
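# For reference: `tradea_bundle` above is presumably a factory that returns a
# callable with zipline's standard bundle signature. A self-contained sketch
# of that pattern under that assumption -- `make_demo_bundle` and its flat
# synthetic prices are illustrative, not code from this repository:
import pandas as pd

def make_demo_bundle(symbols):
    def bundle(environ, asset_db_writer, minute_bar_writer, daily_bar_writer,
               adjustment_writer, calendar, start_session, end_session,
               cache, show_progress, output_dir):
        sessions = calendar.sessions_in_range(start_session, end_session)
        # One flat synthetic price series, reused for every symbol.
        bars = pd.DataFrame(
            {'open': 1.0, 'high': 1.0, 'low': 1.0, 'close': 1.0,
             'volume': 100},
            index=sessions,
        )
        daily_bar_writer.write(
            ((sid, bars) for sid in range(len(symbols))),
            show_progress=show_progress,
        )
        asset_db_writer.write(equities=pd.DataFrame(
            {'symbol': list(symbols),
             'start_date': sessions[0],
             'end_date': sessions[-1],
             'exchange': 'NYSE'},
            index=pd.RangeIndex(len(symbols), name='sid'),
        ))
        adjustment_writer.write()  # no splits or dividends
    return bundle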
def ingest(bundle, show_progress):
    """Ingest the data for the given bundle."""
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        show_progress,
    )
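# A sketch of how a wrapper like the one above is typically exposed as a
# click command (zipline's own CLI does something similar). The decorator
# stack and the name `ingest_cli` are assumptions, not code from this
# repository; `show_progress` is passed by keyword so the call works across
# zipline versions whose `ingest` signatures differ in positional arguments.
import os

import click
import pandas as pd
from zipline.data import bundles as bundles_module

@click.command()
@click.option('-b', '--bundle', default='quandl',
              help='The data bundle to ingest.')
@click.option('--show-progress/--no-show-progress', default=True,
              help='Print progress information to the terminal.')
def ingest_cli(bundle, show_progress):
    """Ingest the data for the given bundle."""
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        show_progress=show_progress,
    )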
def ingest(bundle, assets_version, show_progress):
    if bundle == 'tdx':
        register('tdx', tdx_bundle, 'SHSZ')
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        assets_version,
        show_progress,
    )
def test_bundle(self):
    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    # custom bundles need to be registered before use or they will not
    # be recognized
    register(
        'ZacksQuandl',
        from_zacks_dump(
            test_resource_path('zacks_samples', 'fictitious.csv')))
    ingest('ZacksQuandl', environ=environ)

    # load bundle now that it has been ingested
    bundle = load('ZacksQuandl', environ=environ)
    sids = 0, 1, 2

    # check sids match
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    # check asset_{start, end} is the same as {start, end}_date
    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    # get daily OHLCV data from bundle
    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )

    # get expected data from csv
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )

    # check OHLCV data matches
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )
def ingest(bundle_name, show_progress):
    """Ingest the data for the given bundle."""
    bundles.ingest(
        bundle_name,
        os.environ,
        datetime.date.today(),
        show_progress,
    )
def ingest(bundle, assets_version, show_progress):
    """Ingest the data for the given bundle."""
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        assets_version,
        show_progress,
    )
def ingest(bundle, assets, minute, start, fundamental, assets_version,
           show_progress):
    if bundle == 'tdx':
        if assets:
            if not os.path.exists(assets):
                raise FileNotFoundError
            df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                             encoding='utf8')
            register_tdx(df, minute, start, fundamental)
        else:
            register_tdx(None, minute, start, fundamental)
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        assets_version,
        show_progress,
    )
def ingest(bundle, assets, minute, start, fundamental, assets_version,
           show_progress, writer):
    if bundle == 'tdx':
        if assets:
            if not os.path.exists(assets):
                raise FileNotFoundError
            df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                             encoding='utf8')
            register_tdx(df, minute, start, fundamental)
        else:
            register_tdx(None, minute, start, fundamental)
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        assets_version,
        show_progress,
        writer=writer,
    )
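# The tdx snippets above read an assets file with two header-less columns,
# symbol and name. A sketch of producing such a file with pandas; the path
# 'assets.csv' is made up for illustration, and the one row mirrors the
# default used elsewhere in these snippets:
import pandas as pd

assets = pd.DataFrame({'symbol': ['000001'], 'name': ['平安银行']})
assets.to_csv('assets.csv', header=False, index=False, encoding='utf8')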
def ingest(bundle, assets, minute, start=None, show_progress=True):
    if bundle == 'tdx':
        if assets:
            if not os.path.exists(assets):
                raise FileNotFoundError
            df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                             encoding='utf8')
            register_tdx(df[:1], minute, start)
        else:
            df = pd.DataFrame({
                'symbol': ['000001'],
                'name': ['平安银行'],  # Ping An Bank
            })
            register_tdx(df, minute, start)
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        show_progress=show_progress,
    )
def target_ingest(assets, ingest_minute=False):
    if assets:
        if not os.path.exists(assets):
            raise FileNotFoundError
        df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                         encoding='utf8')
        register_tdx(df[:1], ingest_minute)
    else:
        # Ping An Bank
        df = pd.DataFrame({'symbol': ['000001'], 'name': ['平安银行']})
        register_tdx(df, ingest_minute)
    bundles_module.ingest(
        'tdx',
        os.environ,
        pd.Timestamp.utcnow(),
        show_progress=True,
    )
def target_ingest(assets, ingest_minute=False):
    # imported for its side effects (calendar setup)
    import cn_stock_holidays.zipline.default_calendar

    if assets:
        if not os.path.exists(assets):
            raise FileNotFoundError
        df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                         encoding='utf8')
        register('tdx', partial(tdx_bundle, df[:1], ingest_minute), 'SHSZ')
    else:
        # Ping An Bank
        df = pd.DataFrame({'symbol': ['000001'], 'name': ['平安银行']})
        register('tdx', partial(tdx_bundle, df, ingest_minute), 'SHSZ')
    bundles_module.ingest(
        'tdx',
        os.environ,
        pd.Timestamp.utcnow(),
        show_progress=True,
    )
def test_bundle(self): environ = { "CSVDIR": join( TEST_RESOURCE_PATH, "csvdir_samples", "csvdir", ), } ingest("csvdir", environ=environ) bundle = load("csvdir", environ=environ) sids = 0, 1, 2, 3 assert set(bundle.asset_finder.sids) == set(sids) for equity in bundle.asset_finder.retrieve_all(sids): assert equity.start_date == self.asset_start, equity assert equity.end_date == self.asset_end, equity sessions = self.calendar.all_sessions actual = bundle.equity_daily_bar_reader.load_raw_arrays( self.columns, sessions[sessions.get_loc(self.asset_start, "bfill")], sessions[sessions.get_loc(self.asset_end, "ffill")], sids, ) expected_pricing, expected_adjustments = self._expected_data( bundle.asset_finder, ) np.testing.assert_array_almost_equal(actual, expected_pricing, decimal=2) adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments( self.columns, sessions, pd.Index(sids), ) assert [sorted(adj.keys()) for adj in adjs_for_cols] == expected_adjustments
def ingest(bundle, assets, minute, start, overwrite, assets_version,
           show_progress):
    logger.warning(
        "this project is no longer maintained, please go to "
        "https://github.com/JaysonAlbert/zipline for the new project."
    )
    if bundle == 'tdx':
        if assets:
            if not os.path.exists(assets):
                raise FileNotFoundError
            df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                             encoding='utf8')
            register_tdx(df, minute, start, overwrite)
        else:
            register_tdx(None, minute, start, overwrite)
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        assets_version,
        show_progress,
    )
    bundles_module.ingest(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
        assets_version,
        show_progress,
    )


if __name__ == '__main__':
    import sys

    start_session = pd.to_datetime('20000124', utc=True)
    if len(sys.argv) >= 2:
        assets = sys.argv[1]
        if not os.path.exists(assets):
            raise FileNotFoundError
        df = pd.read_csv(assets, names=['symbol', 'name'], dtype=str,
                         encoding='utf8')
        register_tdx(df, start=start_session)
    else:
        register_tdx(minute=True, start=start_session)
    bundles_module.ingest(
        'tdx',
        os.environ,
        pd.Timestamp.utcnow(),
        show_progress=True,
    )
    # main()
def test_bundle(self):
    url_map = merge(
        {
            format_wiki_url(
                self.api_key,
                symbol,
                self.start_date,
                self.end_date,
            ): test_resource_path('quandl_samples', symbol + '.csv.gz')
            for symbol in self.symbols
        },
        {
            format_metadata_url(self.api_key, n): test_resource_path(
                'quandl_samples',
                'metadata-%d.csv.gz' % n,
            )
            for n in (1, 2)
        },
    )
    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    with patch_read_csv(url_map, strict=True):
        ingest('quandl', environ=environ)

    bundle = load('quandl', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    cal = self.calendar
    actual = bundle.daily_bar_reader.load_raw_arrays(
        self.columns,
        cal[cal.get_loc(self.asset_start, 'bfill')],
        cal[cal.get_loc(self.asset_end, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        cal,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )
# start_date += timedelta(days=1)
start_date = end_date - timedelta(days=365)
while not cal.is_session(start_date):
    start_date -= timedelta(days=1)

initialize_client()

import time
start_time = time.time()

register(
    'alpaca_api',
    # api_to_bundle(interval=['1d', '1m']),
    # api_to_bundle(interval=['1m']),
    api_to_bundle(interval=['1d']),
    calendar_name='NYSE',
    start_session=start_date,
    end_session=end_date,
)

assets_version = ((),)[0]  # just a weird way to create an empty tuple
bundles_module.ingest(
    "alpaca_api",
    os.environ,
    assets_versions=assets_version,
    show_progress=True,
)
print(f"--- It took {timedelta(seconds=time.time() - start_time)} ---")
if __name__ == '__main__':
    cal: TradingCalendar = trading_calendars.get_calendar('NYSE')
    start_date = pd.Timestamp('1999-11-1', tz='utc')
    end_date = pd.Timestamp(date.today() - timedelta(days=1), tz='utc')
    while not cal.is_session(end_date):
        end_date -= timedelta(days=1)

    print('ingesting tiingo-data from: ' + str(start_date) +
          ' to: ' + str(end_date))

    start_time = time.time()
    register('tiingo',
             tiingo_bundle,
             calendar_name='NYSE',
             start_session=start_date,
             end_session=end_date)

    assets_version = ((),)[0]  # just a weird way to create an empty tuple
    bundles_module.ingest(
        "tiingo",
        os.environ,
        assets_versions=assets_version,
        show_progress=True,
    )
    print("--- %s seconds ---" % (time.time() - start_time))
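# The backwards while-loop above is a recurring idiom in these snippets for
# snapping a date to the most recent trading session. A small sketch of the
# same idea as a reusable helper; `previous_session` is a made-up name, not
# part of zipline or trading_calendars:
from datetime import timedelta

def previous_session(cal, day):
    """Walk backwards from `day` until it lands on a session of `cal`."""
    while not cal.is_session(day):
        day -= timedelta(days=1)
    return day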
def init():
    print('Initializing...')

    from zipline.data.bundles import register
    from estimize.zipline.data.bundles.yahoo import yahoo_bundle

    tickers = {
        'SPY',
    }
    register(
        'yahoo',
        yahoo_bundle(tickers),
    )
    bundles_module.ingest(
        'yahoo',
        os.environ,
        pd.Timestamp.utcnow(),
        [],
        True,
    )
    bundles_module.ingest(
        'quantopian-quandl',
        os.environ,
        pd.Timestamp.utcnow(),
        [],
        True,
    )

    injector = Injector([DefaultModule])
    asset_info_service = injector.get(AssetInfoService)
    estimates_service = injector.get(EstimatesService)
    estimize_consensus_service = injector.get(EstimizeConsensusService)
    estimize_signal_service = injector.get(EstimizeSignalService)
    factor_service = injector.get(FactorService)
    market_cap_service = injector.get(MarketCapService)
    releases_service = injector.get(ReleasesService)

    missing_csv_warning = 'Make sure you have added {} to your ./data directory.'
    remote_csv_warning = ('There was an issue downloading {}, make sure you '
                          'are connected to the internet.')

    actions = [
        ('instruments.csv', asset_info_service.get_asset_info, missing_csv_warning),
        ('estimates.csv', estimates_service.get_estimates, missing_csv_warning),
        ('consensus.csv', estimize_consensus_service.get_final_consensuses, missing_csv_warning),
        ('signal_time_series.csv', estimize_signal_service.get_signals, missing_csv_warning),
        ('market_factors.csv', factor_service.get_market_factors, remote_csv_warning),
        ('market_caps.csv', market_cap_service.get_market_caps, remote_csv_warning),
        ('releases.csv', releases_service.get_releases, missing_csv_warning),
    ]

    def item_show_func(item):
        if item is not None:
            return 'Caching {}'.format(item[0])

    with click.progressbar(actions,
                           label='Caching Estimize Data',
                           item_show_func=item_show_func) as items:
        for item in items:
            try:
                item[1]()
            except Exception:
                print('\nERROR: {}'.format(item[2].format(item[0])))
print("calling asset_db_writer") print("metadata", type(metadata)) # drop metadata nan val which exists in any items first metadata = metadata.dropna(axis=0, how="any") # Not sure why symbol_map is needed symbol_map = pd.Series(metadata.symbol.index, metadata.symbol) if traceDebug: print("symbol_map", type(symbol_map)) print(symbol_map) # 写入基础信息 asset_db_writer.write(equities=metadata) adjustment_writer.write() return bundle_name = 'spy-quotemedia-bundle' register( bundle_name, squant_bundle, calendar_name='NYSE' # US equities ) ingest(bundle_name) __all__ = ['squant_bundle']
# alpha-vantage has a fixed time-window, no point in changing these
start_date = pd.Timestamp('1999-11-1', tz='utc')
end_date = pd.Timestamp(date.today() - timedelta(days=1), tz='utc')
while not cal.is_session(end_date):
    end_date -= timedelta(days=1)

print('ingesting alpha_vantage-data from: ' + str(start_date) +
      ' to: ' + str(end_date))

start_time = time.time()
register(
    'alpha_vantage',
    # api_to_bundle(interval=['1d', '1m']),
    # api_to_bundle(interval=['1m']),
    api_to_bundle(interval=['1d']),
    calendar_name='NYSE',
    start_session=start_date,
    end_session=end_date,
)

assets_version = ((),)[0]  # just a weird way to create an empty tuple
bundles_module.ingest(
    "alpha_vantage",
    os.environ,
    assets_versions=assets_version,
    show_progress=True,
)
print("--- %s seconds ---" % (time.time() - start_time))
def test_bundle(self):
    url_map = merge(
        {
            format_wiki_url(
                self.api_key,
                symbol,
                self.start_date,
                self.end_date,
            ): test_resource_path('quandl_samples', symbol + '.csv.gz')
            for symbol in self.symbols
        },
        {
            format_metadata_url(self.api_key, n): test_resource_path(
                'quandl_samples',
                'metadata-%d.csv.gz' % n,
            )
            for n in (1, 2)
        },
    )
    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    with patch_read_csv(url_map, strict=True):
        ingest('quandl', environ=environ)

    bundle = load('quandl', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )
while not cal.is_session(start_date):
    start_date -= timedelta(days=1)

print(f'Ingest between {start_date} and {end_date}')

# initialize_client()

import time
start_time = time.time()

register(
    'custom_csv',
    # api_to_bundle(interval=['1d', '1m']),
    # api_to_bundle(interval=['1m']),
    api_to_bundle(interval=['1d']),
    calendar_name='NYSE',
    start_session=start_date,
    end_session=end_date,
)

assets_version = ((),)[0]  # just a weird way to create an empty tuple
bundles_module.ingest(
    "custom_csv",
    os.environ,
    assets_versions=assets_version,
    show_progress=True,
)
print(f"--- It took {timedelta(seconds=time.time() - start_time)} ---")