def __init__(self, calendar_name='NYSE'):
    self._calendar = zl.get_calendar(name=calendar_name)
    self._cols = [
        'open', 'high', 'low', 'close', 'volume', 'dividend', 'split'
    ]
    # The number of days the price manager will keep trying to pull data
    # for a symbol that is not returning data.
    self.MISSING_DATE_THRESHOLD = 5
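
# A minimal sketch of how a threshold like MISSING_DATE_THRESHOLD might be
# consulted. The PriceManager name, the `_missing_days` bookkeeping and the
# `record_session` helper are hypothetical illustrations, not part of the
# original class above.
from collections import defaultdict


class PriceManager:
    MISSING_DATE_THRESHOLD = 5

    def __init__(self):
        # Consecutive sessions with no data returned, per symbol.
        self._missing_days = defaultdict(int)
        self._active_symbols = set()

    def record_session(self, symbol, bar):
        """Track whether ``symbol`` returned data for the latest session."""
        if bar is None:
            self._missing_days[symbol] += 1
            if self._missing_days[symbol] >= self.MISSING_DATE_THRESHOLD:
                # Give up on a symbol once it has been silent for the
                # threshold number of sessions.
                self._active_symbols.discard(symbol)
        else:
            self._missing_days[symbol] = 0
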
class ZacksBundleTestCase(ZiplineTestCase):
    """Class for testing the Zacks daily data bundle.

    A test file is stored in tests/resources/zacks_samples/fictitious.csv
    """
    symbols = 'MFF', 'JMH', 'PBH'
    asset_start = pd.Timestamp('2016-04-18', tz='utc')
    asset_end = pd.Timestamp('2016-07-06', tz='utc')
    bundle = bundles['quandl']
    calendar = get_calendar(bundle.calendar_name)
    start_date = calendar.first_session
    end_date = calendar.last_session
    api_key = 'ayylmao'
    columns = 'open', 'high', 'low', 'close', 'volume'

    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                self.asset_start,
            ).sid
            for symbol in self.symbols
        }

        # Load data from the CSV test resource.
        df = pd.read_csv(
            test_resource_path('zacks_samples', 'fictitious.csv'),
            index_col='date',
            parse_dates=['date'],
            usecols=[
                'date', 'open', 'high', 'low', 'close', 'volume', 'ticker'
            ],
            na_values=['NA'],
        )

        # Drop NA rows (non-trading days) or the loader will wipe out the
        # entire column.
        df = df.dropna()

        df = df.replace({'ticker': sids})  # convert tickers to sids
        df = df.rename(columns={'ticker': 'sid'})

        # Zacks data contains fractional shares; these get dropped.
        df['volume'] = np.floor(df['volume'])

        # Split one large DataFrame into one per sid
        # (this also drops unwanted tickers).
        subs = [df[df['sid'] == sid] for sid in sorted(sids.values())]

        # Package up the CSV data so that it is in the same format as data
        # coming out of the bundle: a list of five 2D arrays, one per OHLCV
        # column.
        pricing = []
        for column in self.columns:
            vs = np.zeros((subs[0].shape[0], len(subs)))
            for i, sub in enumerate(subs):
                vs[:, i] = sub[column].values
            if column == 'volume':
                vs = np.nan_to_num(vs)
            pricing.append(vs)

        return pricing, []

    def test_bundle(self):
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }

        # Custom bundles need to be registered before use or they will not
        # be recognized.
        register(
            'ZacksQuandl',
            from_zacks_dump(
                test_resource_path('zacks_samples', 'fictitious.csv')))
        ingest('ZacksQuandl', environ=environ)

        # Load the bundle now that it has been ingested.
        bundle = load('ZacksQuandl', environ=environ)
        sids = 0, 1, 2

        # Check that the sids match.
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        # Check that asset_{start,end} is the same as {start,end}_date.
        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        # Get daily OHLCV data from the bundle.
        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )

        # Get the expected data from the CSV.
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )

        # Check that the OHLCV data matches.
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
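
# The test above relies on registering a custom bundle before ingesting it.
# Below is a minimal sketch of the shape such a registrable ingest callable
# takes, assuming zipline's documented bundle API; `zacks_dump_bundle` and
# its body are hypothetical placeholders, not the real `from_zacks_dump`
# factory used in the test.
from zipline.data.bundles import register


def zacks_dump_bundle(environ, asset_db_writer, minute_bar_writer,
                      daily_bar_writer, adjustment_writer, calendar,
                      start_session, end_session, cache, show_progress,
                      output_dir):
    # A real implementation would parse the CSV dump and hand DataFrames to
    # the writers, roughly:
    #   asset_db_writer.write(equities=metadata)
    #   daily_bar_writer.write(iterable_of_sid_frame_pairs)
    #   adjustment_writer.write(splits=splits, dividends=dividends)
    raise NotImplementedError('sketch only')


# Registration must happen before ingest()/load() can resolve the name.
register('ZacksQuandl', zacks_dump_bundle)
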
class QuandlBundleTestCase(WithResponses, ZiplineTestCase):
    symbols = "AAPL", "BRK_A", "MSFT", "ZEN"
    start_date = pd.Timestamp("2014-01", tz="utc")
    end_date = pd.Timestamp("2015-01", tz="utc")
    bundle = bundles["quandl"]
    calendar = get_calendar(bundle.calendar_name)
    api_key = "IamNotaQuandlAPIkey"
    columns = "open", "high", "low", "close", "volume"

    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                None,
            ).sid
            for symbol in self.symbols
        }

        # Load raw data from the quandl test resources.
        data = load_data_table(
            file=test_resource_path("quandl_samples", "QUANDL_ARCHIVE.zip"),
            index_col="date",
        )
        data["sid"] = pd.factorize(data.symbol)[0]

        all_ = data.set_index(
            "sid",
            append=True,
        ).unstack()

        # Fancy list comprehension with statements.
        @list
        @apply
        def pricing():
            for column in self.columns:
                vs = all_[column].values
                if column == "volume":
                    vs = np.nan_to_num(vs)
                yield vs

        # The first index our written data will appear at in the files on
        # disk.
        start_idx = (
            self.calendar.all_sessions.get_loc(self.start_date, "ffill") + 1
        )

        # Convert an index into the raw dataframe into an index into the
        # final data (curried ``operator.add``).
        i = op.add(start_idx)

        def expected_dividend_adjustment(idx, symbol):
            sid = sids[symbol]
            return (
                1
                - all_.iloc[idx]["ex_dividend", sid]
                / all_.iloc[idx - 1]["close", sid]
            )

        # Expected dividend adjustments: (row in the raw frame, symbol).
        dividend_events = [
            (24, "AAPL"), (87, "AAPL"), (150, "AAPL"), (214, "AAPL"),
            (31, "MSFT"), (90, "MSFT"), (158, "MSFT"), (222, "MSFT"),
        ]
        ohlc_adjustments = {
            i(idx): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(idx),
                    first_col=sids[symbol],
                    last_col=sids[symbol],
                    value=expected_dividend_adjustment(idx, symbol),
                )
            ]
            for idx, symbol in dividend_events
        }
        # AAPL 7-for-1 split: OHLC columns are scaled by 1/7 ...
        ohlc_adjustments[i(108)] = [
            Float64Multiply(
                first_row=0,
                last_row=i(108),
                first_col=sids["AAPL"],
                last_col=sids["AAPL"],
                value=1.0 / 7.0,
            )
        ]
        # ... while volume is scaled by 7.
        volume_adjustments = {
            i(108): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=7.0,
                )
            ]
        }
        adjustments = (
            [ohlc_adjustments] * (len(self.columns) - 1)
            + [volume_adjustments]
        )

        return pricing, adjustments

    def test_bundle(self):
        with open(
            test_resource_path("quandl_samples", "QUANDL_ARCHIVE.zip"), "rb"
        ) as quandl_response:
            self.responses.add(
                self.responses.GET,
                "https://file_url.mock.quandl",
                body=quandl_response.read(),
                content_type="application/zip",
                status=200,
            )

        url_map = {
            format_metadata_url(self.api_key): test_resource_path(
                "quandl_samples",
                "metadata.csv.gz",
            )
        }

        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            "ZIPLINE_ROOT": zipline_root,
            "QUANDL_API_KEY": self.api_key,
        }

        with patch_read_csv(url_map):
            ingest("quandl", environ=environ)

        bundle = load("quandl", environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.start_date, "bfill")],
            sessions[sessions.get_loc(self.end_date, "ffill")],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments, expected in zip(
            self.columns, adjs_for_cols, expected_adjustments
        ):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
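
# For reference, each Float64Multiply above scales an inclusive
# [first_row..last_row] x [first_col..last_col] block of the raw pricing
# array by `value`. A toy illustration of the two kinds of values used in
# the expected adjustments (the numbers here are made up, not taken from the
# sample archive):
import numpy as np

# Toy close prices: rows are sessions, columns are sids.
close = np.array([[100.0], [101.0], [102.0], [103.0]])

# Dividend: value = 1 - ex_dividend / previous close, applied to the rows up
# to the ex-dividend session, mirroring expected_dividend_adjustment above.
ex_dividend, ex_row = 0.47, 2
ratio = 1 - ex_dividend / close[ex_row - 1, 0]
adjusted = close.copy()
adjusted[:ex_row + 1, 0:1] *= ratio  # Float64Multiply(0, ex_row, 0, 0, ratio)

# 7-for-1 split: OHLC prices get value = 1/7 while volume gets value = 7.0,
# which is why the OHLC dicts and the volume dict above differ only in that
# one entry.
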
import sys

import logbook

from zipline import get_calendar
from zipline.data.benchmarks_cn import get_cn_benchmark_returns
from zipline.data.treasuries_cn import get_treasury_data
from zipline.pipeline import CustomFactor, Pipeline
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.fundamentals.reader import Fundamentals
from zipline.research import run_pipeline

from cswd.common.utils import data_root

# Configure log output.
logbook.set_datetime_format('local')
logbook.StreamHandler(sys.stdout).push_application()
logger = logbook.Logger('build FF factors')

calendar = get_calendar('SZSH')
all_trading_days = calendar.schedule.index
all_trading_days = all_trading_days[
    all_trading_days <= calendar.actual_last_session]

# Trading days per month (roughly 20). Unlike the US market, the A-share
# market has about 244 trading days per year.
normal_days = 31
business_days = int(0.66 * normal_days)


def get_rm_rf(earliest_date, symbol='000300'):
    """
    Rm - Rf (market return minus risk-free return).

    Benchmark stock index return minus the 1-month treasury bill yield.
class QuandlBundleTestCase(ZiplineTestCase):
    symbols = 'AAPL', 'BRK_A', 'MSFT', 'ZEN'
    asset_start = pd.Timestamp('2014-01', tz='utc')
    asset_end = pd.Timestamp('2015-01', tz='utc')
    bundle = bundles['quandl']
    calendar = get_calendar(bundle.calendar_name)
    start_date = calendar.first_session
    end_date = calendar.last_session
    api_key = 'ayylmao'
    columns = 'open', 'high', 'low', 'close', 'volume'

    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                self.asset_start,
            ).sid
            for symbol in self.symbols
        }

        def per_symbol(symbol):
            df = pd.read_csv(
                test_resource_path('quandl_samples', symbol + '.csv.gz'),
                parse_dates=['Date'],
                index_col='Date',
                usecols=[
                    'Open',
                    'High',
                    'Low',
                    'Close',
                    'Volume',
                    'Date',
                    'Ex-Dividend',
                    'Split Ratio',
                ],
                na_values=['NA'],
            ).rename(
                columns={
                    'Open': 'open',
                    'High': 'high',
                    'Low': 'low',
                    'Close': 'close',
                    'Volume': 'volume',
                    'Date': 'date',
                    'Ex-Dividend': 'ex_dividend',
                    'Split Ratio': 'split_ratio',
                })
            df['sid'] = sids[symbol]
            return df

        all_ = pd.concat(map(per_symbol, self.symbols)).set_index(
            'sid',
            append=True,
        ).unstack()

        # fancy list comprehension with statements
        @list
        @apply
        def pricing():
            for column in self.columns:
                vs = all_[column].values
                if column == 'volume':
                    vs = np.nan_to_num(vs)
                yield vs

        # the first index our written data will appear at in the files on
        # disk
        start_idx = (
            self.calendar.all_sessions.get_loc(self.asset_start, 'ffill') + 1)

        # convert an index into the raw dataframe into an index into the
        # final data (curried ``operator.add``)
        i = op.add(start_idx)

        def expected_dividend_adjustment(idx, symbol):
            sid = sids[symbol]
            # ``.iloc`` replaces the long-deprecated ``.ix`` indexer here.
            return (1 - all_.iloc[idx]['ex_dividend', sid] /
                    all_.iloc[idx - 1]['close', sid])

        # expected dividend adjustments: (row in the raw frame, symbol)
        dividend_events = [
            (24, 'AAPL'), (87, 'AAPL'), (150, 'AAPL'), (214, 'AAPL'),
            (31, 'MSFT'), (90, 'MSFT'), (222, 'MSFT'),
        ]
        ohlc_adjustments = {
            i(idx): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(idx),
                    first_col=sids[symbol],
                    last_col=sids[symbol],
                    value=expected_dividend_adjustment(idx, symbol),
                )
            ]
            for idx, symbol in dividend_events
        }
        # AAPL 7-for-1 split: OHLC columns are scaled by 1/7 ...
        ohlc_adjustments[i(108)] = [
            Float64Multiply(
                first_row=0,
                last_row=i(108),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=1.0 / 7.0,
            )
        ]
        # ... while volume is scaled by 7.
        volume_adjustments = {
            i(108): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=7.0,
                )
            ]
        }
        adjustments = (
            [ohlc_adjustments] * (len(self.columns) - 1) +
            [volume_adjustments])

        return pricing, adjustments

    def test_bundle(self):
        url_map = merge(
            {
                format_wiki_url(
                    self.api_key,
                    symbol,
                    self.start_date,
                    self.end_date,
                ): test_resource_path('quandl_samples', symbol + '.csv.gz')
                for symbol in self.symbols
            },
            {
                format_metadata_url(self.api_key, n): test_resource_path(
                    'quandl_samples',
                    'metadata-%d.csv.gz' % n,
                )
                for n in (1, 2)
            },
        )
        zipline_root = self.enter_instance_context(tmp_dir()).path
        environ = {
            'ZIPLINE_ROOT': zipline_root,
            'QUANDL_API_KEY': self.api_key,
        }
        with patch_read_csv(url_map, strict=True):
            ingest('quandl', environ=environ)

        bundle = load('quandl', environ=environ)
        sids = 0, 1, 2, 3
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        for equity in bundle.asset_finder.retrieve_all(sids):
            assert_equal(equity.start_date, self.asset_start, msg=equity)
            assert_equal(equity.end_date, self.asset_end, msg=equity)

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.asset_start, 'bfill')],
            sessions[sessions.get_loc(self.asset_end, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
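
# The `@list` / `@apply` stack that recurs in these tests is just a compact
# way to build a list from a generator body inline ("a list comprehension
# with statements"). A self-contained equivalent, defining `apply` locally
# rather than assuming any particular import:
def apply(f):
    """Call ``f`` immediately; as a decorator it binds the name to f()."""
    return f()


columns = ('open', 'close')
data = {'open': [1.0, 2.0], 'close': [1.5, 2.5]}


@list
@apply
def pricing():
    # ``apply`` runs the generator function once; ``list`` materializes the
    # resulting generator, so ``pricing`` ends up bound to a plain list.
    for column in columns:
        yield data[column]


assert pricing == [[1.0, 2.0], [1.5, 2.5]]
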
class QuandlBundleTestCase(WithResponses, ZiplineTestCase):
    symbols = to_symbol(TEST_SIDS)
    start_date = pd.Timestamp('2014-01', tz='utc')
    end_date = pd.Timestamp('2015-01', tz='utc')
    bundle = bundles[TEST_BUNDLE_NAME]
    calendar = get_calendar(bundle.calendar_name)
    columns = 'open', 'high', 'low', 'close', 'volume'

    def _expected_data(self, asset_finder):
        sids = {
            symbol: asset_finder.lookup_symbol(
                symbol,
                None,
            ).sid
            for symbol in self.symbols
        }

        # Load raw data from the local db.
        data = _raw_data(self.symbols, self.start_date, self.end_date,
                         self.columns)

        all_ = data.set_index(
            'sid',
            append=True,
        ).unstack()

        # fancy list comprehension with statements
        @list
        @apply
        def pricing():
            for column in self.columns:
                vs = all_[column].values
                if column == 'volume':
                    vs = np.nan_to_num(vs)
                yield vs

        # the first index our written data will appear at in the files on
        # disk
        start_idx = (
            self.calendar.all_sessions.get_loc(self.start_date, 'ffill') + 1)

        # NOTE: edited up to this point.

        # convert an index into the raw dataframe into an index into the
        # final data (curried ``operator.add``)
        i = op.add(start_idx)

        def expected_dividend_adjustment(idx, symbol):
            sid = sids[symbol]
            # ``.iloc`` replaces the long-deprecated ``.ix`` indexer here.
            return (1 - all_.iloc[idx]['ex_dividend', sid] /
                    all_.iloc[idx - 1]['close', sid])

        # expected dividend adjustments: (row in the raw frame, symbol)
        dividend_events = [
            (24, 'AAPL'), (87, 'AAPL'), (150, 'AAPL'), (214, 'AAPL'),
            (31, 'MSFT'), (90, 'MSFT'), (158, 'MSFT'), (222, 'MSFT'),
        ]
        ohlc_adjustments = {
            i(idx): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(idx),
                    first_col=sids[symbol],
                    last_col=sids[symbol],
                    value=expected_dividend_adjustment(idx, symbol),
                )
            ]
            for idx, symbol in dividend_events
        }
        # AAPL 7-for-1 split: OHLC columns are scaled by 1/7 ...
        ohlc_adjustments[i(108)] = [
            Float64Multiply(
                first_row=0,
                last_row=i(108),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=1.0 / 7.0,
            )
        ]
        # ... while volume is scaled by 7.
        volume_adjustments = {
            i(108): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids['AAPL'],
                    last_col=sids['AAPL'],
                    value=7.0,
                )
            ]
        }
        adjustments = (
            [ohlc_adjustments] * (len(self.columns) - 1) +
            [volume_adjustments])

        return pricing, adjustments

    def test_bundle(self):
        # Takes under 3 seconds.
        ingest(TEST_BUNDLE_NAME)
        bundle = load(TEST_BUNDLE_NAME)
        sids = TEST_SIDS
        assert_equal(set(bundle.asset_finder.sids), set(sids))

        sessions = self.calendar.all_sessions
        actual = bundle.equity_daily_bar_reader.load_raw_arrays(
            self.columns,
            sessions[sessions.get_loc(self.start_date, 'bfill')],
            sessions[sessions.get_loc(self.end_date, 'ffill')],
            sids,
        )
        expected_pricing, expected_adjustments = self._expected_data(
            bundle.asset_finder,
        )
        assert_equal(actual, expected_pricing, array_decimal=2)

        adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
            self.columns,
            sessions,
            pd.Index(sids),
        )
        for column, adjustments, expected in zip(self.columns,
                                                 adjustments_for_cols,
                                                 expected_adjustments):
            assert_equal(
                adjustments,
                expected,
                msg=column,
            )
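
# Every variant above snaps its query window onto real trading sessions with
# sessions.get_loc(date, 'bfill'/'ffill'): the start date rolls forward to
# the next session and the end date rolls back to the previous one. A small
# sketch of that behaviour on a hand-made session index (dates chosen purely
# for illustration; get_loc's positional method argument exists in the older
# pandas that zipline pins, while pandas >= 2.0 removed it in favor of
# get_indexer([...], method=...)).
import pandas as pd

# A toy "calendar": the business days Mon 2014-01-06 .. Fri 2014-01-10.
sessions = pd.date_range('2014-01-06', '2014-01-10', freq='B', tz='utc')

# 2014-01-04 is a Saturday, so 'bfill' snaps it forward to Monday the 6th;
# 2014-01-12 is a Sunday, so 'ffill' snaps it back to Friday the 10th.
start = sessions[sessions.get_loc(pd.Timestamp('2014-01-04', tz='utc'), 'bfill')]
end = sessions[sessions.get_loc(pd.Timestamp('2014-01-12', tz='utc'), 'ffill')]

assert start == pd.Timestamp('2014-01-06', tz='utc')
assert end == pd.Timestamp('2014-01-10', tz='utc')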