def test_bundle(self):
    with open(test_resource_path('quandl_samples',
                                 'QUANDL_ARCHIVE.zip'),
              'rb') as quandl_response:
        self.responses.add(
            self.responses.GET,
            'https://file_url.mock.quandl',
            body=quandl_response.read(),
            content_type='application/zip',
            status=200,
        )

    url_map = {
        format_metadata_url(self.api_key): test_resource_path(
            'quandl_samples',
            'metadata.csv.gz',
        )
    }

    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    with patch_read_csv(url_map):
        ingest('quandl', environ=environ)

    bundle = load('quandl', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.start_date, 'bfill')],
        sessions[sessions.get_loc(self.end_date, 'ffill')],
        sids,
    )

    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjs_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )

def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' %
            pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )

    # We need to call gc.collect before tearing down our class because we
    # have a cycle between TradingAlgorithm and AlgorithmSimulator which
    # ultimately holds a reference to the pipeline engine passed to the
    # tests here.
    # This means that we're not guaranteed to have deleted our disk-backed
    # resource readers (e.g. SQLiteAdjustmentReader) before trying to
    # delete the tempdir, which causes failures on Windows because Windows
    # doesn't allow you to delete a file if someone still has an open
    # handle to that file.
    # :(
    cls.add_class_callback(gc.collect)

def per_symbol(symbol):
    df = pd.read_csv(
        test_resource_path('quandl_samples', symbol + '.csv.gz'),
        parse_dates=['Date'],
        index_col='Date',
        usecols=[
            'Open',
            'High',
            'Low',
            'Close',
            'Volume',
            'Date',
            'Ex-Dividend',
            'Split Ratio',
        ],
        na_values=['NA'],
    ).rename(columns={
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
        'Date': 'date',
        'Ex-Dividend': 'ex_dividend',
        'Split Ratio': 'split_ratio',
    })
    df['sid'] = sids[symbol]
    return df

def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))

    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-")
        ),
        serialization="pickle",
    )

    # We need to call gc.collect before tearing down our class because we
    # have a cycle between TradingAlgorithm and AlgorithmSimulator which
    # ultimately holds a reference to the pipeline engine passed to the
    # tests here.
    # This means that we're not guaranteed to have deleted our disk-backed
    # resource readers (e.g. SQLiteAdjustmentReader) before trying to
    # delete the tempdir, which causes failures on Windows because Windows
    # doesn't allow you to delete a file if someone still has an open
    # handle to that file.
    # :(
    cls.add_class_callback(gc.collect)

def test_bundle(self):
    environ = {
        'CSVDIR': test_resource_path('csvdir_samples', 'csvdir')
    }

    ingest('csvdir', environ=environ)
    bundle = load('csvdir', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )

    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjs_for_cols = bundle.adjustment_reader.load_pricing_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    assert_equal(
        [sorted(adj.keys()) for adj in adjs_for_cols],
        expected_adjustments,
    )

def test_bundle(self):
    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    # custom bundles need to be registered before use or they will not
    # be recognized
    register(
        'ZacksQuandl',
        from_zacks_dump(
            test_resource_path('zacks_samples', 'fictitious.csv')))
    ingest('ZacksQuandl', environ=environ)

    # load bundle now that it has been ingested
    bundle = load('ZacksQuandl', environ=environ)
    sids = 0, 1, 2

    # check sids match
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    # check asset_{start, end} is the same as {start, end}_date
    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    # get daily OHLCV data from bundle
    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )

    # get expected data from csv
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )

    # check OHLCV data matches
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )

def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))

    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-")
        ),
        serialization="pickle",
    )

def pricing_callback(request):
    headers = {
        'content-encoding': 'gzip',
        'content-type': 'text/csv',
    }
    path = test_resource_path(
        'yahoo_samples',
        get_symbol_from_url(request.url) + '.csv.gz',
    )
    with open(path, 'rb') as f:
        return (
            200,
            headers,
            f.read(),
        )

def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' %
            pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )

def _expected_data(self, asset_finder):
    sids = {
        symbol: asset_finder.lookup_symbol(
            symbol,
            self.asset_start,
        ).sid
        for symbol in self.symbols
    }

    # load data from CSV
    df = pd.read_csv(
        test_resource_path('zacks_samples', 'fictitious.csv'),
        index_col='date',
        parse_dates=['date'],
        usecols=[
            'date', 'open', 'high', 'low', 'close', 'volume', 'ticker'
        ],
        na_values=['NA'],
    )

    # drop NA rows (non-trading days) or the loader will wipe out the
    # entire column
    df = df.dropna()

    df = df.replace({"ticker": sids})  # convert ticker to sids
    df = df.rename(columns={"ticker": "sid"})

    # zacks data contains fractional shares; these get dropped
    df["volume"] = np.floor(df["volume"])

    # split one large DataFrame into one per sid
    # (also drops unwanted tickers)
    subs = [df[df['sid'] == sid] for sid in sorted(sids.values())]

    # package up data from the CSV so that it is in the same format as data
    # coming out of the bundle: the format is a list of 5 2D arrays, one
    # for each OHLCV column
    pricing = []
    for column in self.columns:
        vs = np.zeros((subs[0].shape[0], len(subs)))
        for i, sub in enumerate(subs):
            vs[:, i] = sub[column].values
        if column == 'volume':
            vs = np.nan_to_num(vs)
        pricing.append(vs)

    return pricing, []

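# --- Illustrative sketch (not part of the original tests) ---
# The replace/rename pair above maps ticker strings to integer sids via a
# nested-dict `DataFrame.replace` and then renames the column. A minimal,
# self-contained example with hypothetical tickers and sids:
#
# import pandas as pd
#
# sids = {'AAPL': 0, 'MSFT': 1}
# df = pd.DataFrame({'ticker': ['AAPL', 'MSFT', 'AAPL'],
#                    'close': [1.0, 2.0, 3.0]})
# df = df.replace({'ticker': sids}).rename(columns={'ticker': 'sid'})
# assert list(df['sid']) == [0, 1, 0]
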
def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' %
            pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )

    market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
    for data in market_data:
        ensure_file(cls.tmpdir.getpath('example_data/root/data/' + data))

def per_symbol(symbol):
    df = pd.read_csv(
        test_resource_path('csvdir_samples', 'csvdir',
                           'daily', symbol + '.csv.gz'),
        parse_dates=['date'],
        index_col='date',
        usecols=[
            'open',
            'high',
            'low',
            'close',
            'volume',
            'date',
            'dividend',
            'split',
        ],
        na_values=['NA'],
    )
    df['sid'] = sids[symbol]
    return df

def init_class_fixtures(cls):
    super().init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' %
            pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )

    cls.no_benchmark_expected_perf = {
        example_name: cls._no_benchmark_expectations_applied(
            expected_perf.copy()
        )
        for example_name, expected_perf in cls.expected_perf.items()
    }

def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register("test", lambda *args: None)
    cls.add_class_callback(partial(unregister, "test"))

    with tarfile.open(test_resource_path("example_data.tar.gz")) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            "example_data/expected_perf/%s" % pd.__version__.replace(".", "-"),
        ),
        serialization="pickle",
    )

    cls.no_benchmark_expected_perf = {
        example_name: cls._no_benchmark_expectations_applied(
            expected_perf.copy()
        )
        for example_name, expected_perf in cls.expected_perf.items()
    }

def init_class_fixtures(cls):
    super(ExamplesTests, cls).init_class_fixtures()
    register('test', lambda *args: None)
    cls.add_class_callback(partial(unregister, 'test'))

    with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
        tar.extractall(cls.tmpdir.path)

    cls.expected_perf = dataframe_cache(
        cls.tmpdir.getpath(
            'example_data/expected_perf/%s' %
            pd.__version__.replace('.', '-'),
        ),
        serialization='pickle',
    )

    market_data = ('SPY_benchmark.csv', 'treasury_curves.csv')
    for data in market_data:
        update_modified_time(
            cls.tmpdir.getpath(
                'example_data/root/data/' + data
            )
        )

def zipfile_path(symbol):
    return test_resource_path('quandl_samples', symbol + '.csv.gz')

def test_bundle(self):
    url_map = merge(
        {
            format_wiki_url(
                self.api_key,
                symbol,
                self.start_date,
                self.end_date,
            ): test_resource_path('quandl_samples', symbol + '.csv.gz')
            for symbol in self.symbols
        },
        {
            format_metadata_url(self.api_key, n): test_resource_path(
                'quandl_samples',
                'metadata-%d.csv.gz' % n,
            )
            for n in (1, 2)
        },
    )
    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    with patch_read_csv(url_map, strict=True):
        ingest('quandl', environ=environ)

    bundle = load('quandl', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    sessions = self.calendar.all_sessions
    actual = bundle.equity_daily_bar_reader.load_raw_arrays(
        self.columns,
        sessions[sessions.get_loc(self.asset_start, 'bfill')],
        sessions[sessions.get_loc(self.asset_end, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        sessions,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )

def _expected_data(self, asset_finder):
    sids = {
        symbol: asset_finder.lookup_symbol(
            symbol,
            None,
        ).sid
        for symbol in self.symbols
    }

    # Load raw data from quandl test resources.
    data = load_data_table(
        file=test_resource_path(
            'quandl_samples',
            'QUANDL_ARCHIVE.zip'
        ),
        index_col='date'
    )
    data['sid'] = pd.factorize(data.symbol)[0]

    all_ = data.set_index(
        'sid',
        append=True,
    ).unstack()

    # fancy list comprehension with statements
    @list
    @apply
    def pricing():
        for column in self.columns:
            vs = all_[column].values
            if column == 'volume':
                vs = np.nan_to_num(vs)
            yield vs

    # the first index our written data will appear in the files on disk
    start_idx = (
        self.calendar.all_sessions.get_loc(self.start_date, 'ffill') + 1
    )

    # convert an index into the raw dataframe into an index into the
    # final data
    i = op.add(start_idx)

    def expected_dividend_adjustment(idx, symbol):
        sid = sids[symbol]
        return (
            1 -
            all_.ix[idx, ('ex_dividend', sid)] /
            all_.ix[idx - 1, ('close', sid)]
        )

    adjustments = [
        # ohlc
        {
            # dividends
            i(24): [Float64Multiply(
                first_row=0,
                last_row=i(24),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=expected_dividend_adjustment(24, 'AAPL'),
            )],
            i(87): [Float64Multiply(
                first_row=0,
                last_row=i(87),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=expected_dividend_adjustment(87, 'AAPL'),
            )],
            i(150): [Float64Multiply(
                first_row=0,
                last_row=i(150),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=expected_dividend_adjustment(150, 'AAPL'),
            )],
            i(214): [Float64Multiply(
                first_row=0,
                last_row=i(214),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=expected_dividend_adjustment(214, 'AAPL'),
            )],

            i(31): [Float64Multiply(
                first_row=0,
                last_row=i(31),
                first_col=sids['MSFT'],
                last_col=sids['MSFT'],
                value=expected_dividend_adjustment(31, 'MSFT'),
            )],
            i(90): [Float64Multiply(
                first_row=0,
                last_row=i(90),
                first_col=sids['MSFT'],
                last_col=sids['MSFT'],
                value=expected_dividend_adjustment(90, 'MSFT'),
            )],
            i(158): [Float64Multiply(
                first_row=0,
                last_row=i(158),
                first_col=sids['MSFT'],
                last_col=sids['MSFT'],
                value=expected_dividend_adjustment(158, 'MSFT'),
            )],
            i(222): [Float64Multiply(
                first_row=0,
                last_row=i(222),
                first_col=sids['MSFT'],
                last_col=sids['MSFT'],
                value=expected_dividend_adjustment(222, 'MSFT'),
            )],

            # splits
            i(108): [Float64Multiply(
                first_row=0,
                last_row=i(108),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=1.0 / 7.0,
            )],
        },
    ] * (len(self.columns) - 1) + [
        # volume
        {
            i(108): [Float64Multiply(
                first_row=0,
                last_row=i(108),
                first_col=sids['AAPL'],
                last_col=sids['AAPL'],
                value=7.0,
            )],
        }
    ]
    return pricing, adjustments

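# --- Illustrative sketch (not part of the original tests) ---
# The `@list` / `@apply` stack above is the "fancy list comprehension with
# statements" idiom: `apply` (assumed to behave like the helper imported in
# these tests) calls the decorated generator function immediately, and
# `list` then materializes the yielded values. A self-contained equivalent,
# with a hypothetical stand-in for `apply`:
#
# def apply(f):
#     # minimal stand-in: call the decorated function with no arguments
#     return f()
#
# @list
# @apply
# def squares():
#     for n in range(3):
#         yield n * n
#
# assert squares == [0, 1, 4]
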
def adjustments_callback(request):
    path = test_resource_path(
        'yahoo_samples',
        get_symbol_from_url(request.url) + '.adjustments.gz',
    )
    return 200, {}, read_compressed(path)

def main(ctx, rebuild_input):
    """Rebuild the perf data for test_examples"""
    example_path = test_resource_path("example_data.tar.gz")

    with tmp_dir() as d:
        with tarfile.open(example_path) as tar:
            tar.extractall(d.path)

        # The environ here should be the same (modulo the tempdir location)
        # as we use in test_examples.py.
        environ = {"ZIPLINE_ROOT": d.getpath("example_data/root")}

        if rebuild_input:
            raise NotImplementedError(
                "We cannot rebuild input for Yahoo because of "
                "changes Yahoo made to their API, so we cannot "
                "use Yahoo data bundles anymore. This will be fixed in "
                "a future release",
            )

        # we need to register the bundle; it is already ingested and saved in
        # the example_data.tar.gz file
        @register("test")
        def nop_ingest(*args, **kwargs):
            raise NotImplementedError("we cannot rebuild the test bundle")

        new_perf_path = d.getpath(
            "example_data/new_perf/%s" % pd.__version__.replace(".", "-"),
        )
        c = dataframe_cache(
            new_perf_path,
            serialization="pickle:2",
        )
        with c:
            for name in EXAMPLE_MODULES:
                c[name] = examples.run_example(
                    EXAMPLE_MODULES,
                    name,
                    environ=environ,
                    benchmark_returns=read_checked_in_benchmark_data(),
                )

            correct_called = [False]

            console = None

            def _exit(*args, **kwargs):
                console.raw_input = eof

            def correct():
                correct_called[0] = True
                _exit()

            expected_perf_path = d.getpath(
                "example_data/expected_perf/%s"
                % pd.__version__.replace(".", "-"),
            )

            # allow users to run some analysis to make sure that the new
            # results check out
            console = InteractiveConsole(
                {
                    "correct": correct,
                    "exit": _exit,
                    "incorrect": _exit,
                    "new": c,
                    "np": np,
                    "old": dataframe_cache(
                        expected_perf_path,
                        serialization="pickle",
                    ),
                    "pd": pd,
                    "cols_to_check": examples._cols_to_check,
                    "changed_results": changed_results,
                }
            )
            console.interact(banner)

            if not correct_called[0]:
                ctx.fail(
                    "`correct()` was not called! This means that the new"
                    " results will not be written",
                )

            # move the new results to the expected path
            shutil.rmtree(expected_perf_path)
            shutil.copytree(new_perf_path, expected_perf_path)

        # Clear out all the temporary new perf so it doesn't get added to the
        # tarball.
        shutil.rmtree(d.getpath("example_data/new_perf/"))

        with tarfile.open(example_path, "w|gz") as tar:
            tar.add(d.getpath("example_data"), "example_data")

def test_bundle(self):
    url_map = merge(
        {
            format_wiki_url(
                self.api_key,
                symbol,
                self.start_date,
                self.end_date,
            ): test_resource_path('quandl_samples', symbol + '.csv.gz')
            for symbol in self.symbols
        },
        {
            format_metadata_url(self.api_key, n): test_resource_path(
                'quandl_samples',
                'metadata-%d.csv.gz' % n,
            )
            for n in (1, 2)
        },
    )
    zipline_root = self.enter_instance_context(tmp_dir()).path
    environ = {
        'ZIPLINE_ROOT': zipline_root,
        'QUANDL_API_KEY': self.api_key,
    }

    with patch_read_csv(url_map, strict=True):
        ingest('quandl', environ=environ)

    bundle = load('quandl', environ=environ)
    sids = 0, 1, 2, 3
    assert_equal(set(bundle.asset_finder.sids), set(sids))

    for equity in bundle.asset_finder.retrieve_all(sids):
        assert_equal(equity.start_date, self.asset_start, msg=equity)
        assert_equal(equity.end_date, self.asset_end, msg=equity)

    cal = self.calendar
    actual = bundle.daily_bar_reader.load_raw_arrays(
        self.columns,
        cal[cal.get_loc(self.asset_start, 'bfill')],
        cal[cal.get_loc(self.asset_end, 'ffill')],
        sids,
    )
    expected_pricing, expected_adjustments = self._expected_data(
        bundle.asset_finder,
    )
    assert_equal(actual, expected_pricing, array_decimal=2)

    adjustments_for_cols = bundle.adjustment_reader.load_adjustments(
        self.columns,
        cal,
        pd.Index(sids),
    )
    for column, adjustments, expected in zip(self.columns,
                                             adjustments_for_cols,
                                             expected_adjustments):
        assert_equal(
            adjustments,
            expected,
            msg=column,
        )

def zipfile_path(file_name):
    return test_resource_path('quandl_samples', file_name)

def _expected_data(self, asset_finder):
    sids = {
        symbol: asset_finder.lookup_symbol(
            symbol,
            None,
        ).sid
        for symbol in self.symbols
    }

    # Load raw data from quandl test resources.
    data = load_data_table(
        file=test_resource_path("quandl_samples", "QUANDL_ARCHIVE.zip"),
        index_col="date",
    )
    data["sid"] = pd.factorize(data.symbol)[0]

    all_ = data.set_index(
        "sid",
        append=True,
    ).unstack()

    # fancy list comprehension with statements
    @list
    @apply
    def pricing():
        for column in self.columns:
            vs = all_[column].values
            if column == "volume":
                vs = np.nan_to_num(vs)
            yield vs

    # the first index our written data will appear in the files on disk
    start_idx = (
        self.calendar.all_sessions.get_loc(self.start_date, "ffill") + 1
    )

    # convert an index into the raw dataframe into an index into the
    # final data
    i = op.add(start_idx)

    def expected_dividend_adjustment(idx, symbol):
        sid = sids[symbol]
        return (
            1
            - all_.iloc[idx]["ex_dividend", sid]
            / all_.iloc[idx - 1]["close", sid]
        )

    adjustments = [
        # ohlc
        {
            # dividends
            i(24): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(24),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=expected_dividend_adjustment(24, "AAPL"),
                )
            ],
            i(87): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(87),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=expected_dividend_adjustment(87, "AAPL"),
                )
            ],
            i(150): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(150),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=expected_dividend_adjustment(150, "AAPL"),
                )
            ],
            i(214): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(214),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=expected_dividend_adjustment(214, "AAPL"),
                )
            ],
            i(31): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(31),
                    first_col=sids["MSFT"],
                    last_col=sids["MSFT"],
                    value=expected_dividend_adjustment(31, "MSFT"),
                )
            ],
            i(90): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(90),
                    first_col=sids["MSFT"],
                    last_col=sids["MSFT"],
                    value=expected_dividend_adjustment(90, "MSFT"),
                )
            ],
            i(158): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(158),
                    first_col=sids["MSFT"],
                    last_col=sids["MSFT"],
                    value=expected_dividend_adjustment(158, "MSFT"),
                )
            ],
            i(222): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(222),
                    first_col=sids["MSFT"],
                    last_col=sids["MSFT"],
                    value=expected_dividend_adjustment(222, "MSFT"),
                )
            ],
            # splits
            i(108): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=1.0 / 7.0,
                )
            ],
        },
    ] * (len(self.columns) - 1) + [
        # volume
        {
            i(108): [
                Float64Multiply(
                    first_row=0,
                    last_row=i(108),
                    first_col=sids["AAPL"],
                    last_col=sids["AAPL"],
                    value=7.0,
                )
            ],
        }
    ]

    return pricing, adjustments

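# --- Illustrative sketch (not part of the original tests) ---
# `op` above is assumed to be `toolz.curried.operator`, so `op.add(start_idx)`
# is a curried add: it returns a function that shifts an index into the raw
# dataframe by `start_idx` to get an index into the data written on disk.
# A minimal, self-contained example with a hypothetical offset:
#
# import toolz.curried.operator as op
#
# start_idx = 5          # hypothetical offset of the first written session
# i = op.add(start_idx)  # i(24) == 29
# assert i(24) == 29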