def test_lookup_future_chain(self):
    metadata = {
        2: {'symbol': 'ADN15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-06-15', tz='UTC')},
        1: {'symbol': 'ADV15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-09-14', tz='UTC')},
        0: {'symbol': 'ADF16',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-12-14', tz='UTC')},
    }
    finder = AssetFinder(metadata=metadata)
    dt = pd.Timestamp('2015-06-19', tz='UTC')

    # The query date is after sid 2's expiration, so only the two
    # remaining contracts should be returned, nearest expiration first.
    chain = finder.lookup_future_chain('AD', dt)
    self.assertEqual(len(chain), 2)
    self.assertEqual(chain[0].sid, 1)
    self.assertEqual(chain[1].sid, 0)
def test_insert_metadata(self):
    finder = AssetFinder()
    finder.insert_metadata(0,
                           asset_type='equity',
                           start_date='2014-01-01',
                           end_date='2015-01-01',
                           symbol="PLAY",
                           foo_data="FOO")

    # Recognized fields were cached...
    self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
    self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
    self.assertEqual('2015-01-01', finder.metadata_cache[0]['end_date'])
    # ...while the unrecognized field was dropped.
    self.assertFalse('foo_data' in finder.metadata_cache[0])

    # Inserting again for the same sid updates changed fields and adds
    # new ones without discarding the previously-stored values.
    finder.insert_metadata(0,
                           asset_type='equity',
                           start_date='2014-01-01',
                           end_date='2015-02-01',
                           symbol="PLAY",
                           exchange="NYSE")
    self.assertEqual('2015-02-01', finder.metadata_cache[0]['end_date'])
    self.assertEqual('NYSE', finder.metadata_cache[0]['exchange'])
    self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
def engine_from_files(daily_bar_path,
                      adjustments_path,
                      asset_db_path,
                      calendar,
                      warmup_assets=False):
    """
    Build a SimplePipelineEngine from resources on the local filesystem.

    Parameters
    ----------
    daily_bar_path : str
        Path to pass to `BcolzDailyBarReader`.
    adjustments_path : str
        Path to pass to SQLiteAdjustmentReader.
    asset_db_path : str
        Path to pass to `AssetFinder`.
    calendar : pd.DatetimeIndex
        Calendar to use for the loader.
    warmup_assets : bool, optional
        Whether or not to populate AssetFinder caches.  This can speed up
        initial latency on subsequent pipeline runs, at the cost of extra
        memory consumption.  Default is False.
    """
    loader = USEquityPricingLoader.from_files(daily_bar_path,
                                              adjustments_path)
    asset_finder = AssetFinder(asset_db_path)
    if warmup_assets:
        # Pre-fetching every asset primes the finder's internal caches.
        results = asset_finder.retrieve_all(asset_finder.sids)
        print("Warmed up %d assets." % len(results))

    return SimplePipelineEngine(
        lambda _: loader,
        calendar,
        asset_finder,
    )
def test_yahoo_bars_to_panel_source(self):
    env = TradingEnvironment()
    finder = AssetFinder(env.engine)
    stocks = ['AAPL', 'GE']
    env.write_data(equities_identifiers=stocks)

    start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    data = factory.load_bars_from_yahoo(stocks=stocks,
                                        indexes={},
                                        start=start,
                                        end=end)
    check_fields = ['sid', 'open', 'high', 'low', 'close',
                    'volume', 'price']

    # Map the panel's string identifiers onto sids before sourcing it.
    panel = data.copy()
    sids = finder.map_identifier_index_to_sids(
        data.items, data.major_axis[0]
    )
    panel.items = sids
    source = DataPanelSource(panel)

    for event in source:
        # Every emitted event carries the full set of OHLCV fields,
        # an integral volume, and a sid drawn from the mapped set.
        for check_field in check_fields:
            self.assertIn(check_field, event)
        self.assertTrue(isinstance(event['volume'], (integer_types)))
        self.assertTrue(event['sid'] in sids)
def test_lookup_generic_handle_missing(self):
    frame = pd.DataFrame.from_records([
        {'sid': 0,
         'file_name': 'real',
         'company_name': 'real',
         'start_date_nano': pd.Timestamp('2013-1-1', tz='UTC'),
         'end_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
         'exchange': ''},
        {'sid': 1,
         'file_name': 'also_real',
         'company_name': 'also_real',
         'start_date_nano': pd.Timestamp('2013-1-1', tz='UTC'),
         'end_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
         'exchange': ''},
        # Ended before the query date, but should still be found.
        {'sid': 2,
         'file_name': 'real_but_old',
         'company_name': 'real_but_old',
         'start_date_nano': pd.Timestamp('2002-1-1', tz='UTC'),
         'end_date_nano': pd.Timestamp('2003-1-1', tz='UTC'),
         'exchange': ''},
        # Starts **after** the query date, so it must **not** be found.
        {'sid': 3,
         'file_name': 'real_but_in_the_future',
         'company_name': 'real_but_in_the_future',
         'start_date_nano': pd.Timestamp('2014-1-1', tz='UTC'),
         'end_date_nano': pd.Timestamp('2020-1-1', tz='UTC'),
         'exchange': 'THE FUTURE'},
    ])
    finder = AssetFinder(frame)
    results, missing = finder.lookup_generic(
        ['real', 1, 'fake', 'real_but_old', 'real_but_in_the_future'],
        pd.Timestamp('2013-02-01', tz='UTC'),
    )

    self.assertEqual(len(results), 3)
    self.assertEqual(results[0].symbol, 'real')
    self.assertEqual(results[0].sid, 0)
    self.assertEqual(results[1].symbol, 'also_real')
    self.assertEqual(results[1].sid, 1)
    self.assertEqual(results[2].symbol, 'real_but_old')
    self.assertEqual(results[2].sid, 2)

    self.assertEqual(len(missing), 2)
    self.assertEqual(missing[0], 'fake')
    self.assertEqual(missing[1], 'real_but_in_the_future')
def test_lookup_future_chain(self):
    metadata = {
        # Expires on the query date itself, so it is still valid.
        2: {'symbol': 'ADN15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-06-15', tz='UTC'),
            'start_date': pd.Timestamp('2015-01-01', tz='UTC')},
        1: {'symbol': 'ADV15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-09-14', tz='UTC'),
            'start_date': pd.Timestamp('2015-01-01', tz='UTC')},
        # Begins trading on the query date, so it is valid too.
        0: {'symbol': 'ADF16',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-12-14', tz='UTC'),
            'start_date': pd.Timestamp('2015-06-15', tz='UTC')},
        # Same contract as sid 0 but it only starts trading in August,
        # so it should be excluded.
        3: {'symbol': 'ADF16',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-12-14', tz='UTC'),
            'start_date': pd.Timestamp('2015-08-01', tz='UTC')},
    }
    finder = AssetFinder(metadata=metadata)
    dt = pd.Timestamp('2015-06-15', tz='UTC')
    last_year = pd.Timestamp('2014-01-01', tz='UTC')
    first_day = pd.Timestamp('2015-01-01', tz='UTC')

    # With full knowledge as of `dt`, three contracts are live, ordered
    # by expiration.
    chain = finder.lookup_future_chain('AD', dt, dt)
    self.assertEqual(len(chain), 3)
    self.assertEqual(chain[0].sid, 2)
    self.assertEqual(chain[1].sid, 1)
    self.assertEqual(chain[2].sid, 0)

    # A knowledge date from last year predates every start_date.
    chain = finder.lookup_future_chain('AD', dt, last_year)
    self.assertEqual(len(chain), 0)

    # Contracts whose start_date equals the knowledge date are included.
    chain = finder.lookup_future_chain('AD', dt, first_day)
    self.assertEqual(len(chain), 2)
def test_lookup_generic_handle_missing(self):
    frame = pd.DataFrame.from_records([
        {'sid': 0,
         'symbol': 'real',
         'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
         'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
         'exchange': ''},
        {'sid': 1,
         'symbol': 'also_real',
         'start_date': pd.Timestamp('2013-1-1', tz='UTC'),
         'end_date': pd.Timestamp('2014-1-1', tz='UTC'),
         'exchange': ''},
        # Ended before the query date, but should still be found.
        {'sid': 2,
         'symbol': 'real_but_old',
         'start_date': pd.Timestamp('2002-1-1', tz='UTC'),
         'end_date': pd.Timestamp('2003-1-1', tz='UTC'),
         'exchange': ''},
        # Starts **after** the query date, so it must **not** be found.
        {'sid': 3,
         'symbol': 'real_but_in_the_future',
         'start_date': pd.Timestamp('2014-1-1', tz='UTC'),
         'end_date': pd.Timestamp('2020-1-1', tz='UTC'),
         'exchange': 'THE FUTURE'},
    ])
    self.env.write_data(equities_df=frame)
    finder = AssetFinder(self.env.engine)
    results, missing = finder.lookup_generic(
        ['REAL', 1, 'FAKE', 'REAL_BUT_OLD', 'REAL_BUT_IN_THE_FUTURE'],
        pd.Timestamp('2013-02-01', tz='UTC'),
    )

    self.assertEqual(len(results), 3)
    self.assertEqual(results[0].symbol, 'REAL')
    self.assertEqual(results[0].sid, 0)
    self.assertEqual(results[1].symbol, 'ALSO_REAL')
    self.assertEqual(results[1].sid, 1)
    self.assertEqual(results[2].symbol, 'REAL_BUT_OLD')
    self.assertEqual(results[2].sid, 2)

    self.assertEqual(len(missing), 2)
    self.assertEqual(missing[0], 'FAKE')
    self.assertEqual(missing[1], 'REAL_BUT_IN_THE_FUTURE')
def test_lookup_symbol_delimited(self):
    as_of = pd.Timestamp("2013-01-01", tz="UTC")
    frame = pd.DataFrame.from_records([
        {"sid": i,
         "symbol": "TEST.%d" % i,
         "company_name": "company%d" % i,
         "start_date": as_of.value,
         "end_date": as_of.value,
         "exchange": uuid.uuid4().hex}
        for i in range(3)
    ])
    self.env.write_data(equities_df=frame)
    finder = AssetFinder(self.env.engine)
    asset_0, asset_1, asset_2 = (finder.retrieve_asset(i)
                                 for i in range(3))

    # Two passes so a stale cache entry from pass one would be caught.
    for i in range(2):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol("TEST", as_of)
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol("TEST1", as_of)
        # '@' is not a supported delimiter
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol("TEST@1", as_of)

        # Any supported delimiter resolves to the delimited symbol.
        for fuzzy_char in ["-", "/", "_", "."]:
            self.assertEqual(
                asset_1,
                finder.lookup_symbol("TEST%s1" % fuzzy_char, as_of))
def test_lookup_generic_handle_missing(self):
    frame = pd.DataFrame.from_records([
        {"sid": 0,
         "symbol": "real",
         "start_date": pd.Timestamp("2013-1-1", tz="UTC"),
         "end_date": pd.Timestamp("2014-1-1", tz="UTC"),
         "exchange": ""},
        {"sid": 1,
         "symbol": "also_real",
         "start_date": pd.Timestamp("2013-1-1", tz="UTC"),
         "end_date": pd.Timestamp("2014-1-1", tz="UTC"),
         "exchange": ""},
        # Ended before the query date, but should still be found.
        {"sid": 2,
         "symbol": "real_but_old",
         "start_date": pd.Timestamp("2002-1-1", tz="UTC"),
         "end_date": pd.Timestamp("2003-1-1", tz="UTC"),
         "exchange": ""},
        # Starts **after** the query date, so it must **not** be found.
        {"sid": 3,
         "symbol": "real_but_in_the_future",
         "start_date": pd.Timestamp("2014-1-1", tz="UTC"),
         "end_date": pd.Timestamp("2020-1-1", tz="UTC"),
         "exchange": "THE FUTURE"},
    ])
    self.env.write_data(equities_df=frame)
    finder = AssetFinder(self.env.engine)
    results, missing = finder.lookup_generic(
        ["REAL", 1, "FAKE", "REAL_BUT_OLD", "REAL_BUT_IN_THE_FUTURE"],
        pd.Timestamp("2013-02-01", tz="UTC")
    )

    self.assertEqual(len(results), 3)
    self.assertEqual(results[0].symbol, "REAL")
    self.assertEqual(results[0].sid, 0)
    self.assertEqual(results[1].symbol, "ALSO_REAL")
    self.assertEqual(results[1].sid, 1)
    self.assertEqual(results[2].symbol, "REAL_BUT_OLD")
    self.assertEqual(results[2].sid, 2)

    self.assertEqual(len(missing), 2)
    self.assertEqual(missing[0], "FAKE")
    self.assertEqual(missing[1], "REAL_BUT_IN_THE_FUTURE")
def test_sid_assignment(self):
    # Metadata keyed by identifier rather than sid — sids are absent.
    metadata = {'PLAY': {'symbol': 'PLAY'},
                'MSFT': {'symbol': 'MSFT'}}

    # The finder is permitted to mint sids itself.
    finder = AssetFinder(metadata=metadata, allow_sid_assignment=True)

    # An Asset should have been built for the identifier.
    play = finder.retrieve_asset_by_identifier('PLAY')
    self.assertEqual('PLAY', play.symbol)
def setUp(self):
    self.__calendar = date_range('2014', '2015', freq=trading_day)
    self.__assets = assets = Int64Index(arange(1, 20))
    self.__finder = AssetFinder(
        make_simple_asset_info(
            assets,
            self.__calendar[0],
            self.__calendar[-1],
        ),
        db_path=':memory:',
        create_table=True,
    )
    # Lifetimes over the last ten sessions of the calendar.
    self.__mask = self.__finder.lifetimes(self.__calendar[-10:])
def test_insert_metadata(self):
    data = {0: {"start_date": "2014-01-01",
                "end_date": "2015-01-01",
                "symbol": "PLAY",
                "foo_data": "FOO"}}
    self.env.write_data(equities_data=data)
    finder = AssetFinder(self.env.engine)

    # The recognized fields round-trip through the writer and finder.
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual("PLAY", equity.symbol)
    self.assertEqual(pd.Timestamp("2015-01-01", tz="UTC"),
                     equity.end_date)

    # The unrecognized field never became an attribute.
    with self.assertRaises(AttributeError):
        equity.foo_data
def test_lookup_symbol_fuzzy(self):
    as_of = pd.Timestamp('2013-01-01', tz='UTC')
    frame = pd.DataFrame.from_records([
        {'sid': i,
         'file_name': 'TEST@%d' % i,
         'company_name': "company%d" % i,
         'start_date_nano': as_of.value,
         'end_date_nano': as_of.value,
         'exchange': uuid.uuid4().hex}
        for i in range(3)
    ])
    finder = AssetFinder(frame, fuzzy_char='@')
    asset_0, asset_1, asset_2 = (finder.retrieve_asset(i)
                                 for i in range(3))

    # Two passes so a stale cache entry from pass one would be caught.
    for i in range(2):
        self.assertIsNone(finder.lookup_symbol('test', as_of))
        self.assertEqual(asset_1,
                         finder.lookup_symbol('test@1', as_of))

        # Adding an unnecessary fuzzy shouldn't matter.
        self.assertEqual(asset_1,
                         finder.lookup_symbol('test@1', as_of,
                                              fuzzy=True))

        # Without fuzzy matching the undelimited form is unknown.
        self.assertIsNone(finder.lookup_symbol('test1', as_of))

        # With fuzzy matching it resolves to the delimited symbol.
        self.assertEqual(
            asset_1,
            finder.lookup_symbol('test1', as_of, fuzzy=True),
        )
def test_sid_assignment(self):
    # Metadata keyed by identifier rather than sid — sids are absent.
    metadata = {'PLAY': {'symbol': 'PLAY'},
                'MSFT': {'symbol': 'MSFT'}}

    # The finder is permitted to mint sids itself.
    finder = AssetFinder(metadata=metadata, allow_sid_assignment=True)

    # Both Assets exist and received distinct sids.
    play = finder.lookup_symbol('PLAY', datetime.now())
    msft = finder.lookup_symbol('MSFT', datetime.now())
    self.assertEqual('PLAY', play.symbol)
    self.assertIsNotNone(play.sid)
    self.assertNotEqual(play.sid, msft.sid)
def __init__(self,
             load=None,
             bm_symbol='^GSPC',
             exchange_tz="US/Eastern",
             max_date=None,
             env_trading_calendar=tradingcalendar):
    """
    @load is function that returns benchmark_returns and treasury_curves
    The treasury_curves are expected to be a DataFrame with an index of
    dates and columns of the curve names, e.g. '10year', '1month', etc.
    """
    self.trading_day = env_trading_calendar.trading_day.copy()

    # `tc_td` is short for "trading calendar trading days"
    tc_td = env_trading_calendar.trading_days
    if max_date:
        self.trading_days = tc_td[tc_td <= max_date].copy()
    else:
        self.trading_days = tc_td.copy()

    self.first_trading_day = self.trading_days[0]
    self.last_trading_day = self.trading_days[-1]

    self.early_closes = env_trading_calendar.get_early_closes(
        self.first_trading_day, self.last_trading_day)

    self.open_and_closes = env_trading_calendar.open_and_closes.loc[
        self.trading_days]

    self.prev_environment = self
    self.bm_symbol = bm_symbol

    if not load:
        load = load_market_data
    self.benchmark_returns, self.treasury_curves = \
        load(self.trading_day, self.trading_days, self.bm_symbol)

    if max_date:
        tr_c = self.treasury_curves
        # Mask the treasury curves down to the current date.
        # In the case of live trading, the last date in the treasury
        # curves would be the day before the date considered to be
        # 'today'.
        self.treasury_curves = tr_c[tr_c.index <= max_date]

    self.exchange_tz = exchange_tz
    self.asset_finder = AssetFinder()
def __init__(self,
             load=None,
             bm_symbol='^GSPC',
             exchange_tz="US/Eastern",
             trading_schedule=default_nyse_schedule,
             asset_db_path=':memory:'):
    self.bm_symbol = bm_symbol

    if not load:
        load = load_market_data
    self.benchmark_returns, self.treasury_curves = load(
        trading_schedule.day,
        trading_schedule.schedule.index,
        self.bm_symbol,
    )

    self.exchange_tz = exchange_tz

    # A string is treated as a path to a sqlite database; anything else
    # (an engine, or None) is used verbatim.
    if isinstance(asset_db_path, string_types):
        asset_db_path = 'sqlite:///%s' % asset_db_path
        self.engine = engine = create_engine(asset_db_path)
    else:
        self.engine = engine = asset_db_path

    if engine is not None:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(engine)
    else:
        self.asset_finder = None
def load(name, environ=os.environ, timestamp=None):
    """Loads a previously ingested bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. Defaults of os.environ.
    timestamp : datetime, optional
        The timestamp of the data to lookup. Defaults to the current time.

    Returns
    -------
    bundle_data : BundleData
        The raw data readers for this bundle.
    """
    if timestamp is None:
        timestamp = pd.Timestamp.utcnow()

    # Resolve the most recent ingestion at or before `timestamp`, then
    # open one reader per data source rooted at that ingestion.
    timestr = most_recent_data(name, timestamp, environ=environ)
    return BundleData(
        asset_finder=AssetFinder(
            asset_db_path(name, timestr, environ=environ),
        ),
        equity_minute_bar_reader=BcolzMinuteBarReader(
            minute_equity_path(name, timestr, environ=environ),
        ),
        equity_daily_bar_reader=BcolzDailyBarReader(
            daily_equity_path(name, timestr, environ=environ),
        ),
        adjustment_reader=SQLiteAdjustmentReader(
            adjustment_db_path(name, timestr, environ=environ),
        ),
    )
def setUp(self):
    # Every day, assume every stock starts at 2, goes down to 1,
    # goes up to 4, and finishes at 3.
    self.constants = {
        USEquityPricing.low: 1,
        USEquityPricing.open: 2,
        USEquityPricing.close: 3,
        USEquityPricing.high: 4,
    }
    self.assets = [1, 2, 3]
    self.dates = date_range('2014-01-01', '2014-02-01',
                            freq='D', tz='UTC')
    self.loader = ConstantLoader(
        constants=self.constants,
        dates=self.dates,
        assets=self.assets,
    )
    self.asset_info = make_simple_asset_info(
        self.assets,
        start_date=self.dates[0],
        end_date=self.dates[-1],
    )
    self.asset_finder = AssetFinder(self.asset_info)
def test_sid_assignment(self):
    # Metadata keyed by identifier rather than sid — sids are absent.
    metadata = {'PLAY': {'symbol': 'PLAY'},
                'MSFT': {'symbol': 'MSFT'}}
    today = normalize_date(pd.Timestamp('2015-07-09', tz='UTC'))

    # The finder is permitted to mint sids itself.
    finder = AssetFinder(metadata=metadata, allow_sid_assignment=True)

    # Both Assets exist and received distinct sids.
    play = finder.lookup_symbol('PLAY', today)
    msft = finder.lookup_symbol('MSFT', today)
    self.assertEqual('PLAY', play.symbol)
    self.assertIsNotNone(play.sid)
    self.assertNotEqual(play.sid, msft.sid)
def setUpClass(cls):
    cls.AAPL = 1
    cls.MSFT = 2
    cls.BRK_A = 3
    cls.assets = [cls.AAPL, cls.MSFT, cls.BRK_A]
    asset_info = make_simple_asset_info(
        cls.assets,
        Timestamp('2014'),
        Timestamp('2015'),
        ['AAPL', 'MSFT', 'BRK_A'],
    )
    cls.env = trading.TradingEnvironment()
    cls.env.write_data(equities_df=asset_info)
    cls.asset_finder = AssetFinder(cls.env.engine)
    cls.tempdir = tempdir = TempDirectory()
    tempdir.create()
    try:
        cls.raw_data, cls.bar_reader = cls.create_bar_reader(tempdir)
        cls.adj_reader = cls.create_adjustment_reader(tempdir)
        cls.ffc_loader = USEquityPricingLoader(
            cls.bar_reader, cls.adj_reader
        )
    except:
        # Don't leak the temp directory if reader setup fails.
        cls.tempdir.cleanup()
        raise
    cls.dates = cls.raw_data[cls.AAPL].index.tz_localize('UTC')
def __init__(self,
             load=None,
             bm_symbol='^GSPC',
             exchange_tz="US/Eastern",
             max_date=None,
             env_trading_calendar=tradingcalendar,
             asset_db_path=':memory:'):
    """
    @load is function that returns benchmark_returns and treasury_curves
    The treasury_curves are expected to be a DataFrame with an index of
    dates and columns of the curve names, e.g. '10year', '1month', etc.
    """
    self.trading_day = env_trading_calendar.trading_day.copy()

    # `tc_td` is short for "trading calendar trading days"
    tc_td = env_trading_calendar.trading_days
    if max_date:
        self.trading_days = tc_td[tc_td <= max_date].copy()
    else:
        self.trading_days = tc_td.copy()

    self.first_trading_day = self.trading_days[0]
    self.last_trading_day = self.trading_days[-1]

    self.early_closes = env_trading_calendar.get_early_closes(
        self.first_trading_day, self.last_trading_day)

    self.open_and_closes = env_trading_calendar.open_and_closes.loc[
        self.trading_days]

    self.bm_symbol = bm_symbol

    if not load:
        load = load_market_data
    self.benchmark_returns, self.treasury_curves = \
        load(self.trading_day, self.trading_days, self.bm_symbol)

    if max_date:
        tr_c = self.treasury_curves
        # Mask the treasury curves down to the current date.
        # In the case of live trading, the last date in the treasury
        # curves would be the day before the date considered to be
        # 'today'.
        self.treasury_curves = tr_c[tr_c.index <= max_date]

    self.exchange_tz = exchange_tz

    # A string is treated as a path to a sqlite database and gets a
    # freshly initialized schema; anything else is used verbatim.
    if isinstance(asset_db_path, string_types):
        asset_db_path = 'sqlite:///%s' % asset_db_path
        self.engine = engine = create_engine(asset_db_path)
        AssetDBWriterFromDictionary().init_db(engine)
    else:
        self.engine = engine = asset_db_path

    if engine is not None:
        self.asset_finder = AssetFinder(engine)
    else:
        self.asset_finder = None
def test_finder_checks_version(self):
    version_table = self.metadata.tables['version_info']

    # Stamp the database with a bogus version number.
    version_table.delete().execute()
    write_version_info(self.engine, version_table, -2)
    check_version_info(self.engine, version_table, -2)

    # Building a finder against a mismatched db must fail loudly.
    with self.assertRaises(AssetDBVersionError):
        AssetFinder(engine=self.engine)

    # Restore the expected version number.
    version_table.delete().execute()
    write_version_info(self.engine, version_table, ASSET_DB_VERSION)
    check_version_info(self.engine, version_table, ASSET_DB_VERSION)

    # With matching versions, construction succeeds.
    AssetFinder(engine=self.engine)
def test_sid_assignment(self):
    # Bare identifiers — no sids supplied.
    metadata = ["PLAY", "MSFT"]
    today = normalize_date(pd.Timestamp("2015-07-09", tz="UTC"))

    # Writing with sid assignment enabled lets the env mint sids.
    self.env.write_data(equities_identifiers=metadata,
                        allow_sid_assignment=True)

    # Both Assets exist and received distinct sids.
    finder = AssetFinder(self.env.engine)
    play = finder.lookup_symbol("PLAY", today)
    msft = finder.lookup_symbol("MSFT", today)
    self.assertEqual("PLAY", play.symbol)
    self.assertIsNotNone(play.sid)
    self.assertNotEqual(play.sid, msft.sid)
def test_sid_assignment_failure(self):
    # Metadata keyed by identifier rather than sid — sids are absent.
    metadata = {'PLAY': {'symbol': 'PLAY'},
                'MSFT': {'symbol': 'MSFT'}}

    # A finder that may not assign sids must refuse sid-less metadata.
    with self.assertRaises(SidAssignmentError):
        AssetFinder(metadata=metadata, allow_sid_assignment=False)
def test_insert_metadata(self):
    data = {0: {'start_date': '2014-01-01',
                'end_date': '2015-01-01',
                'symbol': "PLAY",
                'foo_data': "FOO"}}
    self.env.write_data(equities_data=data)
    finder = AssetFinder(self.env.engine)

    # The recognized fields round-trip through the writer and finder.
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual('PLAY', equity.symbol)
    self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'),
                     equity.end_date)

    # The unrecognized field never became an attribute.
    with self.assertRaises(AttributeError):
        equity.foo_data
def test_lookup_symbol(self):
    # Incrementing by two so that start and end dates for each
    # generated Asset don't overlap (each Asset's end_date is the
    # day after its start date.)
    dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
    df = pd.DataFrame.from_records([
        {'sid': i,
         'symbol': 'existing',
         'start_date': date.value,
         'end_date': (date + timedelta(days=1)).value,
         'exchange': 'NYSE'}
        for i, date in enumerate(dates)
    ])
    self.env.write_data(equities_df=df)
    finder = AssetFinder(self.env.engine)

    # Run checks twice to test for caching bugs.
    for _ in range(2):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('non_existing', dates[0])

        # Without a date, 'existing' is ambiguous across all five sids.
        with self.assertRaises(MultipleSymbolsFound):
            finder.lookup_symbol('existing', None)

        # A date disambiguates: each sid owns exactly one window.
        for i, date in enumerate(dates):
            result = finder.lookup_symbol('existing', date)
            self.assertEqual(result.symbol, 'EXISTING')
            self.assertEqual(result.sid, i)
def test_consume_asset_as_identifier(self):
    eq_end = pd.Timestamp("2012-01-01", tz="UTC")
    fut_end = pd.Timestamp("2008-01-01", tz="UTC")

    # Build one simple Asset of each class.
    equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end)
    future_asset = Future(200, symbol="TESTFUT", end_date=fut_end)

    # Feed the Assets themselves in as identifiers.
    self.env.write_data(equities_identifiers=[equity_asset],
                        futures_identifiers=[future_asset])
    finder = AssetFinder(self.env.engine)

    # The finder reconstructs equal Assets with the same end dates.
    self.assertEqual(equity_asset, finder.retrieve_asset(1))
    self.assertEqual(future_asset, finder.retrieve_asset(200))
    self.assertEqual(eq_end, finder.retrieve_asset(1).end_date)
    self.assertEqual(fut_end, finder.retrieve_asset(200).end_date)
def test_sid_assignment(self):
    # Bare identifiers — no sids supplied.
    metadata = ['PLAY', 'MSFT']
    today = normalize_date(pd.Timestamp('2015-07-09', tz='UTC'))

    # Writing with sid assignment enabled lets the env mint sids.
    self.env.write_data(equities_identifiers=metadata,
                        allow_sid_assignment=True)

    # Both Assets exist and received distinct sids.
    finder = AssetFinder(self.env.engine)
    play = finder.lookup_symbol('PLAY', today)
    msft = finder.lookup_symbol('MSFT', today)
    self.assertEqual('PLAY', play.symbol)
    self.assertIsNotNone(play.sid)
    self.assertNotEqual(play.sid, msft.sid)
def test_insert_metadata(self):
    finder = AssetFinder()
    finder.insert_metadata(0,
                           asset_type='equity',
                           start_date='2014-01-01',
                           end_date='2015-01-01',
                           symbol="PLAY",
                           foo_data="FOO")

    # An Equity was built from the inserted fields.
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual('PLAY', equity.symbol)
    self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'),
                     equity.end_date)

    # The unrecognized field was dropped from the cache.
    self.assertFalse('foo_data' in finder.metadata_cache[0])
def test_insert_metadata(self):
    finder = AssetFinder()
    finder.insert_metadata(0,
                           asset_type='equity',
                           start_date='2014-01-01',
                           end_date='2015-01-01',
                           symbol="PLAY",
                           foo_data="FOO")

    # An Equity was built from the inserted fields.
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual('PLAY', equity.symbol)
    self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'),
                     equity.end_date)

    # The unrecognized field was dropped from the cache.
    self.assertFalse('foo_data' in finder.metadata_cache[0])
def setUp(self):
    self.assets = [1, 2, 3]
    self.dates = date_range('2014-01-01', '2014-02-01',
                            freq='D', tz='UTC')
    # Asset info spanning the full date range for every sid.
    asset_info = make_simple_asset_info(
        self.assets,
        start_date=self.dates[0],
        end_date=self.dates[-1],
    )
    self.asset_finder = AssetFinder(asset_info)
def test_insert_metadata(self):
    data = {0: {'start_date': '2014-01-01',
                'end_date': '2015-01-01',
                'symbol': "PLAY",
                'foo_data': "FOO"}}
    self.env.write_data(equities_data=data)
    finder = AssetFinder(self.env.engine)

    # The recognized fields round-trip through the writer and finder.
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual('PLAY', equity.symbol)
    self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'),
                     equity.end_date)

    # The unrecognized field never became an attribute.
    with self.assertRaises(AttributeError):
        equity.foo_data
def test_map_identifier_index_to_sids(self):
    # An empty finder plus a handful of hand-built Assets.
    dt = pd.Timestamp('2014-01-01', tz='UTC')
    finder = AssetFinder(self.env.engine)
    asset1 = Equity(1, symbol="AAPL")
    asset2 = Equity(2, symbol="GOOG")
    asset200 = Future(200, symbol="CLK15")
    asset201 = Future(201, symbol="CLM15")

    # Assets map to their own sids, as plain ints, preserving order.
    pre_map = [asset1, asset2, asset200, asset201]
    post_map = finder.map_identifier_index_to_sids(pre_map, dt)
    self.assertListEqual([1, 2, 200, 201], post_map)
    for sid in post_map:
        self.assertIsInstance(sid, int)

    # A different input order yields the correspondingly-ordered sids.
    pre_map = [asset201, asset2, asset200, asset1]
    post_map = finder.map_identifier_index_to_sids(pre_map, dt)
    self.assertListEqual([201, 2, 200, 1], post_map)
def test_consume_asset_as_identifier(self):
    eq_end = pd.Timestamp('2012-01-01', tz='UTC')
    fut_end = pd.Timestamp('2008-01-01', tz='UTC')

    # Build one simple Asset of each class.
    equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end)
    future_asset = Future(200, symbol="TESTFUT", end_date=fut_end)

    # Feed the Assets themselves in as identifiers.
    finder = AssetFinder()
    finder.consume_identifiers([equity_asset, future_asset])
    finder.populate_cache()

    # The finder reconstructs equal Assets with the same end dates.
    self.assertEqual(equity_asset, finder.retrieve_asset(1))
    self.assertEqual(future_asset, finder.retrieve_asset(200))
    self.assertEqual(eq_end, finder.retrieve_asset(1).end_date)
    self.assertEqual(fut_end, finder.retrieve_asset(200).end_date)
def test_map_identifier_index_to_sids(self):
    # An empty finder plus a handful of hand-built Assets.
    dt = pd.Timestamp("2014-01-01", tz="UTC")
    finder = AssetFinder(self.env.engine)
    asset1 = Equity(1, symbol="AAPL")
    asset2 = Equity(2, symbol="GOOG")
    asset200 = Future(200, symbol="CLK15")
    asset201 = Future(201, symbol="CLM15")

    # Assets map to their own sids, as plain ints, preserving order.
    pre_map = [asset1, asset2, asset200, asset201]
    post_map = finder.map_identifier_index_to_sids(pre_map, dt)
    self.assertListEqual([1, 2, 200, 201], post_map)
    for sid in post_map:
        self.assertIsInstance(sid, int)

    # A different input order yields the correspondingly-ordered sids.
    pre_map = [asset201, asset2, asset200, asset1]
    post_map = finder.map_identifier_index_to_sids(pre_map, dt)
    self.assertListEqual([201, 2, 200, 1], post_map)
def test_finder_checks_version(self):
    # Create an env and stamp its db with a bogus version number.
    env = TradingEnvironment(load=noop_load)
    metadata = sa.MetaData(bind=env.engine)
    version_table = _version_table_schema(metadata)
    version_table.delete().execute()
    write_version_info(version_table, -2)
    check_version_info(version_table, -2)

    # Building a finder against a mismatched db must fail loudly.
    with self.assertRaises(AssetDBVersionError):
        AssetFinder(engine=env.engine)

    # Restore the expected version number.
    version_table.delete().execute()
    write_version_info(version_table, ASSET_DB_VERSION)
    check_version_info(version_table, ASSET_DB_VERSION)

    # With matching versions, construction succeeds.
    AssetFinder(engine=env.engine)
def test_asset_finder_doesnt_silently_create_useless_empty_files(self):
    nonexistent_path = self.tmpdir.getpath(self.id() + '__nothing_here')

    # Pointing the finder at a missing db must raise, with the path
    # named in the message.
    with self.assertRaises(ValueError) as e:
        AssetFinder(nonexistent_path)
    expected = "SQLite file {!r} doesn't exist.".format(nonexistent_path)
    self.assertEqual(str(e.exception), expected)

    # sqlite3.connect will create an empty file if you connect somewhere
    # nonexistent. Test that we don't do that.
    self.assertFalse(os.path.exists(nonexistent_path))
def test_compute_lifetimes(self, env=None):
    num_assets = 4
    trading_day = env.trading_day
    first_start = pd.Timestamp('2015-04-01', tz='UTC')

    frame = make_rotating_asset_info(
        num_assets=num_assets,
        first_start=first_start,
        frequency=env.trading_day,
        periods_between_starts=3,
        asset_lifetime=5
    )
    finder = AssetFinder(frame)

    all_dates = pd.date_range(
        start=first_start,
        end=frame.end_date.max(),
        freq=trading_day,
    )

    for dates in all_subindices(all_dates):
        # Build the expected mask by brute force: a cell is True iff
        # the date falls inside that asset's [start_date, end_date].
        expected_mask = full(
            shape=(len(dates), num_assets),
            fill_value=False,
            dtype=bool,
        )
        for i, date in enumerate(dates):
            it = frame[['start_date', 'end_date']].itertuples()
            for j, start, end in it:
                if start <= date <= end:
                    expected_mask[i, j] = True

        expected_result = pd.DataFrame(
            data=expected_mask,
            index=dates,
            columns=frame.sid.values,
        )
        actual_result = finder.lifetimes(dates)
        assert_frame_equal(actual_result, expected_result)
def test_consume_metadata(self):
    # Dict consumption: new fields merge into the existing entry.
    finder = AssetFinder({0: {'asset_type': 'equity'}})
    dict_to_consume = {0: {'symbol': 'PLAY'},
                       1: {'symbol': 'MSFT'}}
    finder.consume_metadata(dict_to_consume)
    self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
    self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])

    # DataFrame consumption.
    df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
    df['asset_name'][0] = "Dave'N'Busters"
    df['exchange'][0] = "NASDAQ"
    df['asset_name'][1] = "Microsoft"
    df['exchange'][1] = "NYSE"
    finder.consume_metadata(df)
    self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange'])
    self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])

    # Fields set before the DataFrame pass are still intact.
    self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
def test_lookup_symbol(self):
    # Incrementing by two so that start and end dates for each
    # generated Asset don't overlap (each Asset's end_date is the
    # day after its start date.)
    dates = pd.date_range("2013-01-01", freq="2D", periods=5, tz="UTC")
    df = pd.DataFrame.from_records([
        {"sid": i,
         "symbol": "existing",
         "start_date": date.value,
         "end_date": (date + timedelta(days=1)).value,
         "exchange": "NYSE"}
        for i, date in enumerate(dates)
    ])
    self.env.write_data(equities_df=df)
    finder = AssetFinder(self.env.engine)

    # Run checks twice to test for caching bugs.
    for _ in range(2):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol("NON_EXISTING", dates[0])

        # Without a date, "EXISTING" is ambiguous across all five sids.
        with self.assertRaises(MultipleSymbolsFound):
            finder.lookup_symbol("EXISTING", None)

        # A date disambiguates: each sid owns exactly one window.
        for i, date in enumerate(dates):
            result = finder.lookup_symbol("EXISTING", date)
            self.assertEqual(result.symbol, "EXISTING")
            self.assertEqual(result.sid, i)
def test_insert_metadata(self):
    """insert_metadata keeps known fields, drops unknown ones, updates in place."""
    finder = AssetFinder()
    finder.insert_metadata(
        0,
        asset_type='equity',
        start_date='2014-01-01',
        end_date='2015-01-01',
        symbol="PLAY",
        foo_data="FOO",
    )

    # The recognized fields landed in the cache...
    self.assertEqual('equity', finder.metadata_cache[0]['asset_type'])
    self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
    self.assertEqual('2015-01-01', finder.metadata_cache[0]['end_date'])

    # ...while the unknown field was discarded.
    self.assertFalse('foo_data' in finder.metadata_cache[0])

    # A second insert for the same sid updates fields in place.
    finder.insert_metadata(
        0,
        asset_type='equity',
        start_date='2014-01-01',
        end_date='2015-02-01',
        symbol="PLAY",
        exchange="NYSE",
    )
    self.assertEqual('2015-02-01', finder.metadata_cache[0]['end_date'])
    self.assertEqual('NYSE', finder.metadata_cache[0]['exchange'])

    # Fields not named in the update survive unchanged.
    self.assertEqual('PLAY', finder.metadata_cache[0]['symbol'])
def test_lookup_symbol(self):
    # Space start dates two days apart: each asset lives only from its
    # start date to the following day, so lifetimes never overlap.
    dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
    equities = pd.DataFrame.from_records([
        {
            'sid': sid,
            'symbol': 'existing',
            'start_date': date.value,
            'end_date': (date + timedelta(days=1)).value,
            'exchange': 'NYSE',
        }
        for sid, date in enumerate(dates)
    ])
    self.env.write_data(equities_df=equities)
    finder = AssetFinder(self.env.engine)

    # Two passes: the second pass exercises the finder's caches.
    for _ in range(2):
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('NON_EXISTING', dates[0])

        with self.assertRaises(MultipleSymbolsFound):
            finder.lookup_symbol('EXISTING', None)

        for sid, date in enumerate(dates):
            # With an as-of date the shared symbol resolves uniquely.
            result = finder.lookup_symbol('EXISTING', date)
            self.assertEqual(result.symbol, 'EXISTING')
            self.assertEqual(result.sid, sid)
def __init__(self, load=None, bm_symbol='^GSPC', exchange_tz="US/Eastern",
             min_date=None, max_date=None,
             env_trading_calendar=tradingcalendar,
             asset_db_path=':memory:'):
    """Build a trading environment over [min_date, max_date] of the calendar."""
    self.trading_day = env_trading_calendar.trading_day.copy()

    # Restrict the calendar's trading days to the requested window.
    calendar_days = env_trading_calendar.trading_days
    self.trading_days = calendar_days[
        calendar_days.slice_indexer(min_date, max_date)
    ]
    self.first_trading_day = self.trading_days[0]
    self.last_trading_day = self.trading_days[-1]

    self.early_closes = env_trading_calendar.get_early_closes(
        self.first_trading_day, self.last_trading_day)
    self.open_and_closes = env_trading_calendar.open_and_closes.loc[
        self.trading_days]

    self.bm_symbol = bm_symbol
    load = load or load_market_data
    self.benchmark_returns, self.treasury_curves = load(
        self.trading_day, self.trading_days, self.bm_symbol)

    if max_date:
        # Mask the treasury curves down to the current date.  In live
        # trading the curves' last date is the day before the date
        # considered to be 'today'.
        curves = self.treasury_curves
        self.treasury_curves = curves[curves.index <= max_date]

    self.exchange_tz = exchange_tz

    # A string is treated as a filesystem path for a sqlite database;
    # anything else is assumed to already be an engine (or None).
    if isinstance(asset_db_path, string_types):
        asset_db_path = 'sqlite:///%s' % asset_db_path
        self.engine = engine = create_engine(asset_db_path)
    else:
        self.engine = engine = asset_db_path

    if engine is not None:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(engine)
    else:
        self.asset_finder = None
def test_consume_metadata(self):
    # Write a plain dict of equity metadata and read it back.
    dict_to_consume = {0: {"symbol": "PLAY"}, 1: {"symbol": "MSFT"}}
    self.env.write_data(equities_data=dict_to_consume)
    finder = AssetFinder(self.env.engine)

    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual("PLAY", equity.symbol)

    # Do the same through a DataFrame, against a fresh environment.
    df = pd.DataFrame(columns=["asset_name", "exchange"], index=[0, 1])
    df.loc[0, "asset_name"] = "Dave'N'Busters"
    df.loc[0, "exchange"] = "NASDAQ"
    df.loc[1, "asset_name"] = "Microsoft"
    df.loc[1, "exchange"] = "NYSE"
    self.env = TradingEnvironment(load=noop_load)
    self.env.write_data(equities_df=df)
    finder = AssetFinder(self.env.engine)

    self.assertEqual("NASDAQ", finder.retrieve_asset(0).exchange)
    self.assertEqual("Microsoft", finder.retrieve_asset(1).asset_name)
def test_compute_lifetimes(self, env=None):
    """Check finder.lifetimes over every subindex of the full date range."""
    num_assets = 4
    trading_day = env.trading_day
    first_start = pd.Timestamp('2015-04-01', tz='UTC')
    frame = make_rotating_asset_info(num_assets=num_assets,
                                     first_start=first_start,
                                     frequency=env.trading_day,
                                     periods_between_starts=3,
                                     asset_lifetime=5)
    finder = AssetFinder(frame)
    all_dates = pd.date_range(start=first_start,
                              end=frame.end_date.max(),
                              freq=trading_day)

    for dates in all_subindices(all_dates):
        # Expected mask: True wherever the asset is alive on the date.
        expected_mask = full(shape=(len(dates), num_assets),
                             fill_value=False,
                             dtype=bool)
        for i, date in enumerate(dates):
            lifetimes_iter = frame[['start_date', 'end_date']].itertuples()
            for j, start, end in lifetimes_iter:
                if start <= date <= end:
                    expected_mask[i, j] = True

        expected_result = pd.DataFrame(data=expected_mask,
                                       index=dates,
                                       columns=frame.sid.values)
        actual_result = finder.lifetimes(dates)
        assert_frame_equal(actual_result, expected_result)
def __init__(
    self,
    load=None,
    bm_symbol='^GSPC',
    exchange_tz="US/Eastern",
    max_date=None,
    env_trading_calendar=tradingcalendar
):
    """
    @load is function that returns benchmark_returns and treasury_curves.

    The treasury_curves are expected to be a DataFrame with an index of
    dates and columns of the curve names, e.g. '10year', '1month', etc.
    """
    self.trading_day = env_trading_calendar.trading_day.copy()

    # Trim the calendar's trading days to those at or before max_date.
    tc_td = env_trading_calendar.trading_days
    self.trading_days = (
        tc_td[tc_td <= max_date].copy() if max_date else tc_td.copy()
    )
    self.first_trading_day = self.trading_days[0]
    self.last_trading_day = self.trading_days[-1]

    self.early_closes = env_trading_calendar.get_early_closes(
        self.first_trading_day, self.last_trading_day)
    self.open_and_closes = env_trading_calendar.open_and_closes.loc[
        self.trading_days]

    self.prev_environment = self
    self.bm_symbol = bm_symbol
    load = load or load_market_data
    self.benchmark_returns, self.treasury_curves = load(
        self.trading_day, self.trading_days, self.bm_symbol)

    if max_date:
        # Mask the treasury curves down to the current date.  In live
        # trading their last date is the day before 'today'.
        curves = self.treasury_curves
        self.treasury_curves = curves[curves.index <= max_date]

    self.exchange_tz = exchange_tz
    self.asset_finder = AssetFinder()
def test_consume_metadata(self):
    # Equity metadata supplied as a dict round-trips through write_data.
    self.env.write_data(
        equities_data={0: {'symbol': 'PLAY'}, 1: {'symbol': 'MSFT'}}
    )
    finder = AssetFinder(self.env.engine)
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual('PLAY', equity.symbol)

    # Equity metadata supplied as a DataFrame, on a fresh environment.
    df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
    df.loc[0, 'asset_name'] = "Dave'N'Busters"
    df.loc[0, 'exchange'] = "NASDAQ"
    df.loc[1, 'asset_name'] = "Microsoft"
    df.loc[1, 'exchange'] = "NYSE"
    self.env = TradingEnvironment(load=noop_load)
    self.env.write_data(equities_df=df)
    finder = AssetFinder(self.env.engine)
    self.assertEqual('NASDAQ', finder.retrieve_asset(0).exchange)
    self.assertEqual('Microsoft', finder.retrieve_asset(1).asset_name)
def engine_from_files(daily_bar_path,
                      adjustments_path,
                      asset_db_path,
                      calendar,
                      warmup_assets=False):
    """
    Construct a SimplePipelineEngine from local filesystem resources.

    Parameters
    ----------
    daily_bar_path : str
        Path to pass to `BcolzDailyBarReader`.
    adjustments_path : str
        Path to pass to SQLiteAdjustmentReader.
    asset_db_path : str
        Path to pass to `AssetFinder`.
    calendar : pd.DatetimeIndex
        Calendar to use for the loader.
    warmup_assets : bool, optional
        Whether or not to populate AssetFinder caches.  This can speed up
        initial latency on subsequent pipeline runs, at the cost of extra
        memory consumption.  Default is False
    """
    pricing_loader = USEquityPricingLoader.from_files(daily_bar_path,
                                                      adjustments_path)

    # Accept a bare filesystem path by coercing it to a sqlite URL.
    if not asset_db_path.startswith("sqlite:"):
        asset_db_path = "sqlite:///" + asset_db_path
    asset_finder = AssetFinder(asset_db_path)

    if warmup_assets:
        # Pre-populate the finder's caches up front.
        warmed = asset_finder.retrieve_all(asset_finder.sids)
        print("Warmed up %d assets." % len(warmed))

    return SimplePipelineEngine(
        lambda _: pricing_loader,
        calendar,
        asset_finder,
    )
def test_yahoo_bars_to_panel_source(self):
    """Events from a DataPanelSource carry every expected pricing field."""
    finder = AssetFinder()
    stocks = ['AAPL', 'GE']
    start = pd.datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc)
    end = pd.datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc)
    data = factory.load_bars_from_yahoo(stocks=stocks,
                                        indexes={},
                                        start=start,
                                        end=end)
    check_fields = ['sid', 'open', 'high', 'low', 'close',
                    'volume', 'price']

    # Re-key the panel's items from identifiers to sids.
    copy_panel = data.copy()
    sids = finder.map_identifier_index_to_sids(
        data.items, data.major_axis[0]
    )
    copy_panel.items = sids

    source = DataPanelSource(copy_panel)
    for event in source:
        for field in check_fields:
            self.assertIn(field, event)
        self.assertTrue(isinstance(event['volume'], (integer_types)))
        self.assertTrue(event['sid'] in sids)
def test_lookup_future_by_expiration(self):
    metadata = {
        2: {
            'symbol': 'ADN15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-06-15', tz='UTC')
        },
        1: {
            'symbol': 'ADV15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-09-14', tz='UTC')
        },
        0: {
            'symbol': 'ADF16',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-12-14', tz='UTC')
        },
    }
    finder = AssetFinder(metadata=metadata)
    dt = pd.Timestamp('2015-06-19', tz='UTC')

    # First-of-the-month reference timestamps.
    may_15 = pd.Timestamp('2015-05-01', tz='UTC')
    june_15 = pd.Timestamp('2015-06-01', tz='UTC')
    sept_15 = pd.Timestamp('2015-09-01', tz='UTC')
    dec_15 = pd.Timestamp('2015-12-01', tz='UTC')
    jan_16 = pd.Timestamp('2016-01-01', tz='UTC')

    # ADV15 (sid 1) is the next valid contract for every ref_date
    # before its 9/14/15 expiration.
    for ref_date in (may_15, june_15, sept_15):
        contract = finder.lookup_future_by_expiration('AD', dt, ref_date)
        self.assertEqual(contract.sid, 1)

    # ADF16 has the next expiration date after 12/1/15.
    contract = finder.lookup_future_by_expiration('AD', dt, dec_15)
    self.assertEqual(contract.sid, 0)

    # No contracts exist after 12/14/2015, so we should get none.
    self.assertIsNone(finder.lookup_future_by_expiration('AD', dt, jan_16))
def __init__(self,
             load=None,
             bm_symbol='SPY',
             exchange_tz="US/Eastern",
             trading_calendar=None,
             trading_day=None,
             trading_days=None,
             asset_db_path=':memory:',
             future_chain_predicates=CHAIN_PREDICATES,
             environ=None):
    """Set up benchmark/treasury data and the asset database."""
    self.bm_symbol = bm_symbol
    load = load or partial(load_market_data, environ=environ)

    # Fall back to the NYSE calendar for whichever of trading_day /
    # trading_days the caller did not supply.
    if trading_day is None:
        if not trading_calendar:
            trading_calendar = get_calendar("NYSE")
        trading_day = trading_calendar.day
    if trading_days is None:
        if not trading_calendar:
            trading_calendar = get_calendar("NYSE")
        trading_days = trading_calendar.schedule.index

    self.benchmark_returns, self.treasury_curves = load(
        trading_day,
        trading_days,
        self.bm_symbol,
    )

    self.exchange_tz = exchange_tz

    # A string is treated as a path for a sqlite database; anything
    # else is assumed to already be an engine (or None).
    if isinstance(asset_db_path, string_types):
        asset_db_path = 'sqlite:///' + asset_db_path
        self.engine = engine = create_engine(asset_db_path)
    else:
        self.engine = engine = asset_db_path

    if engine is not None:
        AssetDBWriter(engine).init_db()
        self.asset_finder = AssetFinder(
            engine, future_chain_predicates=future_chain_predicates)
    else:
        self.asset_finder = None
def test_consume_metadata(self):
    # Dict consumption.
    finder = AssetFinder()
    finder.consume_metadata({0: {'symbol': 'PLAY'}, 1: {'symbol': 'MSFT'}})
    equity = finder.retrieve_asset(0)
    self.assertIsInstance(equity, Equity)
    self.assertEqual('PLAY', equity.symbol)

    # DataFrame consumption, through a fresh finder.
    finder = AssetFinder()
    df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1])
    df.loc[0, 'asset_name'] = "Dave'N'Busters"
    df.loc[0, 'exchange'] = "NASDAQ"
    df.loc[1, 'asset_name'] = "Microsoft"
    df.loc[1, 'exchange'] = "NYSE"
    finder.consume_metadata(df)
    self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange'])
    self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])
def test_lookup_future_by_expiration(self):
    metadata = {
        2: {
            'symbol': 'ADN15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-06-15', tz='UTC'),
            'start_date': pd.Timestamp('2015-01-01', tz='UTC')
        },
        1: {
            'symbol': 'ADV15',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-09-14', tz='UTC'),
            'start_date': pd.Timestamp('2015-01-01', tz='UTC')
        },
        0: {
            'symbol': 'ADF16',
            'root_symbol': 'AD',
            'asset_type': 'future',
            'expiration_date': pd.Timestamp('2015-12-14', tz='UTC'),
            'start_date': pd.Timestamp('2015-01-01', tz='UTC')
        },
    }
    finder = AssetFinder(metadata=metadata)
    dt = pd.Timestamp('2015-06-19', tz='UTC')

    # First-of-the-month reference timestamps.
    may_15 = pd.Timestamp('2015-05-01', tz='UTC')
    june_15 = pd.Timestamp('2015-06-01', tz='UTC')
    sept_15 = pd.Timestamp('2015-09-01', tz='UTC')
    dec_15 = pd.Timestamp('2015-12-01', tz='UTC')
    jan_16 = pd.Timestamp('2016-01-01', tz='UTC')

    # ADV15 (sid 1) is the next valid contract for every ref_date up
    # to its 9/14/15 expiration.
    for ref_date in (may_15, june_15, sept_15):
        contract = finder.lookup_future_by_expiration('AD', dt, ref_date)
        self.assertEqual(contract.sid, 1)

    # ADF16 has the next expiration date after 12/1/15.
    contract = finder.lookup_future_by_expiration('AD', dt, dec_15)
    self.assertEqual(contract.sid, 0)

    # No contracts exist after 12/14/2015, so we should get none.
    self.assertIsNone(finder.lookup_future_by_expiration('AD', dt, jan_16))
def test_lookup_symbol_fuzzy(self):
    as_of = pd.Timestamp('2013-01-01', tz='UTC')
    frame = pd.DataFrame.from_records([
        {
            'sid': i,
            'file_name': 'TEST@%d' % i,
            'company_name': "company%d" % i,
            'start_date_nano': as_of.value,
            'end_date_nano': as_of.value,
            'exchange': uuid.uuid4().hex,
        }
        for i in range(3)
    ])
    finder = AssetFinder(frame)
    asset_0, asset_1, asset_2 = (
        finder.retrieve_asset(i) for i in range(3)
    )

    # Two passes to flush out caching bugs.
    for _ in range(2):
        # The bare root symbol matches nothing.
        self.assertIsNone(finder.lookup_symbol('test', as_of))
        self.assertEqual(
            asset_1,
            finder.lookup_symbol('test@1', as_of)
        )

        # A fuzzy char that is already present is harmless.
        self.assertEqual(
            asset_1,
            finder.lookup_symbol('test@1', as_of, fuzzy='@')
        )

        # Without a fuzzy_str, 'test1' does not resolve.
        self.assertIsNone(finder.lookup_symbol('test1', as_of))
        # Neither does it with the wrong fuzzy_str.
        self.assertIsNone(finder.lookup_symbol('test1', as_of, fuzzy='*'))
        # With the correct fuzzy_str the missing delimiter is bridged.
        self.assertEqual(
            asset_1,
            finder.lookup_symbol('test1', as_of, fuzzy='@'),
        )
def test_lookup_symbol_delimited(self):
    as_of = pd.Timestamp('2013-01-01', tz='UTC')
    frame = pd.DataFrame.from_records([
        {
            'sid': i,
            'symbol': 'TEST.%d' % i,
            'company_name': "company%d" % i,
            'start_date': as_of.value,
            'end_date': as_of.value,
            'exchange': uuid.uuid4().hex
        }
        for i in range(3)
    ])
    self.env.write_data(equities_df=frame)
    finder = AssetFinder(self.env.engine)
    asset_0, asset_1, asset_2 = (
        finder.retrieve_asset(i) for i in range(3)
    )

    # Two passes to flush out caching bugs.
    for i in range(2):
        # The root symbol alone, or the digits without a delimiter,
        # should not resolve.
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('TEST', as_of)
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('TEST1', as_of)
        # '@' is not a supported delimiter
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('TEST@1', as_of)

        # Every supported delimiter resolves to the same asset.
        for fuzzy_char in ['-', '/', '_', '.']:
            self.assertEqual(
                asset_1,
                finder.lookup_symbol('TEST%s1' % fuzzy_char, as_of)
            )
def setUp(self):
    """Build a three-asset finder spanning January 2015."""
    env = TradingEnvironment.instance()
    trading_day = env.trading_day

    self.assets = Int64Index([1, 2, 3])
    self.dates = date_range(
        '2015-01-01',
        '2015-01-31',
        freq=trading_day,
        tz='UTC',
    )

    # All assets live for the full date range.
    asset_info = make_simple_asset_info(
        self.assets,
        start_date=self.dates[0],
        end_date=self.dates[-1],
    )
    self.asset_finder = AssetFinder(asset_info)