def test_lookup_symbol_fuzzy(self): as_of = pd.Timestamp('2013-01-01', tz='UTC') frame = pd.DataFrame.from_records([{ 'sid': i, 'file_name': 'TEST@%d' % i, 'company_name': "company%d" % i, 'start_date_nano': as_of.value, 'end_date_nano': as_of.value, 'exchange': uuid.uuid4().hex, } for i in range(3)]) finder = AssetFinder(frame, fuzzy_char='@') asset_0, asset_1, asset_2 = (finder.retrieve_asset(i) for i in range(3)) for i in range(2): # we do it twice to test for caching bugs self.assertIsNone(finder.lookup_symbol('test', as_of)) self.assertEqual(asset_1, finder.lookup_symbol('test@1', as_of)) # Adding an unnecessary fuzzy shouldn't matter. self.assertEqual(asset_1, finder.lookup_symbol('test@1', as_of, fuzzy=True)) # Shouldn't find this with no fuzzy_str passed. self.assertIsNone(finder.lookup_symbol('test1', as_of)) # Should find exact match. self.assertEqual( asset_1, finder.lookup_symbol('test1', as_of, fuzzy=True), )
def test_lookup_symbol_delimited(self): as_of = pd.Timestamp('2013-01-01', tz='UTC') frame = pd.DataFrame.from_records([{ 'sid': i, 'symbol': 'TEST.%d' % i, 'company_name': "company%d" % i, 'start_date': as_of.value, 'end_date': as_of.value, 'exchange': uuid.uuid4().hex } for i in range(3)]) self.env.write_data(equities_df=frame) finder = AssetFinder(self.env.engine) asset_0, asset_1, asset_2 = (finder.retrieve_asset(i) for i in range(3)) # we do it twice to catch caching bugs for i in range(2): with self.assertRaises(SymbolNotFound): finder.lookup_symbol('test', as_of) with self.assertRaises(SymbolNotFound): finder.lookup_symbol('test1', as_of) # '@' is not a supported delimiter with self.assertRaises(SymbolNotFound): finder.lookup_symbol('test@1', as_of) # Adding an unnecessary fuzzy shouldn't matter. for fuzzy_char in ['-', '/', '_', '.']: self.assertEqual( asset_1, finder.lookup_symbol('test%s1' % fuzzy_char, as_of))
def test_lookup_symbol_delimited(self): as_of = pd.Timestamp("2013-01-01", tz="UTC") frame = pd.DataFrame.from_records( [ { "sid": i, "symbol": "TEST.%d" % i, "company_name": "company%d" % i, "start_date": as_of.value, "end_date": as_of.value, "exchange": uuid.uuid4().hex, } for i in range(3) ] ) self.env.write_data(equities_df=frame) finder = AssetFinder(self.env.engine) asset_0, asset_1, asset_2 = (finder.retrieve_asset(i) for i in range(3)) # we do it twice to catch caching bugs for i in range(2): with self.assertRaises(SymbolNotFound): finder.lookup_symbol("TEST", as_of) with self.assertRaises(SymbolNotFound): finder.lookup_symbol("TEST1", as_of) # '@' is not a supported delimiter with self.assertRaises(SymbolNotFound): finder.lookup_symbol("TEST@1", as_of) # Adding an unnecessary fuzzy shouldn't matter. for fuzzy_char in ["-", "/", "_", "."]: self.assertEqual(asset_1, finder.lookup_symbol("TEST%s1" % fuzzy_char, as_of))
def test_consume_asset_as_identifier(self): # Build some end dates eq_end = pd.Timestamp("2012-01-01", tz="UTC") fut_end = pd.Timestamp("2008-01-01", tz="UTC") # Build some simple Assets equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end) future_asset = Future(200, symbol="TESTFUT", end_date=fut_end) # Consume the Assets self.env.write_data(equities_identifiers=[equity_asset], futures_identifiers=[future_asset]) finder = AssetFinder(self.env.engine) # Test equality with newly built Assets self.assertEqual(equity_asset, finder.retrieve_asset(1)) self.assertEqual(future_asset, finder.retrieve_asset(200)) self.assertEqual(eq_end, finder.retrieve_asset(1).end_date) self.assertEqual(fut_end, finder.retrieve_asset(200).end_date)
def test_consume_asset_as_identifier(self): # Build some end dates eq_end = pd.Timestamp('2012-01-01', tz='UTC') fut_end = pd.Timestamp('2008-01-01', tz='UTC') # Build some simple Assets equity_asset = Equity(1, symbol="TESTEQ", end_date=eq_end) future_asset = Future(200, symbol="TESTFUT", end_date=fut_end) # Consume the Assets finder = AssetFinder() finder.consume_identifiers([equity_asset, future_asset]) # Test equality with newly built Assets self.assertEqual(equity_asset, finder.retrieve_asset(1)) self.assertEqual(future_asset, finder.retrieve_asset(200)) self.assertEqual(eq_end, finder.retrieve_asset(1).end_date) self.assertEqual(fut_end, finder.retrieve_asset(200).end_date)
def test_consume_metadata(self): # Test dict consumption dict_to_consume = {0: {'symbol': 'PLAY'}, 1: {'symbol': 'MSFT'}} self.env.write_data(equities_data=dict_to_consume) finder = AssetFinder(self.env.engine) equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) # Test dataframe consumption df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1]) df['asset_name'][0] = "Dave'N'Busters" df['exchange'][0] = "NASDAQ" df['asset_name'][1] = "Microsoft" df['exchange'][1] = "NYSE" self.env = TradingEnvironment() self.env.write_data(equities_df=df) finder = AssetFinder(self.env.engine) self.assertEqual('NASDAQ', finder.retrieve_asset(0).exchange) self.assertEqual('Microsoft', finder.retrieve_asset(1).asset_name)
def test_consume_metadata(self): # Test dict consumption dict_to_consume = {0: {"symbol": "PLAY"}, 1: {"symbol": "MSFT"}} self.env.write_data(equities_data=dict_to_consume) finder = AssetFinder(self.env.engine) equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual("PLAY", equity.symbol) # Test dataframe consumption df = pd.DataFrame(columns=["asset_name", "exchange"], index=[0, 1]) df["asset_name"][0] = "Dave'N'Busters" df["exchange"][0] = "NASDAQ" df["asset_name"][1] = "Microsoft" df["exchange"][1] = "NYSE" self.env = TradingEnvironment(load=noop_load) self.env.write_data(equities_df=df) finder = AssetFinder(self.env.engine) self.assertEqual("NASDAQ", finder.retrieve_asset(0).exchange) self.assertEqual("Microsoft", finder.retrieve_asset(1).asset_name)
def test_insert_metadata(self): data = {0: {"start_date": "2014-01-01", "end_date": "2015-01-01", "symbol": "PLAY", "foo_data": "FOO"}} self.env.write_data(equities_data=data) finder = AssetFinder(self.env.engine) # Test proper insertion equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual("PLAY", equity.symbol) self.assertEqual(pd.Timestamp("2015-01-01", tz="UTC"), equity.end_date) # Test invalid field with self.assertRaises(AttributeError): equity.foo_data
def test_consume_metadata(self): # Test dict consumption dict_to_consume = {0: {'symbol': 'PLAY'}, 1: {'symbol': 'MSFT'}} self.env.write_data(equities_data=dict_to_consume) finder = AssetFinder(self.env.engine) equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) # Test dataframe consumption df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1]) df['asset_name'][0] = "Dave'N'Busters" df['exchange'][0] = "NASDAQ" df['asset_name'][1] = "Microsoft" df['exchange'][1] = "NYSE" self.env = TradingEnvironment(load=noop_load) self.env.write_data(equities_df=df) finder = AssetFinder(self.env.engine) self.assertEqual('NASDAQ', finder.retrieve_asset(0).exchange) self.assertEqual('Microsoft', finder.retrieve_asset(1).asset_name)
def test_insert_metadata(self): data = {0: {'start_date': '2014-01-01', 'end_date': '2015-01-01', 'symbol': "PLAY", 'foo_data': "FOO"}} self.env.write_data(equities_data=data) finder = AssetFinder(self.env.engine) # Test proper insertion equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'), equity.end_date) # Test invalid field with self.assertRaises(AttributeError): equity.foo_data
def test_insert_metadata(self): finder = AssetFinder() finder.insert_metadata(0, asset_type='equity', start_date='2014-01-01', end_date='2015-01-01', symbol="PLAY", foo_data="FOO",) # Test proper insertion equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'), equity.end_date) # Test invalid field self.assertFalse('foo_data' in finder.metadata_cache[0])
def test_insert_metadata(self): finder = AssetFinder() finder.insert_metadata( 0, asset_type='equity', start_date='2014-01-01', end_date='2015-01-01', symbol="PLAY", foo_data="FOO", ) # Test proper insertion equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'), equity.end_date) # Test invalid field self.assertFalse('foo_data' in finder.metadata_cache[0])
def test_lookup_symbol_fuzzy(self): as_of = pd.Timestamp('2013-01-01', tz='UTC') frame = pd.DataFrame.from_records( [ { 'sid': i, 'file_name': 'TEST@%d' % i, 'company_name': "company%d" % i, 'start_date_nano': as_of.value, 'end_date_nano': as_of.value, 'exchange': uuid.uuid4().hex, } for i in range(3) ] ) finder = AssetFinder(frame) asset_0, asset_1, asset_2 = ( finder.retrieve_asset(i) for i in range(3) ) for i in range(2): # we do it twice to test for caching bugs self.assertIsNone(finder.lookup_symbol('test', as_of)) self.assertEqual( asset_1, finder.lookup_symbol('test@1', as_of) ) # Adding an unnecessary fuzzy shouldn't matter. self.assertEqual( asset_1, finder.lookup_symbol('test@1', as_of, fuzzy='@') ) # Shouldn't find this with no fuzzy_str passed. self.assertIsNone(finder.lookup_symbol('test1', as_of)) # Shouldn't find this with an incorrect fuzzy_str. self.assertIsNone(finder.lookup_symbol('test1', as_of, fuzzy='*')) # Should find it with the correct fuzzy_str. self.assertEqual( asset_1, finder.lookup_symbol('test1', as_of, fuzzy='@'), )
def test_lookup_symbol_fuzzy(self): as_of = pd.Timestamp('2013-01-01', tz='UTC') frame = pd.DataFrame.from_records( [ { 'sid': i, 'symbol': 'TEST@%d' % i, 'company_name': "company%d" % i, 'start_date': as_of.value, 'end_date': as_of.value, 'exchange': uuid.uuid4().hex, 'fuzzy': 'TEST%d' % i } for i in range(3) ] ) self.env.write_data(equities_df=frame) finder = AssetFinder(self.env.engine, fuzzy_char='@') asset_0, asset_1, asset_2 = ( finder.retrieve_asset(i) for i in range(3) ) for i in range(2): # we do it twice to test for caching bugs self.assertIsNone(finder.lookup_symbol('test', as_of)) self.assertEqual( asset_1, finder.lookup_symbol('test@1', as_of) ) # Adding an unnecessary fuzzy shouldn't matter. self.assertEqual( asset_1, finder.lookup_symbol('test@1', as_of, fuzzy=True) ) # Shouldn't find this with no fuzzy_str passed. self.assertIsNone(finder.lookup_symbol('test1', as_of)) # Should find exact match. self.assertEqual( asset_1, finder.lookup_symbol('test1', as_of, fuzzy=True), )
def test_insert_metadata(self): data = { 0: { 'start_date': '2014-01-01', 'end_date': '2015-01-01', 'symbol': "PLAY", 'foo_data': "FOO" } } self.env.write_data(equities_data=data) finder = AssetFinder(self.env.engine) # Test proper insertion equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) self.assertEqual(pd.Timestamp('2015-01-01', tz='UTC'), equity.end_date) # Test invalid field with self.assertRaises(AttributeError): equity.foo_data
def test_consume_metadata(self): # Test dict consumption finder = AssetFinder() dict_to_consume = {0: {'symbol': 'PLAY'}, 1: {'symbol': 'MSFT'}} finder.consume_metadata(dict_to_consume) equity = finder.retrieve_asset(0) self.assertIsInstance(equity, Equity) self.assertEqual('PLAY', equity.symbol) finder = AssetFinder() # Test dataframe consumption df = pd.DataFrame(columns=['asset_name', 'exchange'], index=[0, 1]) df['asset_name'][0] = "Dave'N'Busters" df['exchange'][0] = "NASDAQ" df['asset_name'][1] = "Microsoft" df['exchange'][1] = "NYSE" finder.consume_metadata(df) self.assertEqual('NASDAQ', finder.metadata_cache[0]['exchange']) self.assertEqual('Microsoft', finder.metadata_cache[1]['asset_name'])
def test_lookup_symbol_delimited(self): as_of = pd.Timestamp('2013-01-01', tz='UTC') frame = pd.DataFrame.from_records( [ { 'sid': i, 'symbol': 'TEST.%d' % i, 'company_name': "company%d" % i, 'start_date': as_of.value, 'end_date': as_of.value, 'exchange': uuid.uuid4().hex } for i in range(3) ] ) self.env.write_data(equities_df=frame) finder = AssetFinder(self.env.engine) asset_0, asset_1, asset_2 = ( finder.retrieve_asset(i) for i in range(3) ) # we do it twice to catch caching bugs for i in range(2): with self.assertRaises(SymbolNotFound): finder.lookup_symbol('TEST', as_of) with self.assertRaises(SymbolNotFound): finder.lookup_symbol('TEST1', as_of) # '@' is not a supported delimiter with self.assertRaises(SymbolNotFound): finder.lookup_symbol('TEST@1', as_of) # Adding an unnecessary fuzzy shouldn't matter. for fuzzy_char in ['-', '/', '_', '.']: self.assertEqual( asset_1, finder.lookup_symbol('TEST%s1' % fuzzy_char, as_of) )
def build_lookup_generic_cases(): """ Generate test cases for AssetFinder test_lookup_generic. """ unique_start = pd.Timestamp('2013-01-01', tz='UTC') unique_end = pd.Timestamp('2014-01-01', tz='UTC') dupe_0_start = pd.Timestamp('2013-01-01', tz='UTC') dupe_0_end = dupe_0_start + timedelta(days=1) dupe_1_start = pd.Timestamp('2013-01-03', tz='UTC') dupe_1_end = dupe_1_start + timedelta(days=1) frame = pd.DataFrame.from_records([ { 'sid': 0, 'file_name': 'duplicated', 'company_name': 'duplicated_0', 'start_date_nano': dupe_0_start.value, 'end_date_nano': dupe_0_end.value, 'exchange': '', }, { 'sid': 1, 'file_name': 'duplicated', 'company_name': 'duplicated_1', 'start_date_nano': dupe_1_start.value, 'end_date_nano': dupe_1_end.value, 'exchange': '', }, { 'sid': 2, 'file_name': 'unique', 'company_name': 'unique', 'start_date_nano': unique_start.value, 'end_date_nano': unique_end.value, 'exchange': '', }, ], ) finder = AssetFinder(metadata=frame) dupe_0, dupe_1, unique = assets = [ finder.retrieve_asset(i) for i in range(3) ] dupe_0_start = dupe_0.start_date dupe_1_start = dupe_1.start_date cases = [ ## # Scalars # Asset object (finder, assets[0], None, assets[0]), (finder, assets[1], None, assets[1]), (finder, assets[2], None, assets[2]), # int (finder, 0, None, assets[0]), (finder, 1, None, assets[1]), (finder, 2, None, assets[2]), # Duplicated symbol with resolution date (finder, 'duplicated', dupe_0_start, dupe_0), (finder, 'duplicated', dupe_1_start, dupe_1), # Unique symbol, with or without resolution date. (finder, 'unique', unique_start, unique), (finder, 'unique', None, unique), ## # Iterables # Iterables of Asset objects. (finder, assets, None, assets), (finder, iter(assets), None, assets), # Iterables of ints (finder, (0, 1), None, assets[:-1]), (finder, iter((0, 1)), None, assets[:-1]), # Iterables of symbols. (finder, ('duplicated', 'unique'), dupe_0_start, [dupe_0, unique]), (finder, ('duplicated', 'unique'), dupe_1_start, [dupe_1, unique]), # Mixed types (finder, ('duplicated', 2, 'unique', 1, dupe_1), dupe_0_start, [dupe_0, assets[2], unique, assets[1], dupe_1]), ] return cases
def build_lookup_generic_cases(): """ Generate test cases for AssetFinder test_lookup_generic. """ unique_start = pd.Timestamp('2013-01-01', tz='UTC') unique_end = pd.Timestamp('2014-01-01', tz='UTC') dupe_0_start = pd.Timestamp('2013-01-01', tz='UTC') dupe_0_end = dupe_0_start + timedelta(days=1) dupe_1_start = pd.Timestamp('2013-01-03', tz='UTC') dupe_1_end = dupe_1_start + timedelta(days=1) frame = pd.DataFrame.from_records( [ { 'sid': 0, 'file_name': 'duplicated', 'company_name': 'duplicated_0', 'start_date_nano': dupe_0_start.value, 'end_date_nano': dupe_0_end.value, 'exchange': '', }, { 'sid': 1, 'file_name': 'duplicated', 'company_name': 'duplicated_1', 'start_date_nano': dupe_1_start.value, 'end_date_nano': dupe_1_end.value, 'exchange': '', }, { 'sid': 2, 'file_name': 'unique', 'company_name': 'unique', 'start_date_nano': unique_start.value, 'end_date_nano': unique_end.value, 'exchange': '', }, ], ) finder = AssetFinder(metadata=frame) dupe_0, dupe_1, unique = assets = [ finder.retrieve_asset(i) for i in range(3) ] dupe_0_start = dupe_0.start_date dupe_1_start = dupe_1.start_date cases = [ ## # Scalars # Asset object (finder, assets[0], None, assets[0]), (finder, assets[1], None, assets[1]), (finder, assets[2], None, assets[2]), # int (finder, 0, None, assets[0]), (finder, 1, None, assets[1]), (finder, 2, None, assets[2]), # Duplicated symbol with resolution date (finder, 'duplicated', dupe_0_start, dupe_0), (finder, 'duplicated', dupe_1_start, dupe_1), # Unique symbol, with or without resolution date. (finder, 'unique', unique_start, unique), (finder, 'unique', None, unique), ## # Iterables # Iterables of Asset objects. (finder, assets, None, assets), (finder, iter(assets), None, assets), # Iterables of ints (finder, (0, 1), None, assets[:-1]), (finder, iter((0, 1)), None, assets[:-1]), # Iterables of symbols. (finder, ('duplicated', 'unique'), dupe_0_start, [dupe_0, unique]), (finder, ('duplicated', 'unique'), dupe_1_start, [dupe_1, unique]), # Mixed types (finder, ('duplicated', 2, 'unique', 1, dupe_1), dupe_0_start, [dupe_0, assets[2], unique, assets[1], dupe_1]), ] return cases