def make_equity_info(cls):
    """Build equity metadata for every exchange in ``cls.EXCHANGE_INFO``.

    One rotating batch of assets is created per exchange, concatenated
    into a single frame, and the combined frame is checked to end
    exactly on ``cls.END_DATE``.
    """
    out = pd.concat(
        [
            # 20 assets on each exchange. Each asset lives for 5 days.
            # A new asset starts each day.
            make_rotating_equity_info(
                num_assets=20,
                first_start=cls.START_DATE,
                frequency=get_calendar(exchange).day,
                periods_between_starts=1,
                # NOTE: The asset_lifetime parameter name is a bit
                # misleading. It determines the number of trading
                # days between each asset's start_date and end_date,
                # so assets created with this method actually "live"
                # for (asset_lifetime + 1) days. But, since pipeline
                # doesn't show you an asset the day it IPOs, this
                # number matches the number of days that each asset
                # should appear in a pipeline output.
                asset_lifetime=5,
                exchange=exchange,
            )
            for exchange in cls.EXCHANGE_INFO.exchange
        ],
        ignore_index=True,
    )
    # Sanity-check that the rotation spans the full test date range.
    assert_equal(out.end_date.max(), cls.END_DATE)
    return out
def test_compute_lifetimes(self):
    """Check ``AssetFinder.lifetimes`` against a brute-force oracle.

    For every contiguous sub-index of the full date range, build the
    expected asset-alive mask by directly comparing each date against
    each asset's (start_date, end_date) window, with and without
    counting the start date itself.
    """
    n_assets = 4
    day = self.trading_schedule.day
    first_start = pd.Timestamp('2015-04-01', tz='UTC')

    frame = make_rotating_equity_info(
        num_assets=n_assets,
        first_start=first_start,
        frequency=day,
        periods_between_starts=3,
        asset_lifetime=5,
    )
    self.write_assets(equities=frame)
    finder = self.asset_finder

    all_dates = pd.date_range(
        start=first_start,
        end=frame.end_date.max(),
        freq=day,
    )

    for dates in all_subindices(all_dates):
        mask_with_start = full((len(dates), n_assets), False, dtype=bool)
        mask_no_start = full((len(dates), n_assets), False, dtype=bool)

        for row, date in enumerate(dates):
            # itertuples yields (sid, start_date, end_date); sids here
            # are the default 0..n-1 range, so they double as columns.
            for col, asset_start, asset_end in (
                frame[['start_date', 'end_date']].itertuples()
            ):
                # Redundant but deliberately explicit checks.
                if asset_start <= date <= asset_end:
                    mask_with_start[row, col] = True
                if asset_start < date:
                    mask_no_start[row, col] = True

        expected = pd.DataFrame(
            data=mask_with_start,
            index=dates,
            columns=frame.index.values,
        )
        assert_frame_equal(
            finder.lifetimes(dates, include_start_date=True),
            expected,
        )

        expected = pd.DataFrame(
            data=mask_no_start,
            index=dates,
            columns=frame.index.values,
        )
        assert_frame_equal(
            finder.lifetimes(dates, include_start_date=False),
            expected,
        )
def test_compute_lifetimes(self):
    """Verify ``AssetFinder.lifetimes`` on every sub-range of dates.

    A brute-force expected mask is computed per (date, asset) pair and
    compared against the finder's output, both including and excluding
    each asset's start date.
    """
    num_assets = 4
    calendar_day = self.trading_calendar.day
    first_start = pd.Timestamp('2015-04-01', tz='UTC')

    frame = make_rotating_equity_info(
        num_assets=num_assets,
        first_start=first_start,
        frequency=calendar_day,
        periods_between_starts=3,
        asset_lifetime=5,
    )
    self.write_assets(equities=frame)
    finder = self.asset_finder

    all_dates = pd.date_range(
        start=first_start,
        end=frame.end_date.max(),
        freq=calendar_day,
    )

    for dates in all_subindices(all_dates):
        shape = (len(dates), num_assets)
        alive_with_start = full(shape, False, dtype=bool)
        alive_no_start = full(shape, False, dtype=bool)

        for i, date in enumerate(dates):
            # itertuples yields (sid, start_date, end_date); the default
            # RangeIndex sids also serve as column positions.
            rows = frame[['start_date', 'end_date']].itertuples()
            for j, start, end in rows:
                # Intentionally redundant, maximally explicit checks.
                if start <= date <= end:
                    alive_with_start[i, j] = True
                if start < date:
                    alive_no_start[i, j] = True

        for include_start, raw in (
            (True, alive_with_start),
            (False, alive_no_start),
        ):
            expected = pd.DataFrame(
                data=raw,
                index=dates,
                columns=frame.index.values,
            )
            result = finder.lifetimes(
                dates,
                include_start_date=include_start,
            )
            assert_frame_equal(result, expected)
def make_equity_info(cls):
    """Create rotating equity metadata, cache it on the class, return it.

    Six assets, one new start every four trading days, each living
    ``asset_lifetime`` trading days.
    """
    info = make_rotating_equity_info(
        num_assets=6,
        first_start=cls.first_asset_start,
        frequency=cls.trading_calendar.day,
        periods_between_starts=4,
        asset_lifetime=8,
    )
    # Stash on the class so other test helpers can reference the same frame.
    cls.equity_info = info
    return info
def make_equity_info(cls):
    """Create rotating equity metadata, cache it on the class, return it.

    Six assets, one new start every four trading days, each living
    ``asset_lifetime`` trading days, dated on the environment calendar.
    """
    params = dict(
        num_assets=6,
        first_start=cls.first_asset_start,
        frequency=cls.TRADING_ENV_TRADING_CALENDAR.trading_day,
        periods_between_starts=4,
        asset_lifetime=8,
    )
    # Stash on the class so other test helpers can reference the same frame.
    cls.equity_info = info = make_rotating_equity_info(**params)
    return info
def test_compute_lifetimes(self):
    """Check ``AssetFinder.lifetimes`` with country-code filtering.

    Four exchanges (two US, one CA, one JP) each get an identical
    rotation of 4 assets. Sids are shuffled so they are not contiguous
    per exchange, then lifetimes is checked for every subset of country
    codes by tiling a brute-force per-exchange expected mask.
    """
    assets_per_exchange = 4
    trading_day = self.trading_calendar.day
    first_start = pd.Timestamp('2015-04-01', tz='UTC')

    equities = pd.concat(
        [
            make_rotating_equity_info(
                num_assets=assets_per_exchange,
                first_start=first_start,
                frequency=trading_day,
                periods_between_starts=3,
                asset_lifetime=5,
                exchange=exchange,
            )
            for exchange in (
                'US_EXCHANGE_1',
                'US_EXCHANGE_2',
                'CA_EXCHANGE',
                'JP_EXCHANGE',
            )
        ],
        ignore_index=True,
    )
    # make every symbol unique
    equities['symbol'] = list(string.ascii_uppercase[:len(equities)])
    equities['real_sid'] = equities['symbol']

    # shuffle up the sids so they are not contiguous per exchange
    sids = np.arange(len(equities))
    np.random.RandomState(1337).shuffle(sids)
    equities.index = sids
    # Maps a shuffled sid back to its original (pre-shuffle) position;
    # used below as a sort key to recover exchange-grouped order.
    permute_sid = dict(zip(sids, range(len(sids)))).__getitem__

    exchanges = pd.DataFrame.from_records([
        {'exchange': 'US_EXCHANGE_1', 'country_code': 'US'},
        {'exchange': 'US_EXCHANGE_2', 'country_code': 'US'},
        {'exchange': 'CA_EXCHANGE', 'country_code': 'CA'},
        {'exchange': 'JP_EXCHANGE', 'country_code': 'JP'},
    ])
    # Slices rely on equities being ordered by exchange before the
    # index (sids) was shuffled: US owns the first two exchanges.
    sids_by_country = {
        'US': equities.index[:2 * assets_per_exchange],
        'CA': equities.index[2 * assets_per_exchange:3 * assets_per_exchange],
        'JP': equities.index[3 * assets_per_exchange:],
    }
    self.write_assets(equities=equities, exchanges=exchanges)
    finder = self.asset_finder

    all_dates = pd.date_range(
        start=first_start,
        end=equities.end_date.max(),
        freq=trading_day,
    )

    for dates in all_subindices(all_dates):
        expected_with_start_raw = full(
            shape=(len(dates), assets_per_exchange),
            fill_value=False,
            dtype=bool,
        )
        expected_no_start_raw = full(
            shape=(len(dates), assets_per_exchange),
            fill_value=False,
            dtype=bool,
        )
        # Every exchange has the same rotation schedule, so the mask is
        # computed once from the first exchange's 4 assets and tiled.
        for i, date in enumerate(dates):
            it = equities.iloc[:4][['start_date', 'end_date']].itertuples(
                index=False,
            )
            for j, (start, end) in enumerate(it):
                # This way of doing the checks is redundant, but very
                # clear.
                if start <= date <= end:
                    expected_with_start_raw[i, j] = True
                if start < date:
                    expected_no_start_raw[i, j] = True

        for country_codes in powerset(exchanges.country_code.unique()):
            expected_sids = pd.Int64Index(
                sorted(
                    concat(
                        sids_by_country[country_code]
                        for country_code in country_codes
                    )
                )
            )
            # Same sids, reordered back into exchange-grouped order so
            # columns line up with the tiled per-exchange mask.
            permuted_sids = [
                sid for sid in sorted(expected_sids, key=permute_sid)
            ]
            # One tile per country, plus an extra for 'US' because it
            # contributes two exchanges.
            tile_count = len(country_codes) + ('US' in country_codes)
            expected_with_start = pd.DataFrame(
                data=np.tile(
                    expected_with_start_raw,
                    tile_count,
                ),
                index=dates,
                columns=pd.Int64Index(permuted_sids),
            )
            result = finder.lifetimes(
                dates,
                include_start_date=True,
                country_codes=country_codes,
            )
            # Result columns come back sorted by sid; reorder to the
            # exchange-grouped permutation before comparing values.
            assert_equal(result.columns, expected_sids)
            result = result[permuted_sids]
            assert_equal(result, expected_with_start)

            expected_no_start = pd.DataFrame(
                data=np.tile(
                    expected_no_start_raw,
                    tile_count,
                ),
                index=dates,
                columns=pd.Int64Index(permuted_sids),
            )
            result = finder.lifetimes(
                dates,
                include_start_date=False,
                country_codes=country_codes,
            )
            assert_equal(result.columns, expected_sids)
            result = result[permuted_sids]
            assert_equal(result, expected_no_start)