def test_regression_method_bad_type(self):
    """
    Check that `Factor.linear_regression` rejects datetime-dtype operands.

    A `TypeError` is expected both when the method is invoked on a factor
    whose dtype is `datetime64[ns]` and when a slice of such a factor is
    supplied as the regression target.
    """
    # The concrete lengths are irrelevant to what is being tested.
    n_returns = 2
    n_regression = 10

    float_factor = Returns(window_length=n_returns, inputs=[self.col])
    float_slice = float_factor[self.my_asset]

    class BadTypeFactor(CustomFactor):
        window_length = 1
        inputs = []
        dtype = datetime64ns_dtype
        window_safe = True

        def compute(self, today, assets, out):
            pass

    datetime_factor = BadTypeFactor()
    datetime_slice = datetime_factor[self.my_asset]

    # Each (receiver, target) pair contains exactly one datetime operand.
    bad_pairs = (
        (datetime_factor, float_slice),
        (float_factor, datetime_slice),
    )
    for receiver, target in bad_pairs:
        with self.assertRaises(TypeError):
            receiver.linear_regression(
                target=target,
                regression_length=n_regression,
            )
def make_pipeline(asset_finder):
    """
    Build a pipeline of normalised price-ratio features and 5-day returns.

    Parameters
    ----------
    asset_finder
        Passed straight to ``Fundamental``; the resulting ``outstanding``
        term is the denominator of the turnover rate.

    Returns
    -------
    Pipeline
        A pipeline without a screen whose columns are ``h2o``, ``l2o``,
        ``c2o``, ``h2c``, ``l2c``, ``h2l``, ``vol``, ``turnover_rate`` and
        ``return``.
    """
    open_ = USEquityPricing.open.latest
    high = USEquityPricing.high.latest
    low = USEquityPricing.low.latest
    close = USEquityPricing.close.latest
    volume = USEquityPricing.volume.latest

    shares_outstanding = Fundamental(asset_finder).outstanding
    # Presumably required so the term is accepted as an input to ``Latest``
    # below -- confirm against zipline's window-safety rules.
    shares_outstanding.window_safe = True
    turnover = volume / Latest([shares_outstanding])

    # Five-bar returns: the target is one week of data.
    weekly_returns = Returns(inputs=[USEquityPricing.close], window_length=5)

    # Every price ratio gets the same log1p -> zscore treatment.
    price_ratios = {
        'h2o': high / open_,
        'l2o': low / open_,
        'c2o': close / open_,
        'h2c': high / close,
        'l2c': low / close,
        'h2l': high / low,
    }
    columns = {
        name: ratio.log1p().zscore() for name, ratio in price_ratios.items()
    }
    columns['vol'] = volume.zscore()
    columns['turnover_rate'] = turnover.log1p().zscore()
    columns['return'] = weekly_returns.log1p()

    return Pipeline(columns=columns)
def test_factor_correlation_methods(self, returns_length, correlation_length):
    """
    Check `Factor.pearsonr` / `Factor.spearmanr` against the built-ins.

    The results of the two factor methods must equal those of
    `RollingPearsonOfReturns` and `RollingSpearmanOfReturns` computed for
    the same target asset and lengths.
    """
    target_asset = self.my_asset

    base_returns = Returns(window_length=returns_length, inputs=[self.col])
    target_slice = base_returns[target_asset]

    method_pearson = base_returns.pearsonr(
        target=target_slice,
        correlation_length=correlation_length,
    )
    method_spearman = base_returns.spearmanr(
        target=target_slice,
        correlation_length=correlation_length,
    )

    builtin_pearson = RollingPearsonOfReturns(
        target=target_asset,
        returns_length=returns_length,
        correlation_length=correlation_length,
    )
    builtin_spearman = RollingSpearmanOfReturns(
        target=target_asset,
        returns_length=returns_length,
        correlation_length=correlation_length,
    )

    # The built-ins create their own Returns factor (on
    # USEquityPricing.close by default), so the only way to make them use
    # our random data is to overwrite their inputs after construction.
    # This is a test-only trick and should not be done in practice.
    for builtin in (builtin_pearson, builtin_spearman):
        builtin.inputs = [base_returns, target_slice]

    pipeline = Pipeline(
        columns={
            'pearson': method_pearson,
            'spearman': method_spearman,
            'expected_pearson': builtin_pearson,
            'expected_spearman': builtin_spearman,
        },
    )
    results = self.run_pipeline(
        pipeline,
        self.pipeline_start_date,
        self.pipeline_end_date,
    )

    for name in ('pearson', 'spearman'):
        actual = results[name].unstack()
        expected = results['expected_' + name].unstack()
        assert_frame_equal(actual, expected)
def make_pipeline():
    """
    Build a pipeline of lagged returns and market cap.

    ``lagged_returns`` is the 252-bar return minus the 21-bar return. The
    screen keeps assets for which both columns are non-null and whose
    latest market cap is in the top 500.
    """
    market_cap = MyDataSet.marketcap.latest
    lagged_returns = Returns(window_length=252) - Returns(window_length=21)

    has_data = lagged_returns.notnull() & market_cap.notnull()
    screen = has_data & market_cap.top(500)

    columns = {
        'lagged_returns': lagged_returns,
        'marketcap': market_cap,
    }
    return Pipeline(columns=columns, screen=screen)
def make_pipeline():
    """
    Build a pipeline selecting extreme recent performers among liquid names.

    Returns are computed only for assets whose 1-day average dollar volume
    is between the ``N``-th and 100th percentile. The screen keeps assets
    whose returns are in the 0-10 or ``N``-100 percentile band.

    NOTE(review): the module-level ``N`` is used as a percentile bound
    *and* as the returns window length -- confirm this is intentional.
    """
    dollar_volume = AverageDollarVolume(window_length=1)
    liquid = dollar_volume.percentile_between(N, 100)

    recent_returns = Returns(window_length=N, mask=liquid)
    losers = recent_returns.percentile_between(0, 10)
    winners = recent_returns.percentile_between(N, 100)

    return Pipeline(
        columns={
            'low_returns': losers,
            'high_returns': winners,
            'recent_returns': recent_returns,
            'dollar_volume': dollar_volume,
        },
        screen=(losers | winners),
    )
def mean_reversion_5day_sector_neutral_smoothed(window_length, universe, sector):
    """
    Generate the mean reversion 5 day sector neutral smoothed factor.

    Parameters
    ----------
    window_length : int
        Window length used for both the returns and the moving average.
    universe : Zipline Filter
        Universe of stocks filter, applied as the mask of the returns.
    sector : Zipline Classifier
        Sector classifier used to demean the returns.

    Returns
    -------
    factor : Zipline Factor
        Mean reversion 5 day sector neutral smoothed factor.
    """
    masked_returns = Returns(window_length=window_length, mask=universe)
    sector_neutral = masked_returns.demean(groupby=sector)
    # Negated so that the lowest sector-relative returns score highest.
    raw_factor = -sector_neutral.rank().zscore()

    smoothed = SimpleMovingAverage(
        inputs=[raw_factor],
        window_length=window_length,
    )
    return smoothed.rank().zscore()
def mean_reversion_5day_sector_neutral(window_length, universe, sector):
    """
    Generate the mean reversion 5 day sector neutral factor.

    Parameters
    ----------
    window_length : int
        Returns window length.
    universe : Zipline Filter
        Universe of stocks filter.
    sector : Zipline Classifier
        Sector classifier.

    Returns
    -------
    factor : Zipline Factor
        Mean reversion 5 day sector neutral factor.
    """
    masked_returns = Returns(window_length=window_length, mask=universe)
    sector_neutral = masked_returns.demean(groupby=sector)
    normalised = sector_neutral.rank().zscore()
    # Negated so that the lowest sector-relative returns score highest.
    return -normalised
def make_pipeline():
    """
    Build a pipeline of common technical indicators on the close price.

    Columns: 5-bar returns, RSI, MACD signal, a 30-bar EWMA, 5- and 10-bar
    simple moving averages and the three Bollinger bands (20 bars, k=2).
    The screen keeps assets whose latest close is positive; the same
    filter is used as the mask of the MACD, EWMA and SMA terms.
    """
    positive_close = USEquityPricing.close.latest > 0

    returns_5 = Returns(window_length=5)
    rsi = RSI(inputs=[USEquityPricing.close])
    macd = MovingAverageConvergenceDivergenceSignal(mask=positive_close)

    # Decay rate written in span form: 1 - 2 / (1 + span) with span = 15.
    ema_decay = 1 - (2.0 / (1 + 15.0))
    ema = ExponentialWeightedMovingAverage(
        mask=positive_close,
        inputs=[USEquityPricing.close],
        window_length=30,
        decay_rate=ema_decay,
    )

    sma_5 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=5,
        mask=positive_close,
    )
    sma_10 = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=10,
        mask=positive_close,
    )

    bands = BollingerBands(
        inputs=[USEquityPricing.close],
        window_length=20,
        k=2,
    )

    columns = {
        'returns_5': returns_5,
        'RSI': rsi,
        'MACD': macd,
        'EMA': ema,
        'SMA_5': sma_5,
        'SMA_10': sma_10,
        'bb_upper': bands.upper,
        'bb_middle': bands.middle,
        'bb_lower': bands.lower,
    }
    return Pipeline(columns=columns, screen=positive_close)
class Vol_3M(CustomFactor):
    """
    Three-month volatility.

    Standard deviation (NaNs ignored) of 2-bar ``Returns`` over a 63-row
    window, computed per column.
    """

    inputs = [Returns(window_length=2)]
    window_length = 63

    def compute(self, today, assets, out, rets):
        # axis=0 collapses the window dimension, leaving one value per column.
        volatility = np.nanstd(rets, axis=0)
        out[:] = volatility
def test_non_existent_asset(self):
    """
    Running a pipeline on a slice of an unknown asset must raise.

    A term is indexed with an asset constructed here with sid 0, the slice
    is fed to a custom factor, and running the pipeline is expected to
    raise `NonExistentAssetInTimeFrame`.
    """
    missing_asset = Asset(
        0,
        exchange_info=ExchangeInfo('TEST FULL', 'TEST', 'US'),
    )
    sliced_returns = Returns(window_length=2, inputs=[self.col])[missing_asset]

    class UsesSlicedInput(CustomFactor):
        window_length = 2
        inputs = [sliced_returns]

        def compute(self, today, assets, out, returns_slice):
            pass

    first_day = self.pipeline_start_date
    last_day = self.pipeline_end_date
    with self.assertRaises(NonExistentAssetInTimeFrame):
        self.run_pipeline(
            Pipeline(columns={'uses_sliced_input': UsesSlicedInput()}),
            first_day,
            last_day,
        )
def pipeline_columns_and_mask(self):
    """
    Assemble the prediction, beta and sector columns plus the universe mask.

    Returns
    -------
    (dict, Filter)
        ``columns`` with the keys ``market_beta``, ``sector`` and
        ``predict``, and the universe filter returned by
        ``self.__make_factors()``.
    """
    from collections import OrderedDict

    factors, universe = self.__make_factors()

    # Insertion order matters: the values are handed to the regression as
    # its inputs, with the returns term first.
    regress_inputs = OrderedDict()

    # Returns over the last n days.
    regress_inputs['Returns'] = Returns(
        inputs=[USEquityPricing.close],
        mask=universe,
        window_length=5,
    )

    # Ranked factors: ranks use relative order rather than absolute values,
    # which avoids self-similarity.
    for factor_name, factor in factors.items():
        factor.window_safe = True
        regress_inputs[factor_name] = factor.rank(mask=universe)

    # Prediction of the price five days ahead.
    predict = BasicFactorRegress(
        inputs=regress_inputs.values(),
        window_length=42,
        mask=universe,
    )

    benchmark_regression = RollingLinearRegressionOfReturns(
        target=symbol(risk_benchmark),
        returns_length=6,
        regression_length=21,
        mask=(universe),
    )
    # Blend the raw beta with 1.0 using weights 0.66 and 0.33.
    risk_beta = 0.66 * benchmark_regression.beta + 0.33 * 1.0

    sector = get_sector()

    columns = {
        'market_beta': risk_beta,
        'sector': sector,
        'predict': predict,
    }
    return columns, universe
def test_slice(self, my_asset_column, window_length_):
    """
    Check that indexing a term yields a correctly shaped, correct slice.

    The slice is passed to a custom factor alongside the factor it was
    taken from; the factor's `compute` asserts that the slice has exactly
    one column and matches the corresponding column of the parent.
    """
    sids = self.sids
    sliced_asset = self.asset_finder.retrieve_asset(sids[my_asset_column])

    parent_returns = Returns(window_length=2, inputs=[self.col])
    sliced_returns = parent_returns[sliced_asset]

    class UsesSlicedInput(CustomFactor):
        window_length = window_length_
        inputs = [parent_returns, sliced_returns]

        def compute(self, today, assets, out, returns, returns_slice):
            n_rows = self.window_length
            # The slice must be a single column; the parent must have one
            # column per sid.
            assert returns_slice.shape == (n_rows, 1)
            assert returns.shape == (n_rows, len(sids))
            # The slice's values are those of the parent's matching column.
            check_arrays(returns_slice[:, 0], returns[:, my_asset_column])

    # All assertions happen inside `compute` above; running the pipeline
    # is what triggers them.
    pipeline = Pipeline(columns={'uses_sliced_input': UsesSlicedInput()})
    self.run_pipeline(
        pipeline,
        self.pipeline_start_date,
        self.pipeline_end_date,
    )
class SimpleMomentum(CustomFactor):
    """
    Lagged 20-bar return.

    The input is a 20-row window of the 20-bar ``Returns`` factor; the
    output is the first row of that window.
    """

    inputs = [Returns(window_length=20)]
    window_length = 20

    def compute(self, today, assets, out, lag_returns):
        # Row 0 is presumably the oldest row of the window, i.e. the
        # return as of last month -- confirm against zipline's ordering.
        first_row = lag_returns[0]
        out[:] = first_row
def mean_reversion_5day_sector_neutral_smoothed(window_length, universe, sector):
    """
    Generate the smoothed, sector-neutral mean reversion factor.

    Returns over ``window_length`` (masked by ``universe``) are demeaned
    within ``sector``, ranked, z-scored and negated. The result is averaged
    over ``window_length`` with a simple moving average, then ranked and
    z-scored again.
    """
    masked_returns = Returns(window_length=window_length, mask=universe)
    normalised = masked_returns.demean(groupby=sector).rank().zscore()
    unsmoothed = -normalised

    moving_average = SimpleMovingAverage(
        inputs=[unsmoothed],
        window_length=window_length,
    )
    return moving_average.rank().zscore()
def initialize(context):
    """
    Attach the ``ff_example`` pipeline and store the sliced result frame.

    The pipeline receives the columns ``market_cap``, ``bm`` (book equity
    over market equity), ``op``, ``inv`` and ``returns`` (2-bar returns).
    ``context.result`` is set to the rows of ``result`` from the later of
    the current normalised datetime and ``START_DATE`` onwards.

    NOTE(review): ``result`` is not defined in this function; it is
    presumably a module-level frame -- confirm.
    """
    pipe = Pipeline()
    attach_pipeline(pipe, 'ff_example')

    market_equity = MarketEquity()
    book_to_market = BookEquity() / market_equity

    named_factors = (
        ('market_cap', market_equity),
        ('bm', book_to_market),      # book equity over market equity
        ('op', OP()),                # operating profitability
        ('inv', INV()),              # investment factor
        ('returns', Returns(window_length=2)),
    )
    for column_name, factor in named_factors:
        pipe.add(factor, column_name)

    today = get_datetime().normalize()
    if today > START_DATE:
        first_row = today
    else:
        first_row = START_DATE
    context.result = result.loc[first_row:, :]
class Mean_Reversion_1M(CustomFactor):
    """
    Standardised gap between the latest monthly return and its average.

    The input is a 252-row window of 21-bar returns. The output is the
    last row minus the per-column NaN-ignoring mean, divided by the
    per-column NaN-ignoring standard deviation.
    """

    inputs = [Returns(window_length=21)]
    window_length = 252

    def compute(self, today, assets, out, monthly_rets):
        latest = monthly_rets[-1]
        average = np.nanmean(monthly_rets, axis=0)
        spread = np.nanstd(monthly_rets, axis=0)
        out[:] = (latest - average) / spread
def make_pipeline(context):
    """
    Create the pipeline (dynamic stock selector).

    The pipeline ranks stocks by recent returns within a high
    dollar-volume group. ``context`` is accepted but not used here.

    Documentation on pipeline can be found here:
    https://www.quantopian.com/help#pipeline-title
    """
    # Builtin dollar-volume factor with default inputs and a 1-bar window.
    dollar_volume = AverageDollarVolume(window_length=1)

    # Liquidity filter: 95th to 100th percentile of dollar volume.
    liquid = dollar_volume.percentile_between(95, 100)

    # Builtin Returns factor with a 16-bar window, computed only for the
    # assets that pass the liquidity filter.
    recent_returns = Returns(window_length=16, mask=liquid)

    # Bottom and top five percentile bands of those returns.
    losers = recent_returns.percentile_between(0, 5)
    winners = recent_returns.percentile_between(95, 100)

    # Keep only the assets in either extreme band.
    extremes = (losers | winners)

    columns = {
        'low_returns': losers,
        'high_returns': winners,
        'recent_returns': recent_returns,
        'dollar_volume': dollar_volume,
    }
    return Pipeline(columns=columns, screen=extremes)
class MeanReversion(CustomFactor):
    """
    Deviation of the latest monthly return from its windowed mean,
    normalised by the standard deviation of the monthly returns.

    The input is a ``YEAR``-row window of ``MONTH``-bar returns.
    """

    inputs = [Returns(window_length=MONTH)]
    window_length = YEAR

    def compute(self, today, assets, out, monthly_returns):
        frame = pd.DataFrame(monthly_returns)
        # Column-wise: last row minus mean, then divided by pandas' std.
        deviation = frame.iloc[-1] - frame.mean()
        out[:] = deviation / frame.std()
def test_returns(self, seed_value, window_length):
    """
    Check `Returns.compute` against a direct calculation.

    Three columns of positive random data with ``window_length`` rows are
    fed to `Returns.compute`. The output must match
    ``(last - first) / first`` computed per column.
    """
    returns = Returns(window_length=window_length)

    today = datetime64(1, 'ns')
    assets = arange(3)

    seed(seed_value)  # Seed so we get deterministic results.
    test_data = abs(randn(window_length, 3))

    # Calculate the expected returns.
    expected = (test_data[-1] - test_data[0]) / test_data[0]

    # Single output buffer; the original allocated it twice and discarded
    # the first allocation.
    out = empty((3,), dtype=float)
    returns.compute(today, assets, out, test_data)

    check_allclose(expected, out)
def test_returns(self, seed_value, window_length):
    """
    Verify the `Returns` factor on seeded random data.

    The expected value for each of the three columns is the relative
    change between the first and last row of the window.
    """
    n_assets = 3
    returns = Returns(window_length=window_length)
    today = datetime64(1, 'ns')
    assets = arange(n_assets)

    seed(seed_value)  # Seed so we get deterministic results.
    test_data = abs(randn(window_length, n_assets))

    first_row, last_row = test_data[0], test_data[-1]
    expected = (last_row - first_row) / first_row

    # Allocated once, right before use (the original had a redundant
    # second allocation of the same buffer).
    out = empty((n_assets,), dtype=float)
    returns.compute(today, assets, out, test_data)
    check_allclose(expected, out)
def make_pipeline(assets):
    """
    Build a volatility-adjusted momentum pipeline for a static asset list.

    Parameters
    ----------
    assets
        Assets passed to ``StaticAssets``; the resulting filter masks every
        factor and is also the pipeline screen.

    Returns
    -------
    Pipeline
        Columns ``Score``, ``Day20`` and the latest ``high``, ``low``,
        ``close``, ``open_price`` and ``volume``. ``Score`` is the weighted
        sum of the 21-, 63-, 126- and 252-bar returns divided by
        ``AnnualizedVolatility``.
    """
    # One weight per lookback horizon.
    WEIGHT1 = 1.0
    WEIGHT2 = 1.0
    WEIGHT3 = 1.0
    WEIGHT4 = 1.0

    etf_universe = StaticAssets(assets)

    day20_ret = Returns(inputs=[USEquityPricing.close],
                        window_length=21,
                        mask=etf_universe)
    day3mo_ret = Returns(inputs=[USEquityPricing.close],
                         window_length=63,
                         mask=etf_universe)
    day6mo_ret = Returns(inputs=[USEquityPricing.close],
                         window_length=126,
                         mask=etf_universe)
    day1yr_ret = Returns(inputs=[USEquityPricing.close],
                         window_length=252,
                         mask=etf_universe)

    volatility = AnnualizedVolatility(mask=etf_universe)

    # Each horizon uses its own weight. The original reused WEIGHT1 and
    # WEIGHT3 and left WEIGHT2 / WEIGHT4 unused; with all weights at 1.0
    # the score is numerically unchanged.
    score = ((WEIGHT1 * day20_ret)
             + (WEIGHT2 * day3mo_ret)
             + (WEIGHT3 * day6mo_ret)
             + (WEIGHT4 * day1yr_ret)) / (volatility)

    high = USEquityPricing.high.latest
    low = USEquityPricing.low.latest
    open_price = USEquityPricing.open.latest
    close = USEquityPricing.close.latest
    volume = USEquityPricing.volume.latest

    pipe = Pipeline(
        columns={
            'Score': score,
            'Day20': day20_ret,
            'high': high,
            'low': low,
            'close': close,
            'open_price': open_price,
            'volume': volume,
        },
        screen=etf_universe,
    )
    return pipe
def make_pipeline():
    """
    Create a pipeline with a single 252-bar returns column.

    The screen keeps assets whose 30-bar average dollar volume exceeds
    ``10e6`` (ten million).
    """
    one_year_returns = Returns(window_length=252)
    is_liquid = AverageDollarVolume(window_length=30) > 10e6

    return Pipeline(
        columns={"1y_returns": one_year_returns},
        screen=is_liquid,
    )
def make_pipeline():
    """
    Create a 252-bar returns pipeline screened on a fixed symbol list.

    The screen keeps assets whose 30-bar average dollar volume, computed
    only for the listed symbols, exceeds ``10e6`` (ten million).
    """
    tickers = ['AAPL', 'AA', 'KKD', 'MON', 'SPY', 'XOM', 'JNJ', 'HD', 'MSFT']
    russell_universe = StaticAssets(symbols(tickers))

    dollar_volume = AverageDollarVolume(
        window_length=30,
        mask=russell_universe,
    )
    is_liquid = dollar_volume > 10e6

    return Pipeline(
        columns={"1y_returns": Returns(window_length=252)},
        screen=is_liquid,
    )
class Cud(CustomFactor):
    """
    Count the non-overlapping 5-row periods with a positive log return.

    The input is a 131-row window of 2-bar ``Returns``. The latest row is
    excluded, the remaining rows are turned into cumulative log returns,
    and the number of 5-row periods whose log return is positive is
    written to ``out`` for each column.
    """

    window_length = 131
    dailyreturns = (Returns(window_length=2))
    inputs = [dailyreturns]

    def compute(self, today, assets, out, dailyreturns):
        # Cumulative log returns over the window, without the latest row.
        cumrets = np.cumsum(np.log(dailyreturns[-131:-1, :] + 1), axis=0)

        # Log return over each 5-row span, sampled every 5 rows so the
        # spans do not overlap. The original called np.diff(cumrets, 5),
        # whose second argument is the *order* of the difference, so it
        # produced 5th-order differences rather than 5-row returns.
        weeklyrets = (cumrets[5:] - cumrets[:-5])[::5]

        total_count = weeklyrets.shape[0]
        up_count = np.sum((weeklyrets > 0), axis=0)

        # Debug output inspects column 20; skipped when fewer columns
        # exist so that debugging cannot raise IndexError.
        if VERBOSE and weeklyrets.shape[1] > 20:
            print('check type:', weeklyrets[:, 20])
            print('up/total', up_count[20], total_count)

        out[:] = up_count
def test_factor_regression_method(self, returns_length, regression_length):
    """
    Check `Factor.linear_regression` against the built-in factor.

    The method's output must equal that of
    `RollingLinearRegressionOfReturns` for the same target asset and
    lengths.
    """
    target_asset = self.my_asset

    base_returns = Returns(window_length=returns_length, inputs=[self.col])
    target_slice = base_returns[target_asset]

    method_regression = base_returns.linear_regression(
        target=target_slice,
        regression_length=regression_length,
    )
    builtin_regression = RollingLinearRegressionOfReturns(
        target=target_asset,
        returns_length=returns_length,
        regression_length=regression_length,
    )

    # The built-in creates its own Returns factor (on
    # USEquityPricing.close by default), so the only way to make it use
    # our random data is to overwrite its inputs after construction. This
    # is a test-only trick and should not be done in practice.
    builtin_regression.inputs = [base_returns, target_slice]

    pipeline = Pipeline(
        columns={
            'regression': method_regression,
            'expected_regression': builtin_regression,
        },
    )
    results = self.run_pipeline(
        pipeline,
        self.pipeline_start_date,
        self.pipeline_end_date,
    )

    actual = results['regression'].unstack()
    expected = results['expected_regression'].unstack()
    assert_frame_equal(actual, expected)
def pipeline_columns_and_mask(self):
    """
    Assemble the prediction and weight columns plus the final universe.

    Returns
    -------
    (dict, Filter)
        ``columns`` with the keys ``predict`` and ``weights``, and the
        filter selecting the top 20 assets by ``predict``.
    """
    base_universe = make_china_equity_universe(
        target_size=3000,
        mask=default_china_equity_universe_mask([risk_benchmark]),
        max_group_weight=0.01,
        smoothing_func=lambda f: f.downsample('month_start'),
    )
    # NOTE(review): the original comment said the current portfolio's
    # stocks are *included* in the universe, yet the expression below
    # removes whatever ``private_universe_mask`` selects -- confirm.
    private_universe = private_universe_mask(self.portfolio.index)
    # Latest close of at least 1 (yuan).
    priced_above_one = USEquityPricing.close.latest >= 1.0
    universe = (base_universe & priced_above_one) & ~private_universe

    returns = Returns(
        inputs=[USEquityPricing.close],
        mask=universe,
        window_length=2,
    )
    benchmark_regression = RollingLinearRegressionOfReturns(
        target=symbol(risk_benchmark),
        returns_length=5,
        regression_length=21,
        mask=(universe),
    )
    # Blend the raw beta with 1.0 using weights 0.66 and 0.33.
    risk_beta = 0.66 * benchmark_regression.beta + 0.33 * 1.0

    # Both terms are marked window safe before being used as inputs of
    # the Markowitz factor below.
    returns.window_safe = True
    risk_beta.window_safe = True

    predict = RNNPredict(universe, trigger_date=self.predict_time)

    # From here on the universe is narrowed to the 20 highest predictions.
    universe = predict.top(20)
    weights = Markowitz(
        inputs=[returns, risk_beta],
        window_length=4,
        mask=universe,
        trigger_date=self.predict_time,
    )

    columns = {
        'predict': predict,
        'weights': weights,
    }
    return columns, universe
def make_pipeline():
    """
    Build a pipeline selecting extreme recent performers among liquid names.

    Returns are computed only for assets whose 1-day average dollar volume
    is between the ``N``-th and 100th percentile. The screen keeps assets
    whose returns are in the 0-10 or ``N``-100 percentile band.

    NOTE(review): the module-level ``N`` is used as a percentile bound
    *and* as the returns window length -- confirm this is intentional.
    """
    dollar_volume = AverageDollarVolume(window_length=1)
    liquid = dollar_volume.percentile_between(N, 100)
    recent_returns = Returns(window_length=N, mask=liquid)

    bottom_band = recent_returns.percentile_between(0, 10)
    top_band = recent_returns.percentile_between(N, 100)
    either_band = (bottom_band | top_band)

    columns = {
        'low_returns': bottom_band,
        'high_returns': top_band,
        'recent_returns': recent_returns,
        'dollar_volume': dollar_volume,
    }
    return Pipeline(columns=columns, screen=either_band)
def pipeline_columns_and_mask(self):
    """
    Assemble the prediction, beta and sector columns plus the universe.

    The universe keeps assets whose latest close is at least 1.0 and whose
    sector classifier is not 0.0.

    Returns
    -------
    (dict, Filter)
        ``columns`` with the keys ``predict``, ``market_beta`` and
        ``sector``, and the universe filter.
    """
    # Latest close of at least 1 (yuan).
    priced_above_one = USEquityPricing.close.latest >= 1.0
    sector = get_sector()
    has_sector = sector != 0.0
    universe = priced_above_one & has_sector

    # Not placed in a column; it is only created and flagged window safe.
    returns = Returns(
        inputs=[USEquityPricing.close],
        mask=universe,
        window_length=2,
    )
    benchmark_regression = RollingLinearRegressionOfReturns(
        target=symbol(RISK_BENCHMARK),
        returns_length=5,
        regression_length=21,
        mask=(universe),
    )
    # Blend the raw beta with 1.0 using weights 0.66 and 0.33.
    risk_beta = 0.66 * benchmark_regression.beta + 0.33 * 1.0

    returns.window_safe = True
    risk_beta.window_safe = True

    predict = RNNPredict(
        universe,
        source='predict.csv',
        trigger_date=self.predict_time,
    )

    columns = {
        'predict': predict,
        'market_beta': risk_beta,
        'sector': sector,
    }
    return columns, universe
def get_returns(self, start_date, end_date, assets=None):
    """
    Run a pipeline of open, close, inter-day and intra-day returns.

    Parameters
    ----------
    start_date, end_date
        Forwarded to ``self._run_pipeline``.
    assets : optional
        When given, the pipeline is screened to ``StaticAssets(assets)``.

    Returns
    -------
    DataFrame
        The pipeline output with ``open_return`` shifted up by one row
        within each asset, and every row containing a missing value
        dropped.
    """
    columns = {
        'open_return': Returns(window_length=2, inputs=[USEquityPricing.open]),
        'close_return': Returns(window_length=2, inputs=[USEquityPricing.close]),
        'inter_day_return': InterDayReturns(),
        'intra_day_return': IntraDayReturns(),
    }
    pipeline = Pipeline(columns=columns)
    if assets is not None:
        pipeline.set_screen(StaticAssets(assets))

    df = self._run_pipeline(pipeline, start_date, end_date)

    # Put each asset's next open return on the current row.
    asset_labels = df.index.get_level_values('asset')
    per_asset_open = df.groupby(asset_labels)['open_return']
    df['open_return'] = per_asset_open.shift(-1)

    df.dropna(inplace=True)
    return df
def make_pipeline(context):
    """
    Create the dynamic stock selector (pipeline).

    Every factor is masked by ``context.universe``, which is also the
    screen. ``Score`` is a weighted sum of the 21-, 63-, 126-, 189- and
    252-bar close returns divided by ``AnnualizedVolatility``.

    Documentation on pipeline can be found here:
    https://www.quantopian.com/help#pipeline-title
    """
    universe = context.universe

    def close_returns(window_length):
        # Close-to-close returns over the given window, in the universe.
        return Returns(
            inputs=[USEquityPricing.close],
            window_length=window_length,
            mask=universe,
        )

    day1mo_ret = close_returns(21)
    day3mo_ret = close_returns(63)
    day6mo_ret = close_returns(126)
    day9mo_ret = close_returns(189)
    day1yr_ret = close_returns(252)

    volatility = AnnualizedVolatility(mask=universe)

    # NOTE(review): WEIGHT3 is applied to both the 6- and 9-month terms
    # and WEIGHT4 is never used -- this looks like a copy-paste slip, but
    # WEIGHT4 is not visible here, so the expression is left unchanged.
    weighted_sum = ((WEIGHT1 * day1mo_ret)
                    + (WEIGHT2 * day3mo_ret)
                    + (WEIGHT3 * day6mo_ret)
                    + (WEIGHT3 * day9mo_ret)
                    + (WEIGHT5 * day1yr_ret))
    score = weighted_sum / (volatility)

    columns = {
        'Score': score,
        'Day1mo': day1mo_ret,
        'high': USEquityPricing.high.latest,
        'low': USEquityPricing.low.latest,
        'close': USEquityPricing.close.latest,
        'open_price': USEquityPricing.open.latest,
        'volume': USEquityPricing.volume.latest,
    }
    return Pipeline(columns=columns, screen=universe)
def test_slice_with_masking(self, unmasked_column, slice_column):
    """
    Check that masking a factor does not mask its slice inputs.

    A custom factor taking both a factor and a slice of it is masked down
    to a single asset. Its `compute` asserts that the factor input is
    reduced to that asset while the slice still carries the slice asset's
    data.
    """
    finder = self.asset_finder
    first_day = self.pipeline_start_date
    last_day = self.pipeline_end_date

    # Filter that lets exactly one asset through.
    unmasked_asset = finder.retrieve_asset(self.sids[unmasked_column])
    unmasked_asset_only = (AssetID().eq(unmasked_asset.sid))

    # Asset the slice is taken from. When it differs from
    # `unmasked_asset`, the slice must still hold non-missing data inside
    # the custom factor, i.e. it must not be masked out.
    slice_asset = finder.retrieve_asset(self.sids[slice_column])

    returns = Returns(window_length=2, inputs=[self.col])
    returns_slice = returns[slice_asset]

    # Unmasked reference values, one column per asset.
    reference = self.run_pipeline(
        Pipeline(columns={'returns': returns}),
        first_day,
        last_day,
    )['returns'].unstack()

    class UsesSlicedInput(CustomFactor):
        window_length = 1
        inputs = [returns, returns_slice]

        def compute(self, today, assets, out, returns, returns_slice):
            # The mask must shrink `returns` to one column and leave
            # `returns_slice` untouched.
            assert returns.shape == (1, 1)
            assert returns_slice.shape == (1, 1)
            expected_masked = reference.loc[today, unmasked_asset]
            assert returns[0, 0] == expected_masked
            expected_slice = reference.loc[today, slice_asset]
            assert returns_slice[0, 0] == expected_slice

    # The assertions live in `compute`; running the pipeline fires them.
    masked_pipeline = Pipeline(
        columns={'masked': UsesSlicedInput(mask=unmasked_asset_only)},
    )
    self.run_pipeline(masked_pipeline, first_day, last_day)
def test_factor_correlation_methods_two_factors(self, correlation_length):
    """
    Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another
    2D factor instead of a Slice.

    Two checks are made: the methods raise `IncompatibleTerms` for factors
    with different masks, and for compatible factors the pipeline output
    equals per-asset correlations computed directly with `pearsonr` and
    `spearmanr`.
    """
    assets = self.assets
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # Ensure that the correlation methods cannot be called with two 2D
    # factors which have different masks.
    returns_masked_1 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(1),
    )
    returns_masked_2 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(2),
    )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.pearsonr(
            target=returns_masked_2,
            correlation_length=correlation_length,
        )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.spearmanr(
            target=returns_masked_2,
            correlation_length=correlation_length,
        )

    returns_5 = Returns(window_length=5, inputs=[self.col])
    returns_10 = Returns(window_length=10, inputs=[self.col])

    pearson_factor = returns_5.pearsonr(
        target=returns_10, correlation_length=correlation_length,
    )
    spearman_factor = returns_5.spearmanr(
        target=returns_10, correlation_length=correlation_length,
    )

    columns = {
        'pearson_factor': pearson_factor,
        'spearman_factor': spearman_factor,
    }
    pipeline = Pipeline(columns=columns)

    results = run_pipeline(pipeline, start_date, end_date)
    pearson_results = results['pearson_factor'].unstack()
    spearman_results = results['spearman_factor'].unstack()

    # Run a separate pipeline that calculates returns starting
    # (correlation_length - 1) days prior to our start date. This is
    # because we need (correlation_length - 1) extra days of returns to
    # compute our expected correlations.
    columns = {'returns_5': returns_5, 'returns_10': returns_10}
    results = run_pipeline(
        Pipeline(columns=columns),
        dates[start_date_index - (correlation_length - 1)],
        dates[end_date_index],
    )
    returns_5_results = results['returns_5'].unstack()
    returns_10_results = results['returns_10'].unstack()

    # On each day, calculate the expected correlation coefficients
    # between each asset's 5 and 10 day rolling returns. Each correlation
    # is calculated over `correlation_length` days.
    expected_pearson_results = full_like(pearson_results, nan)
    expected_spearman_results = full_like(spearman_results, nan)
    for day in range(num_days):
        todays_returns_5 = returns_5_results.iloc[
            day:day + correlation_length
        ]
        todays_returns_10 = returns_10_results.iloc[
            day:day + correlation_length
        ]
        # `.items()` replaces `.iteritems()`, which pandas 2.0 removed.
        for asset, asset_returns_5 in todays_returns_5.items():
            # Column position is derived from the asset's integer value.
            asset_column = int(asset) - 1
            asset_returns_10 = todays_returns_10[asset]
            expected_pearson_results[day, asset_column] = pearsonr(
                asset_returns_5, asset_returns_10,
            )[0]
            expected_spearman_results[day, asset_column] = spearmanr(
                asset_returns_5, asset_returns_10,
            )[0]

    expected_pearson_results = DataFrame(
        data=expected_pearson_results,
        index=dates[start_date_index:end_date_index + 1],
        columns=assets,
    )
    assert_frame_equal(pearson_results, expected_pearson_results)

    expected_spearman_results = DataFrame(
        data=expected_spearman_results,
        index=dates[start_date_index:end_date_index + 1],
        columns=assets,
    )
    assert_frame_equal(spearman_results, expected_spearman_results)
def test_factor_regression_method_two_factors(self, regression_length):
    """
    Tests for `Factor.linear_regression` when passed another 2D factor
    instead of a Slice.

    Two checks are made: the method raises `IncompatibleTerms` for factors
    with different masks, and for compatible factors each regression
    output equals the per-asset result of `linregress`.
    """
    assets = self.assets
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    # Ensure that the `linear_regression` method cannot be called with two
    # 2D factors which have different masks.
    returns_masked_1 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(1),
    )
    returns_masked_2 = Returns(
        window_length=5, inputs=[self.col], mask=AssetID().eq(2),
    )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.linear_regression(
            target=returns_masked_2,
            regression_length=regression_length,
        )

    returns_5 = Returns(window_length=5, inputs=[self.col])
    returns_10 = Returns(window_length=10, inputs=[self.col])

    regression_factor = returns_5.linear_regression(
        target=returns_10, regression_length=regression_length,
    )

    columns = {
        output: getattr(regression_factor, output) for output in outputs
    }
    pipeline = Pipeline(columns=columns)

    results = run_pipeline(pipeline, start_date, end_date)

    output_results = {}
    expected_output_results = {}
    for output in outputs:
        output_results[output] = results[output].unstack()
        expected_output_results[output] = full_like(
            output_results[output], nan,
        )

    # Run a separate pipeline that calculates returns starting
    # (regression_length - 1) days prior to our start date. This is because
    # we need (regression_length - 1) extra days of returns to compute our
    # expected regressions.
    columns = {'returns_5': returns_5, 'returns_10': returns_10}
    results = run_pipeline(
        Pipeline(columns=columns),
        dates[start_date_index - (regression_length - 1)],
        dates[end_date_index],
    )
    returns_5_results = results['returns_5'].unstack()
    returns_10_results = results['returns_10'].unstack()

    # On each day, for each asset, calculate the expected regression
    # results of Y ~ X where Y is the asset's rolling 5 day returns and X
    # is the asset's rolling 10 day returns. Each regression is calculated
    # over `regression_length` days of data.
    for day in range(num_days):
        todays_returns_5 = returns_5_results.iloc[
            day:day + regression_length
        ]
        todays_returns_10 = returns_10_results.iloc[
            day:day + regression_length
        ]
        # `.items()` replaces `.iteritems()`, which pandas 2.0 removed.
        for asset, asset_returns_5 in todays_returns_5.items():
            # Column position is derived from the asset's integer value.
            asset_column = int(asset) - 1
            asset_returns_10 = todays_returns_10[asset]
            expected_regression_results = linregress(
                y=asset_returns_5, x=asset_returns_10,
            )
            for i, output in enumerate(outputs):
                expected_output_results[output][day, asset_column] = \
                    expected_regression_results[i]

    for output in outputs:
        output_result = output_results[output]
        expected_output_result = DataFrame(
            expected_output_results[output],
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(output_result, expected_output_result)
def test_factor_regression_method(self, returns_length, regression_length):
    """
    Check `Factor.linear_regression` against the built-in factor and
    verify that datetime-dtype operands are rejected.

    The method's output must equal that of
    `RollingLinearRegressionOfReturns` for the same target asset and
    lengths. Calling it with a `datetime64[ns]` factor, or with a slice of
    one as target, must raise `TypeError`.
    """
    target_asset = self.asset_finder.retrieve_asset(self.sids[0])

    base_returns = Returns(window_length=returns_length, inputs=[self.col])
    target_slice = base_returns[target_asset]

    method_regression = base_returns.linear_regression(
        target=target_slice,
        regression_length=regression_length,
    )
    builtin_regression = RollingLinearRegressionOfReturns(
        target=target_asset,
        returns_length=returns_length,
        regression_length=regression_length,
    )

    # The built-in creates its own Returns factor (on
    # USEquityPricing.close by default), so the only way to make it use
    # our random data is to overwrite its inputs after construction. This
    # is a test-only trick and should not be done in practice.
    builtin_regression.inputs = [base_returns, target_slice]

    results = self.run_pipeline(
        Pipeline(
            columns={
                'regression': method_regression,
                'expected_regression': builtin_regression,
            },
        ),
        self.pipeline_start_date,
        self.pipeline_end_date,
    )
    actual = results['regression'].unstack()
    expected = results['expected_regression'].unstack()
    assert_frame_equal(actual, expected)

    # The regression method must refuse factors or slices of dtype
    # `datetime64[ns]`.
    class DateFactor(CustomFactor):
        window_length = 1
        inputs = []
        dtype = datetime64ns_dtype
        window_safe = True

        def compute(self, today, assets, out):
            pass

    date_factor = DateFactor()
    date_slice = date_factor[target_asset]

    # Each (receiver, target) pair contains exactly one datetime operand.
    bad_pairs = (
        (date_factor, target_slice),
        (base_returns, date_slice),
    )
    for receiver, bad_target in bad_pairs:
        with self.assertRaises(TypeError):
            receiver.linear_regression(
                target=bad_target,
                regression_length=regression_length,
            )