def test_screen(self):
    """
    Run a pipeline screened on ``AssetID() <= sid`` once per known sid and
    check that only the assets at or below that sid appear in the output.
    """
    pipeline_loader = self.loader
    asset_finder = self.asset_finder
    all_sids = array(self.asset_ids)

    engine = SimplePipelineEngine(
        lambda column: pipeline_loader,
        self.dates,
        self.asset_finder,
    )

    # Use a five-day window starting at the eleventh available date.
    window = 5
    run_dates = self.dates[10:10 + window]

    sid_factor = AssetID()
    for threshold in all_sids:
        pipeline = Pipeline(
            columns={'f': sid_factor},
            screen=sid_factor <= threshold,
        )
        actual = engine.run_pipeline(pipeline, run_dates[0], run_dates[-1])

        # Every sid that passes the screen should show up on every date,
        # with the factor value equal to the sid itself (as a float).
        surviving_sids = all_sids[all_sids <= threshold]
        surviving_assets = asset_finder.retrieve_all(surviving_sids)
        expected = DataFrame(
            index=MultiIndex.from_product([run_dates, surviving_assets]),
            data=tile(surviving_sids.astype(float), [len(run_dates)]),
            columns=['f'],
        )

        assert_frame_equal(actual, expected)
def test_correlation_and_regression_with_bad_asset(self):
    """
    Check that `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and
    `RollingLinearRegressionOfReturns` each raise
    `NonExistentAssetInTimeFrame` when their target is an asset that does
    not exist, both with and without an explicit mask.
    """
    missing_asset = Equity(
        0,
        real_sid='0',
        currency='USD',
        exchange_info=ExchangeInfo('TEST', 'TEST FULL', 'US'),
    )
    first_day = self.pipeline_start_date
    last_day = self.pipeline_end_date
    run_pipeline = self.run_pipeline

    # The particular filter does not matter; what matters is that every
    # factor is exercised once unmasked and once with some mask.
    arbitrary_filter = AssetID().eq(1)

    for mask in (NotSpecified, arbitrary_filter):
        # Build all three factors up front, keyed by the column name they
        # are run under.
        named_factors = (
            (
                'pearson_factor',
                RollingPearsonOfReturns(
                    target=missing_asset,
                    returns_length=3,
                    correlation_length=3,
                    mask=mask,
                ),
            ),
            (
                'spearman_factor',
                RollingSpearmanOfReturns(
                    target=missing_asset,
                    returns_length=3,
                    correlation_length=3,
                    mask=mask,
                ),
            ),
            (
                'regression_factor',
                RollingLinearRegressionOfReturns(
                    target=missing_asset,
                    returns_length=3,
                    regression_length=3,
                    mask=mask,
                ),
            ),
        )

        # Each factor runs in its own pipeline so that each one is
        # independently required to raise.
        for column_name, factor in named_factors:
            with self.assertRaises(NonExistentAssetInTimeFrame):
                run_pipeline(
                    Pipeline(columns={column_name: factor}),
                    first_day,
                    last_day,
                )
def test_same_day_pipeline(self):
    """
    Run a pipeline whose start and end dates are the same day and check
    the first value of the resulting ``'f'`` column.
    """
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    factor = AssetID()
    asset = self.asset_ids[0]
    p = Pipeline(columns={'f': factor}, screen=factor <= asset)

    # The crux of this is that when we run the pipeline for a single day
    # (i.e. start and end dates are the same) we should accurately get
    # data for the day prior.
    result = engine.run_pipeline(p, self.dates[1], self.dates[1])

    # Select the first row by position explicitly. The result is not
    # indexed by integers, and an integer key passed to
    # ``Series.__getitem__`` is no longer reliably treated as a position
    # by newer pandas releases.
    self.assertEqual(result['f'].iloc[0], 1.0)
def test_slice_with_masking(self, unmasked_column, slice_column):
    """
    Check that applying a mask to a factor which takes a slice as one of
    its inputs leaves the slice's data unmasked.
    """
    finder = self.asset_finder
    sid_list = self.sids
    first_day = self.pipeline_start_date
    last_day = self.pipeline_end_date

    # Build a filter that lets exactly one asset through.
    visible_asset = finder.retrieve_asset(sid_list[unmasked_column])
    only_visible_asset = AssetID().eq(visible_asset.sid)

    # The asset the slice is taken from. When it differs from
    # `visible_asset`, the slice must still carry real (non-missing) data
    # into the custom factor, i.e. the mask must not apply to it.
    sliced_asset = finder.retrieve_asset(sid_list[slice_column])

    returns = Returns(window_length=2, inputs=[self.col])
    returns_slice = returns[sliced_asset]

    # Unmasked returns, reshaped to one row per date and one column per
    # asset, used as the reference values inside `compute`.
    reference_frame = self.run_pipeline(
        Pipeline(columns={'returns': returns}),
        first_day,
        last_day,
    )
    reference = reference_frame['returns'].unstack()

    class UsesSlicedInput(CustomFactor):
        window_length = 1
        inputs = [returns, returns_slice]

        def compute(self, today, assets, out, returns, returns_slice):
            # The mask should narrow `returns` down to a single asset
            # while leaving `returns_slice` intact.
            assert returns.shape == (1, 1)
            assert returns_slice.shape == (1, 1)
            assert returns[0, 0] == reference.loc[today, visible_asset]
            assert returns_slice[0, 0] == \
                reference.loc[today, sliced_asset]

    # All checks on the data live in `UsesSlicedInput.compute`; running
    # the pipeline is what triggers them.
    masked_columns = {'masked': UsesSlicedInput(mask=only_visible_asset)}
    self.run_pipeline(Pipeline(columns=masked_columns), first_day, last_day)
def test_factor_regression_method_two_factors(self, regression_length):
    """
    Tests for `Factor.linear_regression` when passed another 2D factor
    instead of a Slice.

    Parameters
    ----------
    regression_length : int
        Number of days of data each regression is computed over.
    """
    assets = self.assets
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    # Ensure that the `linear_regression` method cannot be called with two
    # 2D factors which have different masks.
    returns_masked_1 = Returns(
        window_length=5,
        inputs=[self.col],
        mask=AssetID().eq(1),
    )
    returns_masked_2 = Returns(
        window_length=5,
        inputs=[self.col],
        mask=AssetID().eq(2),
    )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.linear_regression(
            target=returns_masked_2,
            regression_length=regression_length,
        )

    returns_5 = Returns(window_length=5, inputs=[self.col])
    returns_10 = Returns(window_length=10, inputs=[self.col])

    regression_factor = returns_5.linear_regression(
        target=returns_10,
        regression_length=regression_length,
    )

    columns = {
        output: getattr(regression_factor, output) for output in outputs
    }
    pipeline = Pipeline(columns=columns)

    results = run_pipeline(pipeline, start_date, end_date)

    output_results = {}
    expected_output_results = {}
    for output in outputs:
        output_results[output] = results[output].unstack()
        # Start from an all-NaN array of the same shape; it is filled in
        # cell by cell below.
        expected_output_results[output] = full_like(
            output_results[output], nan,
        )

    # Run a separate pipeline that calculates returns starting
    # (regression_length - 1) days prior to our start date. This is because
    # we need (regression_length - 1) extra days of returns to compute our
    # expected regressions.
    columns = {'returns_5': returns_5, 'returns_10': returns_10}
    results = run_pipeline(
        Pipeline(columns=columns),
        dates[start_date_index - (regression_length - 1)],
        dates[end_date_index],
    )
    returns_5_results = results['returns_5'].unstack()
    returns_10_results = results['returns_10'].unstack()

    # On each day, for each asset, calculate the expected regression
    # results of Y ~ X where Y is the asset's rolling 5 day returns and X
    # is the asset's rolling 10 day returns. Each regression is calculated
    # over `regression_length` days of data.
    for day in range(num_days):
        todays_returns_5 = returns_5_results.iloc[
            day:day + regression_length
        ]
        todays_returns_10 = returns_10_results.iloc[
            day:day + regression_length
        ]
        # `DataFrame.items` yields (column label, column Series) pairs; it
        # replaces `iteritems`, which newer pandas no longer provides.
        for asset, asset_returns_5 in todays_returns_5.items():
            # Column position is derived from the asset's integer value.
            asset_column = int(asset) - 1
            asset_returns_10 = todays_returns_10[asset]
            expected_regression_results = linregress(
                y=asset_returns_5, x=asset_returns_10,
            )
            # `outputs` is ordered to line up with the fields of the
            # `linregress` result, so pair them positionally.
            for output, expected_value in zip(
                outputs, expected_regression_results,
            ):
                expected_output_results[output][day, asset_column] = \
                    expected_value

    for output in outputs:
        output_result = output_results[output]
        expected_output_result = DataFrame(
            expected_output_results[output],
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(output_result, expected_output_result)
def test_factor_correlation_methods_two_factors(self, correlation_length):
    """
    Tests for `Factor.pearsonr` and `Factor.spearmanr` when passed another
    2D factor instead of a Slice.

    Parameters
    ----------
    correlation_length : int
        Number of days of data each correlation is computed over.
    """
    assets = self.assets
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # Ensure that the correlation methods cannot be called with two 2D
    # factors which have different masks.
    returns_masked_1 = Returns(
        window_length=5,
        inputs=[self.col],
        mask=AssetID().eq(1),
    )
    returns_masked_2 = Returns(
        window_length=5,
        inputs=[self.col],
        mask=AssetID().eq(2),
    )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.pearsonr(
            target=returns_masked_2,
            correlation_length=correlation_length,
        )
    with self.assertRaises(IncompatibleTerms):
        returns_masked_1.spearmanr(
            target=returns_masked_2,
            correlation_length=correlation_length,
        )

    returns_5 = Returns(window_length=5, inputs=[self.col])
    returns_10 = Returns(window_length=10, inputs=[self.col])

    pearson_factor = returns_5.pearsonr(
        target=returns_10,
        correlation_length=correlation_length,
    )
    spearman_factor = returns_5.spearmanr(
        target=returns_10,
        correlation_length=correlation_length,
    )

    columns = {
        'pearson_factor': pearson_factor,
        'spearman_factor': spearman_factor,
    }
    pipeline = Pipeline(columns=columns)

    results = run_pipeline(pipeline, start_date, end_date)
    pearson_results = results['pearson_factor'].unstack()
    spearman_results = results['spearman_factor'].unstack()

    # Run a separate pipeline that calculates returns starting
    # (correlation_length - 1) days prior to our start date. This is
    # because we need (correlation_length - 1) extra days of returns to
    # compute our expected correlations.
    columns = {'returns_5': returns_5, 'returns_10': returns_10}
    results = run_pipeline(
        Pipeline(columns=columns),
        dates[start_date_index - (correlation_length - 1)],
        dates[end_date_index],
    )
    returns_5_results = results['returns_5'].unstack()
    returns_10_results = results['returns_10'].unstack()

    # On each day, calculate the expected correlation coefficients
    # between each asset's 5 and 10 day rolling returns. Each correlation
    # is calculated over `correlation_length` days.
    expected_pearson_results = full_like(pearson_results, nan)
    expected_spearman_results = full_like(spearman_results, nan)
    for day in range(num_days):
        todays_returns_5 = returns_5_results.iloc[
            day:day + correlation_length
        ]
        todays_returns_10 = returns_10_results.iloc[
            day:day + correlation_length
        ]
        # `DataFrame.items` yields (column label, column Series) pairs; it
        # replaces `iteritems`, which newer pandas no longer provides.
        for asset, asset_returns_5 in todays_returns_5.items():
            # Column position is derived from the asset's integer value.
            asset_column = int(asset) - 1
            asset_returns_10 = todays_returns_10[asset]
            # Both scipy functions return the coefficient first; only the
            # coefficient is compared.
            expected_pearson_results[day, asset_column] = pearsonr(
                asset_returns_5, asset_returns_10,
            )[0]
            expected_spearman_results[day, asset_column] = spearmanr(
                asset_returns_5, asset_returns_10,
            )[0]

    expected_pearson_results = DataFrame(
        data=expected_pearson_results,
        index=dates[start_date_index:end_date_index + 1],
        columns=assets,
    )
    assert_frame_equal(pearson_results, expected_pearson_results)

    expected_spearman_results = DataFrame(
        data=expected_spearman_results,
        index=dates[start_date_index:end_date_index + 1],
        columns=assets,
    )
    assert_frame_equal(spearman_results, expected_spearman_results)