def _test_id(self, df, dshape, expected, finder, add):
    expr = bz.data(df, name='expr', dshape=dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    for a in add:
        p.add(getattr(ds, a).latest, a)
    dates = self.dates

    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected),
        check_dtype=False,
    )
def test_id(self):
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates

    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    expected = self.df.drop('asof_date', axis=1).set_index(
        ['timestamp', 'sid'],
    )
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def test_custom_query_time_tz(self): df = self.df.copy() df["timestamp"] = ( (pd.DatetimeIndex(df["timestamp"], tz="EST") + timedelta(hours=8, minutes=44)) .tz_convert("utc") .tz_localize(None) ) df.ix[3:5, "timestamp"] = pd.Timestamp("2014-01-01 13:45") expr = bz.Data(df, name="expr", dshape=self.dshape) loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz="EST") ds = from_blaze(expr, loader=loader, no_deltas_rule=no_deltas_rules.ignore, missing_values=self.missing_values) p = Pipeline() p.add(ds.value.latest, "value") p.add(ds.int_value.latest, "int_value") dates = self.dates with tmp_asset_finder() as finder: result = SimplePipelineEngine(loader, dates, finder).run_pipeline(p, dates[0], dates[-1]) expected = df.drop("asof_date", axis=1) expected["timestamp"] = expected["timestamp"].dt.normalize().astype("datetime64[ns]").dt.tz_localize("utc") expected.ix[3:5, "timestamp"] += timedelta(days=1) expected.set_index(["timestamp", "sid"], inplace=True) expected.index = pd.MultiIndex.from_product( (expected.index.levels[0], finder.retrieve_all(expected.index.levels[1])) ) assert_frame_equal(result, expected, check_dtype=False)
def test_id_macro_dataset(self):
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    dates = self.dates

    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    nassets = len(asset_info)
    expected = pd.DataFrame(
        list(concatv([0] * nassets, [1] * nassets, [2] * nassets)),
        index=pd.MultiIndex.from_product((
            self.macro_df.timestamp,
            finder.retrieve_all(asset_info.index),
        )),
        columns=('value',),
    )
    assert_frame_equal(result, expected, check_dtype=False)
def _run_pipeline(self,
                  expr,
                  deltas,
                  expected_views,
                  expected_output,
                  finder,
                  calendar,
                  start,
                  end,
                  window_length,
                  compute_fn):
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        deltas,
        loader=loader,
        no_deltas_rule=no_deltas_rules.raise_,
        missing_values=self.missing_values,
    )
    p = Pipeline()

    # Rebind so the class body below can read the value; class scopes
    # skip enclosing function locals for a name they also assign.
    window_length_ = window_length

    class TestFactor(CustomFactor):
        inputs = (ds.value,)
        window_length = window_length_

        def compute(self, today, assets, out, data):
            assert_array_almost_equal(data, expected_views[today])
            out[:] = compute_fn(data)

    p.add(TestFactor(), "value")

    result = SimplePipelineEngine(
        loader,
        calendar,
        finder,
    ).run_pipeline(p, start, end)

    assert_frame_equal(
        result,
        _utc_localize_index_level_0(expected_output),
        check_dtype=False,
    )
def test_id_macro_dataset_multiple_columns(self):
    """
    input (df):
       asof_date  timestamp  other  value
    0 2014-01-01 2014-01-01      1      0
    3 2014-01-02 2014-01-02      2      1
    6 2014-01-03 2014-01-03      3      2

    output (expected):
                               other  value
    2014-01-01 Equity(65 [A])      1      0
               Equity(66 [B])      1      0
               Equity(67 [C])      1      0
    2014-01-02 Equity(65 [A])      2      1
               Equity(66 [B])      2      1
               Equity(67 [C])      2      1
    2014-01-03 Equity(65 [A])      3      2
               Equity(66 [B])      3      2
               Equity(67 [C])      3      2
    """
    df = self.macro_df.copy()
    df['other'] = df.value + 1
    fields = OrderedDict(self.macro_dshape.measure.fields)
    fields['other'] = fields['value']
    expr = bz.Data(df, name='expr', dshape=var * Record(fields))
    loader = BlazeLoader()
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.other.latest, 'other')
    dates = self.dates

    asset_info = asset_infos[0][0]
    with tmp_asset_finder(equities=asset_info) as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    expected = pd.DataFrame(
        np.array([[0, 1], [1, 2], [2, 3]]).repeat(3, axis=0),
        index=pd.MultiIndex.from_product((
            df.timestamp,
            finder.retrieve_all(asset_info.index),
        )),
        columns=('value', 'other'),
    ).sort_index(axis=1)
    assert_frame_equal(
        result,
        expected.sort_index(axis=1),
        check_dtype=False,
    )
def test_factor_with_multiple_outputs(self):
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    num_dates = len(dates)
    num_assets = len(assets)
    open = USEquityPricing.open
    close = USEquityPricing.close
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    def create_expected_results(expected_value, mask):
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = make_cascading_boolean_array(
        shape=(num_dates, num_assets),
    )
    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = make_alternating_boolean_array(
        shape=(num_dates, num_assets),
        first_value=False,
    )
    expected_no_mask_result = full(
        shape=(num_dates, num_assets),
        fill_value=True,
        dtype=bool_dtype,
    )

    masks = cascading_mask, alternating_mask, NotSpecified
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
        expected_no_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(
            columns={'open_price': open_price, 'close_price': close_price},
        )
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, case_column in (('open_price', open),
                                     ('close_price', close)):
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)
            output_results = results[colname].unstack()
            output_expected = create_expected_results(
                constants[case_column],
                expected_mask,
            )
            assert_frame_equal(output_results, output_expected)
def test_conflict_between_outputs(self):
    class D(DataSet):
        c = Column(float)

    D_US = D.specialize(US_EQUITIES)
    D_CA = D.specialize(CA_EQUITIES)

    pipe = Pipeline({"f": D_US.c.latest, "g": D_CA.c.latest})
    with self.assertRaises(AmbiguousDomain) as e:
        pipe.domain(default=GENERIC)

    self.assertEqual(e.exception.domains, [CA_EQUITIES, US_EQUITIES])
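The ambiguity above goes away if the pipeline is pinned to a single domain up front. A minimal sketch, assuming Pipeline's `domain` keyword (present in newer zipline) and the same `D_US` specialization:

# Hedged sketch: an explicit domain sidesteps AmbiguousDomain.
pipe = Pipeline({"f": D_US.c.latest}, domain=US_EQUITIES)
assert pipe.domain(default=GENERIC) is US_EQUITIES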
def initialize(context):
    # Create, register, and name a pipeline in initialize.
    pipe = Pipeline()
    attach_pipeline(pipe, 'example')

    # Construct a simple moving average factor and add it to the pipeline.
    sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=10)
    pipe.add(sma_short, 'sma_short')

    # Set a screen on the pipeline to filter out securities.
    pipe.set_screen(sma_short > 1.0)
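The attached pipeline is typically consumed once per trading day. A minimal sketch of the consuming side, assuming zipline's standard before_trading_start hook and pipeline_output API:

def before_trading_start(context, data):
    # Fetch today's rows for the pipeline registered above as 'example'.
    # The result is a DataFrame indexed by asset with an 'sma_short'
    # column, already filtered by the sma_short > 1.0 screen.
    context.output = pipeline_output('example')
    context.security_list = context.output.index.tolist()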
def test_adding_slice_column(self):
    """
    Test that slices cannot be added as a pipeline column.
    """
    my_asset = self.asset_finder.retrieve_asset(self.sids[0])
    open_slice = OpenPrice()[my_asset]

    with self.assertRaises(UnsupportedPipelineOutput):
        Pipeline(columns={'open_slice': open_slice})

    pipe = Pipeline(columns={})
    with self.assertRaises(UnsupportedPipelineOutput):
        pipe.add(open_slice, 'open_slice')
def test_mean_reversion_5day_sector_neutral_smoothed(fn):
    column_name = 'Mean_Reversion_5Day_Sector_Neutral_Smoothed'
    start_date_str = '2015-01-05'
    end_date_str = '2015-01-07'

    # Build engine
    trading_calendar = get_calendar('NYSE')
    bundle_data = bundles.load(project_helper.EOD_BUNDLE_NAME)
    engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar)

    # Build pipeline
    universe_window_length = 2
    universe_asset_count = 4
    universe = AverageDollarVolume(
        window_length=universe_window_length).top(universe_asset_count)
    pipeline = Pipeline(screen=universe)

    run_pipeline_args = {
        'pipeline': pipeline,
        'start_date': pd.Timestamp(start_date_str, tz='utc'),
        'end_date': pd.Timestamp(end_date_str, tz='utc')}
    fn_inputs = {
        'window_length': 3,
        'universe': universe,
        'sector': project_helper.Sector()}
    fn_correct_outputs = OrderedDict([
        (
            'pipline_out', pd.DataFrame(
                [0.44721360, 1.34164079, -1.34164079, -0.44721360,
                 1.34164079, 0.44721360, -1.34164079, -0.44721360,
                 0.44721360, 1.34164079, -1.34164079, -0.44721360],
                engine.run_pipeline(**run_pipeline_args).index,
                [column_name]))])

    print('Running Integration Test on pipeline:')
    print('> start_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(start_date_str))
    print('> end_date = pd.Timestamp(\'{}\', tz=\'utc\')'.format(end_date_str))
    print('> universe = AverageDollarVolume(window_length={}).top({})'.format(
        universe_window_length, universe_asset_count))
    print('> factor = {}('.format(fn.__name__))
    print('    window_length={},'.format(fn_inputs['window_length']))
    print('    universe=universe,')
    print('    sector=project_helper.Sector())')
    print('> pipeline.add(factor, \'{}\')'.format(column_name))
    print('> engine.run_pipeline(pipeline, start_date, end_date)')
    print('')

    pipeline.add(fn(**fn_inputs), column_name)
    assert_output(
        engine.run_pipeline,
        run_pipeline_args,
        fn_correct_outputs,
        check_parameter_changes=False)
def initialize(context):
    # Create, register, and name a pipeline in initialize.
    pipe = Pipeline()
    context.attach_pipeline(pipe, 'AAPL')

    # Construct a simple moving average factor and add it to the pipeline.
    # USEquityPricing needs a locally customized definition.
    if True:
        sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                        window_length=10)
    else:  # dead branch: sketch of binding a column to a dataset by hand
        data = Column(float64)
        dataset = DataSet()
        close = data.bind(dataset, 'aapl')
        sma_short = SimpleMovingAverage(inputs=[close], window_length=10)

    pipe.add(sma_short, 'sma_short')
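The dead else branch above hand-binds a Column to a DataSet instance; the idiomatic zipline route is to declare the column on a DataSet subclass, which performs the binding automatically. A sketch under that assumption (the dataset name is made up, and running it still requires a loader registered for the column):

from numpy import float64
from zipline.pipeline.data import Column, DataSet

class LocalPricing(DataSet):  # hypothetical local dataset
    # Declaring the column in the class body binds it to the dataset;
    # this is what Column.bind does under the hood.
    close = Column(float64)

sma_short = SimpleMovingAverage(inputs=[LocalPricing.close],
                                window_length=10)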
def initialize(context):
    pipe = Pipeline()
    attach_pipeline(pipe, 'example')

    # Note that we don't add these intermediate factors to the pipeline.
    # We don't need to store intermediate values if we're not going to
    # use them.
    sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                    window_length=30)
    sma_long = SimpleMovingAverage(inputs=[USEquityPricing.close],
                                   window_length=100)
    sma_val = sma_short / sma_long

    # Construct the custom factor.
    mkt_cap = MarketCap()

    # Create and apply a filter representing the top 500 equities by
    # MarketCap every day.
    mkt_cap_top_500 = mkt_cap.top(500)
    remove_penny_stocks = sma_short > 1.0

    pipe.add(sma_val, 'sma_val')
    pipe.add(mkt_cap, 'mkt_cap')
    # Use mkt_cap_top_500 as a mask on rank.
    pipe.add(sma_val.rank(mask=mkt_cap_top_500), 'sma_rank')

    # Combine multiple filters to narrow the universe.
    pipe.set_screen(mkt_cap.top(500) & remove_penny_stocks)
def test_add(self):
    p = Pipeline()
    f = SomeFactor()

    p.add(f, 'f')
    self.assertEqual(p.columns, {'f': f})

    p.add(f > 5, 'g')
    self.assertEqual(p.columns, {'f': f, 'g': f > 5})

    with self.assertRaises(TypeError):
        p.add(f, 1)
    with self.assertRaises(TypeError):
        p.add(USEquityPricing.open, 'open')
def test_add(self): p = Pipeline() f = SomeFactor() p.add(f, "f") self.assertEqual(p.columns, {"f": f}) p.add(f > 5, "g") self.assertEqual(p.columns, {"f": f, "g": f > 5}) with self.assertRaises(TypeError): p.add(f, 1)
def test_custom_query_time_tz(self):
    df = self.df.copy()
    df['timestamp'] = (
        pd.DatetimeIndex(df['timestamp'], tz='EST') +
        timedelta(hours=8, minutes=44)
    ).tz_convert('utc').tz_localize(None)
    # .loc replaces the removed .ix; the slice is label-based and inclusive.
    df.loc[3:5, 'timestamp'] = pd.Timestamp('2014-01-01 13:45')
    expr = bz.data(df, name='expr', dshape=self.dshape)
    loader = BlazeLoader(data_query_time=time(8, 45), data_query_tz='EST')
    ds = from_blaze(
        expr,
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
        missing_values=self.missing_values,
    )
    p = Pipeline()
    p.add(ds.value.latest, 'value')
    p.add(ds.int_value.latest, 'int_value')
    dates = self.dates

    with tmp_asset_finder() as finder:
        result = SimplePipelineEngine(
            loader,
            dates,
            finder,
        ).run_pipeline(p, dates[0], dates[-1])

    expected = df.drop('asof_date', axis=1)
    expected['timestamp'] = expected['timestamp'].dt.normalize().astype(
        'datetime64[ns]',
    ).dt.tz_localize('utc')
    expected.loc[3:5, 'timestamp'] += timedelta(days=1)
    expected.set_index(['timestamp', 'sid'], inplace=True)
    expected.index = pd.MultiIndex.from_product((
        expected.index.levels[0],
        finder.retrieve_all(expected.index.levels[1]),
    ))
    assert_frame_equal(result, expected, check_dtype=False)
def test_add(self):
    p = Pipeline('test')
    f = SomeFactor()

    p.add(f, 'f')
    self.assertEqual(p.columns, {'f': f})

    p.add(f > 5, 'g')
    self.assertEqual(p.columns, {'f': f, 'g': f > 5})

    with self.assertRaises(TypeError):
        p.add(f, 1)
def test_set_screen(self):
    f, g = SomeFilter(), SomeOtherFilter()

    p = Pipeline()
    self.assertEqual(p.screen, None)

    p.set_screen(f)
    self.assertEqual(p.screen, f)

    with self.assertRaises(ValueError):
        p.set_screen(f)

    p.set_screen(g, overwrite=True)
    self.assertEqual(p.screen, g)

    with self.assertRaises(TypeError) as e:
        p.set_screen(f, g)

    message = e.exception.args[0]
    self.assertIn(
        "expected a value of type bool or int for argument 'overwrite'",
        message,
    )
def make_pipeinit(context):
    universe = context.etf_universe
    factors = make_factor()

    pipeline_columns = {}
    for f in factors.keys():
        for days_ago in reversed(range(WINDOW_LENGTH)):
            pipeline_columns[f + '-' + str(days_ago)] = Factor_N_Days_Ago(
                [factors[f](mask=universe)],
                window_length=days_ago + 1,
                mask=universe,
            )

    pipe = Pipeline(columns=pipeline_columns, screen=universe)
    return pipe
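Factor_N_Days_Ago isn't shown in this snippet. One plausible implementation, assuming the wrapped factors are window_safe so they can be used as CustomFactor inputs: with window_length = days_ago + 1, row 0 of the input window is the wrapped factor's value days_ago trading days before today.

class Factor_N_Days_Ago(CustomFactor):  # hypothetical; not from the source
    def compute(self, today, assets, out, factor_values):
        # Row 0 is the oldest row in the window, i.e. the value as of
        # window_length - 1 days before today.
        out[:] = factor_values[0]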
def test_simple_beta_matches_regression(self):
    run_pipeline = self.run_pipeline
    simple_beta = SimpleBeta(target=self.my_asset, regression_length=10)
    complex_beta = RollingLinearRegressionOfReturns(
        target=self.my_asset,
        returns_length=2,
        regression_length=10,
    ).beta
    pipe = Pipeline({'simple': simple_beta, 'complex': complex_beta})
    results = run_pipeline(
        pipe,
        self.pipeline_start_date,
        self.pipeline_end_date,
    )
    assert_equal(results['simple'], results['complex'], check_names=False)
def test_masked_single_column_output(self):
    """
    Tests for masking custom factors that compute a 1D out.
    """
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)
    alternating_mask.window_safe = True
    cascading_mask.window_safe = True

    for mask in (alternating_mask, cascading_mask):
        class SingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [self.col, mask]
            window_safe = True
            ndim = 1

            def compute(self, today, assets, out, col, mask):
                # Because we specified ndim as 1, `out` should always be
                # a singleton array, but `col` should be sized based on
                # the mask we passed.
                assert out.shape == (1,)
                assert col.shape == (1, mask.sum())
                out[:] = col.sum()

        # Since we cannot add single column output factors as pipeline
        # columns, we have to test its output through another factor.
        class UsesSingleColumnInput(CustomFactor):
            window_length = 1
            inputs = [self.col, mask, SingleColumnOutput(mask=mask)]

            def compute(self, today, assets, out, col, mask,
                        single_column_output):
                # Make sure that `single_column_output` has the correct
                # value based on the mask it used.
                assert single_column_output.shape == (1, 1)
                single_column_output_value = single_column_output[0][0]
                expected_value = where(mask, col, 0).sum()
                assert single_column_output_value == expected_value

        columns = {'uses_single_column_input': UsesSingleColumnInput()}

        # Assertions about the expected shapes of our data are made in
        # the `compute` function of our custom factors above.
        self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
def test_SMA(self):
    engine = SimplePipelineEngine(
        lambda column: self.pipeline_loader,
        self.env.trading_days,
        self.finder,
    )
    window_length = 5
    assets = self.all_assets
    dates = date_range(
        self.first_asset_start + self.trading_day,
        self.last_asset_end,
        freq=self.trading_day,
    )
    dates_to_test = dates[window_length:]

    SMA = SimpleMovingAverage(
        inputs=(USEquityPricing.close,),
        window_length=window_length,
    )

    results = engine.run_pipeline(
        Pipeline(columns={'sma': SMA}),
        dates_to_test[0],
        dates_to_test[-1],
    )

    # Shift the raw inputs back by a trading day because we expect our
    # computed results to be computed using values anchored on the
    # **previous** day's data.
    expected_raw = rolling_mean(
        self.writer.expected_values_2d(
            dates - self.trading_day,
            assets,
            'close',
        ),
        window_length,
        min_periods=1,
    )

    expected = DataFrame(
        # Truncate off the extra rows needed to compute the SMAs.
        expected_raw[window_length:],
        index=dates_to_test,  # dates_to_test is dates[window_length:]
        columns=self.finder.retrieve_all(assets),
    )
    self.write_nans(expected)
    result = results['sma'].unstack()
    assert_frame_equal(result, expected)
def make_pipeline(): """Sets up the pipeline""" dollar_volume = AverageDollarVolume(window_length=20) adv1000 = dollar_volume.top(1000) fd = Fundamentals(mask=adv1000) market_cap = fd.cshoq * fd.prccq # this is how to calculate market cap with Computstat fields book_equity = fd.seqq - fd.PS # this is a quick way to calculate book_equity book_to_price = book_equity / market_cap biggest = market_cap.top(500, mask=adv1000) smallest = market_cap.bottom(500, mask=adv1000) highpb = book_to_price.top(500, mask=adv1000) lowpb = book_to_price.bottom(500, mask=adv1000) momentum = Momentum(mask=adv1000) # momentum high_momentum = momentum.top(500, mask=adv1000) low_momentum = momentum.bottom(500, mask=adv1000) volatility = Volatility(mask=adv1000) highvol = volatility.top(500, mask=adv1000) lowvol = volatility.bottom(500, mask=adv1000) streversal = RSI(window_length=14, mask=adv1000) high_streversal = streversal.top(500, mask=adv1000) low_streversal = streversal.bottom(500, mask=adv1000) universe = biggest | smallest | highpb | lowpb | low_momentum | high_momentum return Pipeline( columns={ 'returns': Returns(window_length=2), # 'market_cap': market_cap, # not needed # 'book_to_price': book_to_price, # not needed 'biggest': biggest, 'smallest': smallest, 'highpb': highpb, 'lowpb': lowpb, # 'momentum': momentum, # not needed 'low_momentum': low_momentum, 'high_momentum': high_momentum, # 'volatility': volatility, # not needed 'highvol': highvol, 'lowvol': lowvol, # 'streversal': streversal, # not needed 'high_streversal': high_streversal, 'low_streversal': low_streversal }, screen=universe)
def test_show_graph(self): f = SomeFactor() p = Pipeline(columns={"f": SomeFactor()}) # The real display_graph call shells out to GraphViz, which isn't a # requirement, so patch it out for testing. def mock_display_graph(g, format="svg", include_asset_exists=False): return (g, format, include_asset_exists) assert getargspec(display_graph) == getargspec( mock_display_graph ), "Mock signature doesn't match signature for display_graph." patch_display_graph = patch( "zipline.pipeline.graph.display_graph", mock_display_graph, ) with patch_display_graph: graph, format, include_asset_exists = p.show_graph() assert graph.outputs["f"] is f # '' is a sentinel used for screen if it's not supplied. assert sorted(graph.outputs.keys()) == ["f", graph.screen_name] assert format == "svg" assert include_asset_exists is False with patch_display_graph: graph, format, include_asset_exists = p.show_graph(format="png") assert graph.outputs["f"] is f # '' is a sentinel used for screen if it's not supplied. assert sorted(graph.outputs.keys()) == ["f", graph.screen_name] assert format == "png" assert include_asset_exists is False with patch_display_graph: graph, format, include_asset_exists = p.show_graph(format="jpeg") assert graph.outputs["f"] is f assert sorted(graph.outputs.keys()) == ["f", graph.screen_name] assert format == "jpeg" assert include_asset_exists is False expected = ( r".*\.show_graph\(\) expected a value in " r"\('svg', 'png', 'jpeg'\) for argument 'format', " r"but got 'fizzbuzz' instead." ) with pytest.raises(ValueError, match=expected): p.show_graph(format="fizzbuzz")
def test_dollar_volume(self):
    results = self.engine.run_pipeline(
        Pipeline(
            columns={
                'dv1': AverageDollarVolume(window_length=1),
                'dv5': AverageDollarVolume(window_length=5),
            },
        ),
        self.dates[5],
        self.dates[-1],
    )

    expected_1 = (self.raw_data[5:] ** 2) * 2
    assert_frame_equal(results['dv1'].unstack(), expected_1)

    expected_5 = rolling_mean((self.raw_data ** 2) * 2, window=5)[5:]
    assert_frame_equal(results['dv5'].unstack(), expected_5)
def test_same_day_pipeline(self):
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    factor = AssetID()
    asset = self.assets[0]
    p = Pipeline(columns={'f': factor}, screen=factor <= asset)

    # The crux of this is that when we run the pipeline for a single day
    # (i.e. start and end dates are the same) we should accurately get
    # data for the day prior.
    result = engine.run_pipeline(p, self.dates[1], self.dates[1])
    self.assertEqual(result['f'][0], 1.0)
def test_set_screen(self):
    f, g = SomeFilter(), SomeOtherFilter()

    p = Pipeline()
    self.assertEqual(p.screen, None)

    p.set_screen(f)
    self.assertEqual(p.screen, f)

    with self.assertRaises(ValueError):
        p.set_screen(f)

    p.set_screen(g, overwrite=True)
    self.assertEqual(p.screen, g)
def test_show_graph(self):
    f = SomeFactor()
    p = Pipeline(columns={'f': SomeFactor()})

    # The real display_graph call shells out to GraphViz, which isn't a
    # requirement, so patch it out for testing.
    def mock_display_graph(g, format='svg', include_asset_exists=False):
        return (g, format, include_asset_exists)

    self.assertEqual(
        inspect.getargspec(display_graph),
        inspect.getargspec(mock_display_graph),
        msg="Mock signature doesn't match signature for display_graph.",
    )

    patch_display_graph = patch(
        'zipline.pipeline.graph.display_graph',
        mock_display_graph,
    )

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph()
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'svg')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='png')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'png')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='jpeg')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'jpeg')
        self.assertEqual(include_asset_exists, False)

    expected = (
        r".*\.show_graph\(\) expected a value in "
        r"\('svg', 'png', 'jpeg'\) for argument 'format', "
        r"but got 'fizzbuzz' instead."
    )
    with self.assertRaisesRegexp(ValueError, expected):
        p.show_graph(format='fizzbuzz')
def make_strategy_pipeline(context):
    pipe = Pipeline()

    # Set the volume filter; 126 days is roughly 6 months of daily data.
    volume_filter = average_volume_filter(126, 1E7)

    # Compute the technical factors.
    rsi_factor = technical_factor(126, rsi, 14)
    ema20_factor = technical_factor(126, ema, 20)
    ema50_factor = technical_factor(126, ema, 50)

    # Add them to the pipeline.
    pipe.add(rsi_factor, 'rsi')
    pipe.add(ema20_factor, 'ema20')
    pipe.add(ema50_factor, 'ema50')
    pipe.set_screen(volume_filter)

    return pipe
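average_volume_filter and technical_factor are library helpers that aren't shown here. A rough zipline-style sketch of the volume filter, assuming it screens on mean daily share volume over the lookback window (the real helper may differ):

import numpy as np
from zipline.pipeline import CustomFilter
from zipline.pipeline.data import EquityPricing

def average_volume_filter(lookback, amount):  # hypothetical equivalent
    class AvgDailyVolume(CustomFilter):
        inputs = [EquityPricing.volume]
        window_length = lookback

        def compute(self, today, assets, out, volume):
            # True for assets whose mean volume over the lookback
            # window exceeds the threshold.
            out[:] = np.nanmean(volume, axis=0) > amount

    return AvgDailyVolume()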
def make_strategy_pipeline(context):
    pipe = Pipeline()

    # Get the strategy parameters.
    lookback = context.params['lookback'] * 21
    v = context.params['min_volume']

    # Set the volume filter.
    volume_filter = average_volume_filter(lookback, v)

    # Compute the volatility and skewness factors.
    vol_factor = technical_factor(lookback, volatility, 1)
    skew_factor = technical_factor(lookback, skewness, None)

    pipe.add(vol_factor, 'vol')
    pipe.add(skew_factor, 'skew')
    pipe.set_screen(volume_filter)

    return pipe
def test_numeric_factor(self):
    constants = self.constants
    loader = self.loader
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    num_dates = 5
    dates = self.dates[10:10 + num_dates]
    high, low = USEquityPricing.high, USEquityPricing.low
    open, close = USEquityPricing.open, USEquityPricing.close

    high_minus_low = RollingSumDifference(inputs=[high, low])
    open_minus_close = RollingSumDifference(inputs=[open, close])
    avg = (high_minus_low + open_minus_close) / 2

    results = engine.run_pipeline(
        Pipeline(
            columns={
                'high_low': high_minus_low,
                'open_close': open_minus_close,
                'avg': avg,
            },
        ),
        dates[0],
        dates[-1],
    )

    high_low_result = results['high_low'].unstack()
    expected_high_low = 3.0 * (constants[high] - constants[low])
    assert_frame_equal(
        high_low_result,
        DataFrame(expected_high_low, index=dates, columns=self.assets),
    )

    open_close_result = results['open_close'].unstack()
    expected_open_close = 3.0 * (constants[open] - constants[close])
    assert_frame_equal(
        open_close_result,
        DataFrame(expected_open_close, index=dates, columns=self.assets),
    )

    avg_result = results['avg'].unstack()
    expected_avg = (expected_high_low + expected_open_close) / 2.0
    assert_frame_equal(
        avg_result,
        DataFrame(expected_avg, index=dates, columns=self.assets),
    )
def initialize(context):
    # The initialize method is called at the very start of your script's
    # execution. You can set up anything you'll be needing later here. The
    # context argument will be received by all pylivetrader methods in
    # your script, and you can store information on it that you'd like to
    # share between methods, or use in later trades.

    # Let's create our pipeline and attach it to pylivetrader execution.
    top5 = AverageDollarVolume(window_length=20).top(5)
    pipe = Pipeline({
        'close': USEquityPricing.close.latest,
    }, screen=top5)

    # This line connects the pipeline to pylivetrader. It's done once, and
    # the pipeline is stored in the context. We will get a fresh list of
    # assets every morning in before_trading_start().
    context.attach_pipeline(pipe, "pipe")
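As the comment promises, the fresh results are picked up each morning. A sketch, assuming pylivetrader mirrors zipline's pipeline_output API on the context object the way attach_pipeline is used above:

def before_trading_start(context, data):
    # Refresh the day's top-5 dollar-volume assets from the pipeline.
    context.pipe_results = context.pipeline_output("pipe")
    context.universe = context.pipe_results.index.tolist()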
def test_latest(self):
    columns = TDS.columns
    pipe = Pipeline(
        columns={c.name: c.latest for c in columns},
    )

    cal_slice = slice(20, 40)
    dates_to_test = self.calendar[cal_slice]
    result = self.engine.run_pipeline(
        pipe,
        dates_to_test[0],
        dates_to_test[-1],
    )
    for column in columns:
        float_result = result[column.name].unstack()
        expected_float_result = self.expected_latest(column, cal_slice)
        assert_frame_equal(float_result, expected_float_result)
def test_compute(self, dates):
    engine = self.setup_engine(dates)
    self.setup(dates)

    pipe = Pipeline(columns=self.pipeline_columns)
    result = engine.run_pipeline(
        pipe,
        start_date=dates[0],
        end_date=dates[-1],
    )
    for sid in self.sids:
        for col_name in self.cols.keys():
            assert_series_equal(
                result[col_name].xs(sid, level=1),
                self.cols[col_name][sid],
                check_names=False,
            )
def risk_loading_pipeline(sector_type='cn'):
    """
    Create a pipeline containing all of the risk loadings for the risk
    model.

    Returns
    -------
    pipeline : Pipeline
        A pipeline containing the risk loadings for each factor in the
        risk model.
    """
    columns = style_columns()
    if sector_type == 'sw':
        columns.update(sw_sector_columns())
    elif sector_type == 'cn':
        columns.update(cn_sector_columns())
    else:
        raise ValueError(f"Unsupported sector_type: {sector_type}")
    return Pipeline(columns=columns, domain=CN_EQUITIES)
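A hedged usage sketch; the engine construction is assumed to exist elsewhere (e.g. a SimplePipelineEngine wired to a CN-equities loader), and the dates are illustrative:

# Hypothetical usage: run the risk-loading pipeline over a date range.
pipe = risk_loading_pipeline(sector_type='cn')
loadings = engine.run_pipeline(
    pipe,
    pd.Timestamp('2020-01-02', tz='utc'),
    pd.Timestamp('2020-06-30', tz='utc'),
)
# `loadings` is indexed by (date, asset), with one column per style or
# sector factor in the risk model.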
def initialize(context):
    pipeline = Pipeline()
    context.vwaps = []
    for length in vwaps:
        name = vwap_key(length)
        factor = VWAP(window_length=length)
        context.vwaps.append(factor)
        pipeline.add(factor, name=name)

    filter_ = USEquityPricing.close.latest > 300
    pipeline.add(filter_, "filter")
    if set_screen:
        pipeline.set_screen(filter_)

    attach_pipeline(pipeline, "test")
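vwap_key isn't defined in this snippet; presumably it just formats a column name from the window length, along these lines:

def vwap_key(length):  # hypothetical helper
    return 'vwap_{}'.format(length)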
def test_single_column_output(self):
    """
    Tests for custom factors that compute a 1D out.
    """
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date

    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)

    class SingleColumnOutput(CustomFactor):
        window_length = 1
        inputs = [self.col]
        window_safe = True
        ndim = 1

        def compute(self, today, assets, out, col):
            # Because we specified ndim as 1, `out` should be a singleton
            # array, but `col` should be a regular sized input.
            assert out.shape == (1,)
            assert col.shape == (1, 3)
            out[:] = col.sum()

    # Since we cannot add single column output factors as pipeline
    # columns, we have to test its output through another factor.
    class UsesSingleColumnOutput(CustomFactor):
        window_length = 1
        inputs = [SingleColumnOutput()]

        def compute(self, today, assets, out, single_column_output):
            # Make sure that `single_column_output` has the correct
            # shape. That is, it should always have one column regardless
            # of any mask passed to `UsesSingleColumnOutput`.
            assert single_column_output.shape == (1, 1)

    for mask in (alternating_mask, cascading_mask):
        columns = {
            'uses_single_column_output': UsesSingleColumnOutput(),
            'uses_single_column_output_masked': UsesSingleColumnOutput(
                mask=mask,
            ),
        }

        # Assertions about the expected shapes of our data are made in
        # the `compute` function of our custom factors above.
        self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
def test_rolling_and_nonrolling(self):
    open_ = USEquityPricing.open
    close = USEquityPricing.close
    volume = USEquityPricing.volume

    # Test for thirty days up to the last day that we think all
    # the assets existed.
    dates_to_test = self.dates[-30:]

    constants = {open_: 1, close: 2, volume: 3}
    loader = ConstantLoader(
        constants=constants,
        dates=self.dates,
        assets=self.assets,
    )
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )

    sumdiff = RollingSumDifference()

    result = engine.run_pipeline(
        Pipeline(
            columns={
                'sumdiff': sumdiff,
                'open': open_.latest,
                'close': close.latest,
                'volume': volume.latest,
            },
        ),
        dates_to_test[0],
        dates_to_test[-1],
    )
    self.assertIsNotNone(result)
    self.assertEqual(
        {'sumdiff', 'open', 'close', 'volume'},
        set(result.columns),
    )

    result_index = self.assets * len(dates_to_test)
    result_shape = (len(result_index),)
    check_arrays(
        result['sumdiff'],
        Series(index=result_index, data=full(result_shape, -3)),
    )

    for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
        check_arrays(
            result[name],
            Series(index=result_index, data=full(result_shape, const)),
        )
def make_pipeline():
    dollar_volume = AverageDollarVolume(window_length=1)
    high_dollar_volume = dollar_volume.percentile_between(N, 100)

    recent_returns = Returns(
        window_length=N,
        mask=high_dollar_volume,
    )
    low_returns = recent_returns.percentile_between(0, 10)
    high_returns = recent_returns.percentile_between(N, 100)

    dv2 = DV2()

    pipe_columns = {
        'low_returns': low_returns,
        'high_returns': high_returns,
        'recent_returns': recent_returns,
        'dollar_volume': dollar_volume,
        'dv2': dv2,
    }

    pipe_screen = (low_returns | high_returns)
    pipe = Pipeline(columns=pipe_columns, screen=pipe_screen)
    return pipe
def test_compute(self, dates):
    engine = self.pipeline_event_setup_engine(dates)
    cols = self.setup(dates)

    pipe = Pipeline(columns=self.pipeline_columns)
    result = engine.run_pipeline(
        pipe,
        start_date=dates[0],
        end_date=dates[-1],
    )
    for sid in self.get_sids():
        for col_name in cols.keys():
            assert_series_equal(
                result[col_name].unstack(1)[sid],
                cols[col_name][sid],
                check_names=False,
            )
def test_latest(self):
    columns = TDS.columns
    pipe = Pipeline(
        columns={c.name: c.latest for c in columns},
    )

    cal_slice = slice(20, 40)
    dates_to_test = self.trading_days[cal_slice]
    result = self.engine.run_pipeline(
        pipe,
        dates_to_test[0],
        dates_to_test[-1],
    )
    for column in columns:
        with ignore_pandas_nan_categorical_warning():
            col_result = result[column.name].unstack()

        expected_col_result = self.expected_latest(column, cal_slice)
        assert_frame_equal(col_result, expected_col_result)
def test_overwrite(self):
    p = Pipeline()
    f = SomeFactor()
    other_f = SomeOtherFactor()

    p.add(f, 'f')
    self.assertEqual(p.columns, {'f': f})

    with self.assertRaises(KeyError) as e:
        p.add(other_f, 'f')
    [message] = e.exception.args
    self.assertEqual(message, "Column 'f' already exists.")

    p.add(other_f, 'f', overwrite=True)
    self.assertEqual(p.columns, {'f': other_f})
def make_pipeline(): """ A function to create our dynamic stock selector (pipeline). Documentation on pipeline can be found here: https://www.quantopian.com/help#pipeline-title """ # Base universe set to the QTradableStocksUS base_universe = TradableStocksUS() # Factor of yesterday's close price. yesterday_close = USEquityPricing.close.latest pipe = Pipeline(columns={ 'close': yesterday_close, }, screen=base_universe) return pipe
def make_pipeline():
    yearly_returns = Returns(window_length=252)
    monthly_returns = Returns(window_length=21)
    lagged_returns = yearly_returns - monthly_returns

    return Pipeline(
        columns={
            'lagged_returns': lagged_returns,
            'marketcap': MyDataSet.marketcap.latest,
        },
        screen=(
            lagged_returns.notnull()
            & MyDataSet.marketcap.latest.notnull()
            & MyDataSet.marketcap.latest.top(500)
        ),
    )
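The same 12-month-minus-1-month construction can be written as a single CustomFactor over daily closes. A sketch; note it equals the difference of simple returns above only up to compounding:

class LaggedReturns(CustomFactor):  # hypothetical equivalent
    inputs = [USEquityPricing.close]
    window_length = 252

    def compute(self, today, assets, out, close):
        # Return from ~252 trading days ago to ~21 trading days ago.
        out[:] = close[-21] / close[0] - 1.0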
def create_high_dollar_volume_pipeline():
    pipe = Pipeline()

    dollar_volume = AverageDollarVolume(window_length=63)  # 63 days = 1 quarter
    pipe.add(dollar_volume, 'dollar_volume')

    # Top 5% of stocks by dollar volume.
    high_dollar_volume = dollar_volume.percentile_between(95, 100)
    pipe.set_screen(high_dollar_volume)

    return pipe
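The same pipeline can also be expressed in a single constructor call, matching the columns/screen style used elsewhere in these examples:

def create_high_dollar_volume_pipeline():
    dollar_volume = AverageDollarVolume(window_length=63)  # one quarter
    return Pipeline(
        columns={'dollar_volume': dollar_volume},
        screen=dollar_volume.percentile_between(95, 100),
    )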
def test_show_graph(self):
    f = SomeFactor()
    p = Pipeline(columns={'f': SomeFactor()})

    # The real display_graph call shells out to GraphViz, which isn't a
    # requirement, so patch it out for testing.
    def mock_display_graph(g, format='svg', include_asset_exists=False):
        return (g, format, include_asset_exists)

    self.assertEqual(
        inspect.getargspec(display_graph),
        inspect.getargspec(mock_display_graph),
        msg="Mock signature doesn't match signature for display_graph.",
    )

    patch_display_graph = patch(
        'zipline.pipeline.graph.display_graph',
        mock_display_graph,
    )

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph()
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'svg')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='png')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'png')
        self.assertEqual(include_asset_exists, False)

    with patch_display_graph:
        graph, format, include_asset_exists = p.show_graph(format='jpeg')
        self.assertIs(graph.outputs['f'], f)
        # '' is a sentinel used for screen if it's not supplied.
        self.assertEqual(sorted(graph.outputs.keys()), ['', 'f'])
        self.assertEqual(format, 'jpeg')
        self.assertEqual(include_asset_exists, False)

    expected = (
        r".*\.show_graph\(\) expected a value in "
        r"\('svg', 'png', 'jpeg'\) for argument 'format', "
        r"but got 'fizzbuzz' instead."
    )
    with self.assertRaisesRegexp(ValueError, expected):
        p.show_graph(format='fizzbuzz')
def make_pipeline():
    filter1 = CNEquityPricing.volume.latest > 4000
    # filter2 = CNEquityPricing.high.latest < CNEquityPricing.up_limit.latest / 1000
    # filter3 = CNEquityPricing.high.latest > CNEquityPricing.down_limit.latest / 1000
    close = CNEquityPricing.close.latest
    market_cap = (CNEquityPricing.close.latest *
                  CNFinancialData.total_share_0QE.latest)
    universe = filter1 & market_cap.notnull()

    # Bottom decile of market cap, then the five smallest names in it.
    market_cap_1 = market_cap.deciles(mask=universe).eq(0)
    market_cap_top5 = market_cap.bottom(5, mask=market_cap_1)
    # market_cap_1_top = market_cap.top(5, mask=market_cap_1)

    pipe = Pipeline()
    pipe.add(market_cap, 'market_cap')
    pipe.add(close, 'close')
    pipe.set_screen(market_cap_top5)
    return pipe
def test_multiple_rolling_factors(self):
    loader = self.loader
    finder = self.asset_finder
    assets = self.assets
    engine = SimplePipelineEngine(
        lambda column: loader,
        self.dates,
        self.asset_finder,
    )
    shape = num_dates, num_assets = (5, len(assets))
    dates = self.dates[10:10 + num_dates]

    short_factor = RollingSumDifference(window_length=3)
    long_factor = RollingSumDifference(window_length=5)
    high_factor = RollingSumDifference(
        window_length=3,
        inputs=[USEquityPricing.open, USEquityPricing.high],
    )

    pipeline = Pipeline(
        columns={
            'short': short_factor,
            'long': long_factor,
            'high': high_factor,
        },
    )
    results = engine.run_pipeline(pipeline, dates[0], dates[-1])

    self.assertEqual(set(results.columns), {'short', 'high', 'long'})
    assert_multi_index_is_product(
        self, results.index, dates, finder.retrieve_all(assets),
    )

    # row-wise sum over an array whose values are all (1 - 2)
    check_arrays(
        results['short'].unstack().values,
        full(shape, -short_factor.window_length),
    )
    check_arrays(
        results['long'].unstack().values,
        full(shape, -long_factor.window_length),
    )

    # row-wise sum over an array whose values are all (1 - 3)
    check_arrays(
        results['high'].unstack().values,
        full(shape, -2 * high_factor.window_length),
    )
def make_pipeline(context):
    """
    A function to create our pipeline (dynamic stock selector). The
    pipeline is used to rank stocks based on different factors, including
    builtin factors, or custom factors that you can define. Documentation
    on pipeline can be found here:
    https://www.quantopian.com/help#pipeline-title
    """
    # Create a dollar_volume factor using default inputs and
    # window_length. This is a builtin factor.
    dollar_volume = AverageDollarVolume(window_length=1)

    # Define the high dollar-volume filter to be the top 5% of stocks by
    # dollar volume.
    high_dollar_volume = dollar_volume.percentile_between(95, 100)

    # Create a recent_returns factor with a 16-day returns lookback for
    # all securities in our high_dollar_volume Filter.
    recent_returns = Returns(window_length=16, mask=high_dollar_volume)

    # Define high and low returns filters to be the bottom 5% and top 5%
    # of securities in the high dollar-volume group.
    low_returns = recent_returns.percentile_between(0, 5)
    high_returns = recent_returns.percentile_between(95, 100)

    # Define a column dictionary that holds all the Factors.
    pipe_columns = {
        'low_returns': low_returns,
        'high_returns': high_returns,
        'recent_returns': recent_returns,
        'dollar_volume': dollar_volume,
    }

    # Add a filter to the pipeline such that only high-return and
    # low-return securities are kept.
    # pipe_screen = (low_returns & liquidity_filter | high_returns & vol_filter)
    pipe_screen = (low_returns | high_returns)

    # Create a pipeline object with the defined columns and screen.
    pipe = Pipeline(columns=pipe_columns, screen=pipe_screen)
    return pipe
def make_pipeline():
    log.info('Making Pipeline')
    mktcap = IEXKeyStats.marketcap.latest
    primary_share = IsPrimaryShareEmulation()
    universe = mktcap.top(1500, mask=primary_share)

    pipe = Pipeline(
        {
            'close': USEquityPricing.close.latest,
            'str': ComputeSTR(),
            'ind': IEXCompany.industry.latest,
            'type': IEXCompany.issueType.latest,
            'symbol': IEXCompany.symbol.latest,
        },
        screen=universe,
    )
    return pipe
def make_pipeline(): """ A function to create our dynamic stock selector (pipeline). Documentation on pipeline can be found here: https://www.quantopian.com/help#pipeline-title """ base_universe = StaticAssets( symbols('XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLK', 'XLU')) # Factor of yesterday's close price. yesterday_close = USEquityPricing.close.latest pipeline = Pipeline(columns={ 'close': yesterday_close, }, screen=base_universe) return pipeline
def _check_filters(self, evens, odds, first_five, last_three):
    pipe = Pipeline(
        columns={
            'sid': SidFactor(),
            'evens': evens,
            'odds': odds,
            'first_five': first_five,
            'last_three': last_three,
        },
    )

    start, end = self.trading_days[[-10, -1]]
    results = self.run_pipeline(pipe, start, end).unstack()

    sids = results.sid.astype(int64_dtype)
    assert_equal(results.evens, ~(sids % 2).astype(bool))
    assert_equal(results.odds, (sids % 2).astype(bool))
    assert_equal(results.first_five, sids < 5)
    assert_equal(results.last_three, sids >= 7)
def test_remove(self): f = SomeFactor() p = Pipeline(columns={"f": f}) with self.assertRaises(KeyError) as e: p.remove("not_a_real_name") self.assertEqual(f, p.remove("f")) with self.assertRaises(KeyError) as e: p.remove("f") self.assertEqual(e.exception.args, ("f",))
def test_remove(self):
    f = SomeFactor()
    p = Pipeline(columns={'f': f})

    with self.assertRaises(KeyError) as e:
        p.remove('not_a_real_name')

    self.assertEqual(f, p.remove('f'))

    with self.assertRaises(KeyError) as e:
        p.remove('f')
    self.assertEqual(e.exception.args, ('f',))
def test_regression_of_returns_factor(self,
                                      returns_length,
                                      regression_length):
    """
    Tests for the built-in factor `RollingLinearRegressionOfReturns`.
    """
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    # The order of these is meant to align with the output of
    # `linregress`.
    outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

    returns = Returns(window_length=returns_length)
    masks = self.cascading_mask, self.alternating_mask, NotSpecified
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        regression_factor = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
            mask=mask,
        )

        columns = {
            output: getattr(regression_factor, output)
            for output in outputs
        }
        pipeline = Pipeline(columns=columns)
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        output_results = {}
        expected_output_results = {}
        for output in outputs:
            output_results[output] = results[output].unstack()
            expected_output_results[output] = full_like(
                output_results[output],
                nan,
            )

        # Run a separate pipeline that calculates returns starting
        # (regression_length - 1) days prior to our start date. This is
        # because we need (regression_length - 1) extra days of returns
        # to compute our expected regressions.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (regression_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected regression results for
        # Y ~ X where Y is the asset we are interested in and X is each
        # other asset. Each regression is calculated over
        # `regression_length` days of data.
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + regression_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            for asset, other_asset_returns in todays_returns.iteritems():
                asset_column = int(asset) - 1
                expected_regression_results = linregress(
                    y=other_asset_returns,
                    x=my_asset_returns,
                )
                for i, output in enumerate(outputs):
                    expected_output_results[output][day, asset_column] = \
                        expected_regression_results[i]

        for output in outputs:
            output_result = output_results[output]
            expected_output_result = DataFrame(
                where(expected_mask, expected_output_results[output], nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(output_result, expected_output_result)
def test_correlation_factors(self, returns_length, correlation_length):
    """
    Tests for the built-in factors `RollingPearsonOfReturns` and
    `RollingSpearmanOfReturns`.
    """
    assets = self.assets
    my_asset = self.my_asset
    my_asset_column = self.my_asset_column
    dates = self.dates
    start_date = self.pipeline_start_date
    end_date = self.pipeline_end_date
    start_date_index = self.start_date_index
    end_date_index = self.end_date_index
    num_days = self.num_days
    run_pipeline = self.run_pipeline

    returns = Returns(window_length=returns_length)
    masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
    expected_mask_results = (
        self.expected_cascading_mask_result,
        self.expected_alternating_mask_result,
        self.expected_no_mask_result,
    )

    for mask, expected_mask in zip(masks, expected_mask_results):
        pearson_factor = RollingPearsonOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )
        spearman_factor = RollingSpearmanOfReturns(
            target=my_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
            mask=mask,
        )

        columns = {
            'pearson_factor': pearson_factor,
            'spearman_factor': spearman_factor,
        }
        pipeline = Pipeline(columns=columns)
        if mask is not NotSpecified:
            pipeline.add(mask, 'mask')

        results = run_pipeline(pipeline, start_date, end_date)
        pearson_results = results['pearson_factor'].unstack()
        spearman_results = results['spearman_factor'].unstack()
        if mask is not NotSpecified:
            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

        # Run a separate pipeline that calculates returns starting
        # (correlation_length - 1) days prior to our start date. This is
        # because we need (correlation_length - 1) extra days of returns
        # to compute our expected correlations.
        results = run_pipeline(
            Pipeline(columns={'returns': returns}),
            dates[start_date_index - (correlation_length - 1)],
            dates[end_date_index],
        )
        returns_results = results['returns'].unstack()

        # On each day, calculate the expected correlation coefficients
        # between the asset we are interested in and each other asset.
        # Each correlation is calculated over `correlation_length` days.
        expected_pearson_results = full_like(pearson_results, nan)
        expected_spearman_results = full_like(spearman_results, nan)
        for day in range(num_days):
            todays_returns = returns_results.iloc[
                day:day + correlation_length
            ]
            my_asset_returns = todays_returns.iloc[:, my_asset_column]
            for asset, other_asset_returns in todays_returns.iteritems():
                asset_column = int(asset) - 1
                expected_pearson_results[day, asset_column] = pearsonr(
                    my_asset_returns,
                    other_asset_returns,
                )[0]
                expected_spearman_results[day, asset_column] = spearmanr(
                    my_asset_returns,
                    other_asset_returns,
                )[0]

        expected_pearson_results = DataFrame(
            data=where(expected_mask, expected_pearson_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(pearson_results, expected_pearson_results)

        expected_spearman_results = DataFrame(
            data=where(expected_mask, expected_spearman_results, nan),
            index=dates[start_date_index:end_date_index + 1],
            columns=assets,
        )
        assert_frame_equal(spearman_results, expected_spearman_results)
def initialize(context):
    p = Pipeline('test')
    p.add(USEquityPricing.close.latest, 'close')
    attach_pipeline(p)
def initialize(context): pipe = Pipeline() attach_pipeline(pipe, "example") sma_short = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=30) sma_long = SimpleMovingAverage(inputs=[USEquityPricing.close], window_length=100) # Combined factors to create new factors sma_val = sma_short / sma_long # Create and apply a screen to remove penny stocks remove_penny_stocks = sma_short > 1.0 pipe.set_screen(remove_penny_stocks) pipe.add(sma_short, "sma_short") pipe.add(sma_long, "sma_long") pipe.add(sma_val, "sma_val") # Rank a factor using a mask to ignore the values we're # filtering out by passing mask=remove_penny_stocks to rank. pipe.add(sma_val.rank(mask=remove_penny_stocks), "sma_rank")
def test_factor_with_multiple_outputs(self):
    dates = self.dates[5:10]
    assets = self.assets
    asset_ids = self.asset_ids
    constants = self.constants
    open = USEquityPricing.open
    close = USEquityPricing.close
    engine = SimplePipelineEngine(
        lambda column: self.loader,
        self.dates,
        self.asset_finder,
    )

    def create_expected_results(expected_value, mask):
        expected_values = where(mask, expected_value, nan)
        return DataFrame(expected_values, index=dates, columns=assets)

    cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
    expected_cascading_mask_result = array(
        [
            [True, True, True, False],
            [True, True, False, False],
            [True, False, False, False],
            [False, False, False, False],
            [False, False, False, False],
        ],
        dtype=bool,
    )
    alternating_mask = (AssetIDPlusDay() % 2).eq(0)
    expected_alternating_mask_result = array(
        [
            [False, True, False, True],
            [True, False, True, False],
            [False, True, False, True],
            [True, False, True, False],
            [False, True, False, True],
        ],
        dtype=bool,
    )
    expected_no_mask_result = array(
        [
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
            [True, True, True, True],
        ],
        dtype=bool,
    )

    masks = cascading_mask, alternating_mask, NotSpecified
    expected_mask_results = (
        expected_cascading_mask_result,
        expected_alternating_mask_result,
        expected_no_mask_result,
    )
    for mask, expected_mask in zip(masks, expected_mask_results):
        open_price, close_price = MultipleOutputs(mask=mask)
        pipeline = Pipeline(
            columns={"open_price": open_price, "close_price": close_price},
        )
        if mask is not NotSpecified:
            pipeline.add(mask, "mask")
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        for colname, case_column in (("open_price", open),
                                     ("close_price", close)):
            if mask is not NotSpecified:
                mask_results = results["mask"].unstack()
                check_arrays(mask_results.values, expected_mask)
            output_results = results[colname].unstack()
            output_expected = create_expected_results(
                constants[case_column],
                expected_mask,
            )
            assert_frame_equal(output_results, output_expected)