Пример #1
0
    def test_bad_dates(self):
        loader = self.loader
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

        p = Pipeline()

        msg = "start_date must be before or equal to end_date .*"
        with self.assertRaisesRegexp(ValueError, msg):
            engine.run_pipeline(p, self.dates[2], self.dates[1])
Пример #2
0
    def test_bad_dates(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        p = Pipeline()

        msg = "start_date must be before or equal to end_date .*"
        with self.assertRaisesRegexp(ValueError, msg):
            engine.run_pipeline(p, self.dates[2], self.dates[1])
Пример #3
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = PrecomputedLoader(constants=constants, dates=self.dates, sids=self.asset_ids)
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={"sumdiff": sumdiff, "open": open_.latest, "close": close.latest, "volume": volume.latest}
            ),
            dates_to_test[0],
            dates_to_test[-1],
        )
        self.assertIsNotNone(result)
        self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

        result_index = self.asset_ids * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(result["sumdiff"], Series(index=result_index, data=full(result_shape, -3, dtype=float)))

        for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
            check_arrays(result[name], Series(index=result_index, data=full(result_shape, const, dtype=float)))
Пример #4
0
    def test_factor_with_single_output(self):
        """
        Test passing an `outputs` parameter of length 1 to a CustomFactor.
        """
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        open = USEquityPricing.open
        open_values = [self.constants[open]] * num_dates
        open_values_as_tuple = [(self.constants[open],)] * num_dates
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        single_output = OpenPrice(outputs=["open"])
        pipeline = Pipeline(columns={"open_instance": single_output, "open_attribute": single_output.open})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        # The instance `single_output` itself will compute a numpy.recarray
        # when added as a column to our pipeline, so we expect its output
        # values to be 1-tuples.
        open_instance_expected = {asset: open_values_as_tuple for asset in assets}
        open_attribute_expected = {asset: open_values for asset in assets}

        for colname, expected_values in (
            ("open_instance", open_instance_expected),
            ("open_attribute", open_attribute_expected),
        ):
            column_results = results[colname].unstack()
            expected_results = DataFrame(expected_values, index=dates, columns=assets, dtype=float64)
            assert_frame_equal(column_results, expected_results)
Пример #5
0
    def test_drawdown(self):
        # The monotonically-increasing data produced by SyntheticDailyBarWriter
        # exercises two pathological cases for MaxDrawdown.  The actual
        # computed results are pretty much useless (everything is either NaN)
        # or zero, but verifying we correctly handle those corner cases is
        # valuable.
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader, self.trading_calendar.all_sessions, self.asset_finder
        )
        window_length = 5
        asset_ids = self.all_asset_ids
        dates = date_range(
            self.first_asset_start + self.trading_calendar.day, self.last_asset_end, freq=self.trading_calendar.day
        )
        dates_to_test = dates[window_length:]

        drawdown = MaxDrawdown(inputs=(USEquityPricing.close,), window_length=window_length)

        results = engine.run_pipeline(Pipeline(columns={"drawdown": drawdown}), dates_to_test[0], dates_to_test[-1])

        # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
        # since the input is always increasing.
        expected = DataFrame(
            data=zeros((len(dates_to_test), len(asset_ids)), dtype=float),
            index=dates_to_test,
            columns=self.asset_finder.retrieve_all(asset_ids),
        )
        self.write_nans(expected)
        result = results["drawdown"].unstack()

        assert_frame_equal(expected, result)
Пример #6
0
    def test_numeric_factor(self):
        constants = self.constants
        loader = self.loader
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        num_dates = 5
        dates = self.dates[10 : 10 + num_dates]
        high, low = USEquityPricing.high, USEquityPricing.low
        open, close = USEquityPricing.open, USEquityPricing.close

        high_minus_low = RollingSumDifference(inputs=[high, low])
        open_minus_close = RollingSumDifference(inputs=[open, close])
        avg = (high_minus_low + open_minus_close) / 2

        results = engine.run_pipeline(
            Pipeline(columns={"high_low": high_minus_low, "open_close": open_minus_close, "avg": avg}),
            dates[0],
            dates[-1],
        )

        high_low_result = results["high_low"].unstack()
        expected_high_low = 3.0 * (constants[high] - constants[low])
        assert_frame_equal(high_low_result, DataFrame(expected_high_low, index=dates, columns=self.assets))

        open_close_result = results["open_close"].unstack()
        expected_open_close = 3.0 * (constants[open] - constants[close])
        assert_frame_equal(open_close_result, DataFrame(expected_open_close, index=dates, columns=self.assets))

        avg_result = results["avg"].unstack()
        expected_avg = (expected_high_low + expected_open_close) / 2.0
        assert_frame_equal(avg_result, DataFrame(expected_avg, index=dates, columns=self.assets))
Пример #7
0
    def test_SMA(self):
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader, self.trading_calendar.all_sessions, self.asset_finder
        )
        window_length = 5
        asset_ids = self.all_asset_ids
        dates = date_range(
            self.first_asset_start + self.trading_calendar.day, self.last_asset_end, freq=self.trading_calendar.day
        )
        dates_to_test = dates[window_length:]

        SMA = SimpleMovingAverage(inputs=(USEquityPricing.close,), window_length=window_length)

        results = engine.run_pipeline(Pipeline(columns={"sma": SMA}), dates_to_test[0], dates_to_test[-1])

        # Shift back the raw inputs by a trading day because we expect our
        # computed results to be computed using values anchored on the
        # **previous** day's data.
        expected_raw = rolling_mean(
            expected_bar_values_2d(dates - self.trading_calendar.day, self.equity_info, "close"),
            window_length,
            min_periods=1,
        )

        expected = DataFrame(
            # Truncate off the extra rows needed to compute the SMAs.
            expected_raw[window_length:],
            index=dates_to_test,  # dates_to_test is dates[window_length:]
            columns=self.asset_finder.retrieve_all(asset_ids),
        )
        self.write_nans(expected)
        result = results["sma"].unstack()
        assert_frame_equal(result, expected)
Пример #8
0
    def test_engine_with_multicolumn_loader(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the second to last day that we think all
        # the assets existed.  If we test the last day of our calendar, no
        # assets will be in our output, because their end dates are all
        dates_to_test = self.dates[-32:-2]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(constants=constants, dates=self.dates, assets=self.assets)
        engine = SimplePipelineEngine(loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={"sumdiff": sumdiff, "open": open_.latest, "close": close.latest, "volume": volume.latest}
            ),
            dates_to_test[0],
            dates_to_test[-1],
        )
        self.assertIsNotNone(result)
        self.assertEqual({"sumdiff", "open", "close", "volume"}, set(result.columns))

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(result["sumdiff"], Series(index=result_index, data=full(result_shape, -3)))

        for name, const in [("open", 1), ("close", 2), ("volume", 3)]:
            check_arrays(result[name], Series(index=result_index, data=full(result_shape, const)))
Пример #9
0
    def test_input_dates_provided_by_default(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )

        class TestFactor(CustomFactor):
            inputs = [InputDates(), USEquityPricing.close]
            window_length = 10
            dtype = datetime64ns_dtype

            def compute(self, today, assets, out, dates, closes):
                first, last = dates[[0, -1], 0]
                assert last == today.asm8
                assert len(dates) == len(closes) == self.window_length
                out[:] = first

        p = Pipeline(columns={'t': TestFactor()})
        results = engine.run_pipeline(p, self.dates[9], self.dates[10])

        # All results are the same, so just grab one column.
        column = results.unstack().iloc[:, 0].values
        check_arrays(column, self.dates[:2].values)
Пример #10
0
    def test_screen(self):
        loader = self.loader
        finder = self.asset_finder
        assets = array(self.assets)
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        num_dates = 5
        dates = self.dates[10:10 + num_dates]

        factor = AssetID()
        for asset in assets:
            p = Pipeline(columns={'f': factor}, screen=factor <= asset)
            result = engine.run_pipeline(p, dates[0], dates[-1])

            expected_sids = assets[assets <= asset]
            expected_assets = finder.retrieve_all(expected_sids)
            expected_result = DataFrame(
                index=MultiIndex.from_product([dates, expected_assets]),
                data=tile(expected_sids.astype(float), [len(dates)]),
                columns=['f'],
            )

            assert_frame_equal(result, expected_result)
Пример #11
0
    def test_single_factor(self):
        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        result_shape = (num_dates, num_assets) = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        factor = RollingSumDifference()
        expected_result = -factor.window_length

        # Since every asset will pass the screen, these should be equivalent.
        pipelines = [
            Pipeline(columns={'f': factor}),
            Pipeline(
                columns={'f': factor},
                screen=factor.eq(expected_result),
            ),
        ]

        for p in pipelines:
            result = engine.run_pipeline(p, dates[0], dates[-1])
            self.assertEqual(set(result.columns), {'f'})
            assert_multi_index_is_product(self, result.index, dates,
                                          finder.retrieve_all(assets))

            check_arrays(
                result['f'].unstack().values,
                full(result_shape, expected_result),
            )
Пример #12
0
    def test_custom_factor_outputs_parameter(self):
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        num_assets = len(assets)
        constants = self.constants
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        def create_expected_results(expected_value):
            expected_values = full(
                (num_dates, num_assets), expected_value, float64,
            )
            return DataFrame(expected_values, index=dates, columns=assets)

        for window_length in range(1, 3):
            sum_, diff = OpenCloseSumAndDiff(
                outputs=['sum_', 'diff'], window_length=window_length,
            )
            pipeline = Pipeline(columns={'sum_': sum_, 'diff': diff})
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, op in ('sum_', add), ('diff', sub):
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    op(
                        constants[USEquityPricing.open] * window_length,
                        constants[USEquityPricing.close] * window_length,
                    )
                )
                assert_frame_equal(output_results, output_expected)
Пример #13
0
    def test_instance_of_factor_with_multiple_outputs(self):
        """
        Test adding a CustomFactor instance, which has multiple outputs, as a
        pipeline column directly. Its computed values should be tuples
        containing the computed values of each of its outputs.
        """
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        num_assets = len(assets)
        constants = self.constants
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        open_values = [constants[USEquityPricing.open]] * num_assets
        close_values = [constants[USEquityPricing.close]] * num_assets
        expected_values = [list(zip(open_values, close_values))] * num_dates
        expected_results = DataFrame(
            expected_values, index=dates, columns=assets, dtype=float64,
        )

        multiple_outputs = MultipleOutputs()
        pipeline = Pipeline(columns={'instance': multiple_outputs})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])
        instance_results = results['instance'].unstack()
        assert_frame_equal(instance_results, expected_results)
Пример #14
0
    def test_single_factor(self):
        loader = self.loader
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        result_shape = (num_dates, num_assets) = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        factor = RollingSumDifference()
        expected_result = -factor.window_length

        # Since every asset will pass the screen, these should be equivalent.
        pipelines = [
            Pipeline(columns={'f': factor}),
            Pipeline(
                columns={'f': factor},
                screen=factor.eq(expected_result),
            ),
        ]

        for p in pipelines:
            result = engine.run_pipeline(p, dates[0], dates[-1])
            self.assertEqual(set(result.columns), {'f'})
            assert_multi_index_is_product(
                self, result.index, dates, assets
            )

            check_arrays(
                result['f'].unstack().values,
                full(result_shape, expected_result, dtype=float),
            )
Пример #15
0
    def test_custom_factor_outputs_parameter(self):
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        num_assets = len(assets)
        constants = self.constants
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        def create_expected_results(expected_value):
            expected_values = full((num_dates, num_assets), expected_value, float64)
            return DataFrame(expected_values, index=dates, columns=assets)

        for window_length in range(1, 3):
            sum_, diff = OpenCloseSumAndDiff(outputs=["sum_", "diff"], window_length=window_length)
            pipeline = Pipeline(columns={"sum_": sum_, "diff": diff})
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, op in ("sum_", add), ("diff", sub):
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    op(
                        constants[USEquityPricing.open] * window_length,
                        constants[USEquityPricing.close] * window_length,
                    )
                )
                assert_frame_equal(output_results, output_expected)
Пример #16
0
 def run_pipeline(self, *args, **kwargs):
     calendar = US_EQUITIES
     loader = self.loader
     finder = self.asset_finder
     engine = SimplePipelineEngine(lambda col: loader,
                                   finder,
                                   default_domain=calendar)
     return engine.run_pipeline(*args, **kwargs)
Пример #17
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = PrecomputedLoader(
            constants=constants,
            dates=self.dates,
            sids=self.asset_ids,
        )
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.asset_ids * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(
                index=result_index,
                data=full(result_shape, -3, dtype=float),
            ),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(
                    index=result_index,
                    data=full(result_shape, const, dtype=float),
                ),
            )
Пример #18
0
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        num_dates = len(dates)
        num_assets = len(assets)
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(
            lambda column: self.loader, self.dates, self.asset_finder,
        )

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = make_cascading_boolean_array(
            shape=(num_dates, num_assets),
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = make_alternating_boolean_array(
            shape=(num_dates, num_assets), first_value=False,
        )

        expected_no_mask_result = full(
            shape=(num_dates, num_assets), fill_value=True, dtype=bool_dtype,
        )

        masks = cascading_mask, alternating_mask, NotSpecified
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(
                columns={'open_price': open_price, 'close_price': close_price},
            )
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, case_column in (('open_price', open),
                                         ('close_price', close)):
                if mask is not NotSpecified:
                    mask_results = results['mask'].unstack()
                    check_arrays(mask_results.values, expected_mask)
                output_results = results[colname].unstack()
                output_expected = create_expected_results(
                    constants[case_column], expected_mask,
                )
                assert_frame_equal(output_results, output_expected)
Пример #19
0
    def test_same_day_pipeline(self):
        loader = self.loader
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        factor = AssetID()
        asset = self.asset_ids[0]
        p = Pipeline(columns={"f": factor}, screen=factor <= asset)

        # The crux of this is that when we run the pipeline for a single day
        #  (i.e. start and end dates are the same) we should accurately get
        # data for the day prior.
        result = engine.run_pipeline(p, self.dates[1], self.dates[1])
        self.assertEqual(result["f"][0], 1.0)
Пример #20
0
    def test_fail_usefully_on_insufficient_data(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        class SomeFactor(CustomFactor):
            inputs = [USEquityPricing.close]
            window_length = 10

            def compute(self, today, assets, out, closes):
                pass

        p = Pipeline(columns={'t': SomeFactor()})

        # self.dates[9] is the earliest date we should be able to compute.
        engine.run_pipeline(p, self.dates[9], self.dates[9])

        # We shouldn't be able to compute dates[8], since we only know about 8
        # prior dates, and we need a window length of 10.
        with self.assertRaises(NoFurtherDataError):
            engine.run_pipeline(p, self.dates[8], self.dates[8])
Пример #21
0
    def test_rolling_and_nonrolling(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the last day that we think all
        # the assets existed.
        dates_to_test = self.dates[-30:]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(
            constants=constants,
            dates=self.dates,
            assets=self.assets,
        )
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(index=result_index, data=full(result_shape, -3)),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(index=result_index, data=full(result_shape, const)),
            )
Пример #22
0
    def test_engine_with_multicolumn_loader(self):
        open_ = USEquityPricing.open
        close = USEquityPricing.close
        volume = USEquityPricing.volume

        # Test for thirty days up to the second to last day that we think all
        # the assets existed.  If we test the last day of our calendar, no
        # assets will be in our output, because their end dates are all
        dates_to_test = self.dates[-32:-2]

        constants = {open_: 1, close: 2, volume: 3}
        loader = ConstantLoader(
            constants=constants,
            dates=self.dates,
            assets=self.assets,
        )
        engine = SimplePipelineEngine(loader, self.dates, self.asset_finder)

        sumdiff = RollingSumDifference()

        result = engine.run_pipeline(
            Pipeline(
                columns={
                    'sumdiff': sumdiff,
                    'open': open_.latest,
                    'close': close.latest,
                    'volume': volume.latest,
                },
            ),
            dates_to_test[0],
            dates_to_test[-1]
        )
        self.assertIsNotNone(result)
        self.assertEqual(
            {'sumdiff', 'open', 'close', 'volume'},
            set(result.columns)
        )

        result_index = self.assets * len(dates_to_test)
        result_shape = (len(result_index),)
        check_arrays(
            result['sumdiff'],
            Series(index=result_index, data=full(result_shape, -3)),
        )

        for name, const in [('open', 1), ('close', 2), ('volume', 3)]:
            check_arrays(
                result[name],
                Series(index=result_index, data=full(result_shape, const)),
            )
Пример #23
0
    def test_same_day_pipeline(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        factor = AssetID()
        asset = self.asset_ids[0]
        p = Pipeline(columns={'f': factor}, screen=factor <= asset)

        # The crux of this is that when we run the pipeline for a single day
        #  (i.e. start and end dates are the same) we should accurately get
        # data for the day prior.
        result = engine.run_pipeline(p, self.dates[1], self.dates[1])
        self.assertEqual(result['f'][0], 1.0)
Пример #24
0
    def test_SMA(self):
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader,
            self.env.trading_days,
            self.finder,
        )
        window_length = 5
        assets = self.all_assets
        dates = date_range(
            self.first_asset_start + self.trading_day,
            self.last_asset_end,
            freq=self.trading_day,
        )
        dates_to_test = dates[window_length:]

        SMA = SimpleMovingAverage(
            inputs=(USEquityPricing.close, ),
            window_length=window_length,
        )

        results = engine.run_pipeline(
            Pipeline(columns={'sma': SMA}),
            dates_to_test[0],
            dates_to_test[-1],
        )

        # Shift back the raw inputs by a trading day because we expect our
        # computed results to be computed using values anchored on the
        # **previous** day's data.
        expected_raw = rolling_mean(
            self.writer.expected_values_2d(
                dates - self.trading_day,
                assets,
                'close',
            ),
            window_length,
            min_periods=1,
        )

        expected = DataFrame(
            # Truncate off the extra rows needed to compute the SMAs.
            expected_raw[window_length:],
            index=dates_to_test,  # dates_to_test is dates[window_length:]
            columns=self.finder.retrieve_all(assets),
        )
        self.write_nans(expected)
        result = results['sma'].unstack()
        assert_frame_equal(result, expected)
Пример #25
0
    def test_numeric_factor(self):
        constants = self.constants
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        num_dates = 5
        dates = self.dates[10:10 + num_dates]
        high, low = USEquityPricing.high, USEquityPricing.low
        open, close = USEquityPricing.open, USEquityPricing.close

        high_minus_low = RollingSumDifference(inputs=[high, low])
        open_minus_close = RollingSumDifference(inputs=[open, close])
        avg = (high_minus_low + open_minus_close) / 2

        results = engine.run_pipeline(
            Pipeline(columns={
                'high_low': high_minus_low,
                'open_close': open_minus_close,
                'avg': avg,
            }, ),
            dates[0],
            dates[-1],
        )

        high_low_result = results['high_low'].unstack()
        expected_high_low = 3.0 * (constants[high] - constants[low])
        assert_frame_equal(
            high_low_result,
            DataFrame(expected_high_low, index=dates, columns=self.assets),
        )

        open_close_result = results['open_close'].unstack()
        expected_open_close = 3.0 * (constants[open] - constants[close])
        assert_frame_equal(
            open_close_result,
            DataFrame(expected_open_close, index=dates, columns=self.assets),
        )

        avg_result = results['avg'].unstack()
        expected_avg = (expected_high_low + expected_open_close) / 2.0
        assert_frame_equal(
            avg_result,
            DataFrame(expected_avg, index=dates, columns=self.assets),
        )
Пример #26
0
    def test_multiple_rolling_factors(self):

        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        )

        pipeline = Pipeline(
            columns={
                'short': short_factor,
                'long': long_factor,
                'high': high_factor,
            }
        )
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {'short', 'high', 'long'})
        assert_multi_index_is_product(
            self, results.index, dates, finder.retrieve_all(assets)
        )

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(
            results['short'].unstack().values,
            full(shape, -short_factor.window_length),
        )
        check_arrays(
            results['long'].unstack().values,
            full(shape, -long_factor.window_length),
        )
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(
            results['high'].unstack().values,
            full(shape, -2 * high_factor.window_length),
        )
Пример #27
0
    def test_multiple_rolling_factors(self):

        loader = self.loader
        finder = self.asset_finder
        assets = self.assets
        engine = SimplePipelineEngine(
            lambda column: loader,
            self.dates,
            self.asset_finder,
        )
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10:10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(
            window_length=3,
            inputs=[USEquityPricing.open, USEquityPricing.high],
        )

        pipeline = Pipeline(columns={
            'short': short_factor,
            'long': long_factor,
            'high': high_factor,
        })
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {'short', 'high', 'long'})
        assert_multi_index_is_product(self, results.index, dates,
                                      finder.retrieve_all(assets))

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(
            results['short'].unstack().values,
            full(shape, -short_factor.window_length),
        )
        check_arrays(
            results['long'].unstack().values,
            full(shape, -long_factor.window_length),
        )
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(
            results['high'].unstack().values,
            full(shape, -2 * high_factor.window_length),
        )
Пример #28
0
    def test_factor_with_single_output(self):
        """
        Test passing an `outputs` parameter of length 1 to a CustomFactor.
        """
        dates = self.dates[5:10]
        assets = self.assets
        num_dates = len(dates)
        open = USEquityPricing.open
        open_values = [self.constants[open]] * num_dates
        open_values_as_tuple = [(self.constants[open], )] * num_dates
        engine = SimplePipelineEngine(
            lambda column: self.loader,
            self.dates,
            self.asset_finder,
        )

        single_output = OpenPrice(outputs=['open'])
        pipeline = Pipeline(columns={
            'open_instance': single_output,
            'open_attribute': single_output.open,
        }, )
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        # The instance `single_output` itself will compute a numpy.recarray
        # when added as a column to our pipeline, so we expect its output
        # values to be 1-tuples.
        open_instance_expected = {
            asset: open_values_as_tuple
            for asset in assets
        }
        open_attribute_expected = {asset: open_values for asset in assets}

        for colname, expected_values in (('open_instance',
                                          open_instance_expected),
                                         ('open_attribute',
                                          open_attribute_expected)):
            column_results = results[colname].unstack()
            expected_results = DataFrame(
                expected_values,
                index=dates,
                columns=assets,
                dtype=float64,
            )
            assert_frame_equal(column_results, expected_results)
Пример #29
0
    def test_drawdown(self):
        # The monotonically-increasing data produced by SyntheticDailyBarWriter
        # exercises two pathological cases for MaxDrawdown.  The actual
        # computed results are pretty much useless (everything is either NaN)
        # or zero, but verifying we correctly handle those corner cases is
        # valuable.
        engine = SimplePipelineEngine(
            lambda column: self.pipeline_loader,
            self.env.trading_days,
            self.finder,
        )
        window_length = 5
        assets = self.all_assets
        dates = date_range(
            self.first_asset_start + self.trading_day,
            self.last_asset_end,
            freq=self.trading_day,
        )
        dates_to_test = dates[window_length:]

        drawdown = MaxDrawdown(
            inputs=(USEquityPricing.close, ),
            window_length=window_length,
        )

        results = engine.run_pipeline(
            Pipeline(columns={'drawdown': drawdown}),
            dates_to_test[0],
            dates_to_test[-1],
        )

        # We expect NaNs when the asset was undefined, otherwise 0 everywhere,
        # since the input is always increasing.
        expected = DataFrame(
            data=zeros((len(dates_to_test), len(assets)), dtype=float),
            index=dates_to_test,
            columns=self.finder.retrieve_all(assets),
        )
        self.write_nans(expected)
        result = results['drawdown'].unstack()

        assert_frame_equal(expected, result)
Пример #30
0
    def test_screen(self):
        loader = self.loader
        finder = self.asset_finder
        asset_ids = array(self.asset_ids)
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        num_dates = 5
        dates = self.dates[10 : 10 + num_dates]

        factor = AssetID()
        for asset_id in asset_ids:
            p = Pipeline(columns={"f": factor}, screen=factor <= asset_id)
            result = engine.run_pipeline(p, dates[0], dates[-1])

            expected_sids = asset_ids[asset_ids <= asset_id]
            expected_assets = finder.retrieve_all(expected_sids)
            expected_result = DataFrame(
                index=MultiIndex.from_product([dates, expected_assets]),
                data=tile(expected_sids.astype(float), [len(dates)]),
                columns=["f"],
            )

            assert_frame_equal(result, expected_result)
Пример #31
0
    def test_input_dates_provided_by_default(self):
        loader = self.loader
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        class TestFactor(CustomFactor):
            inputs = [InputDates(), USEquityPricing.close]
            window_length = 10
            dtype = datetime64ns_dtype

            def compute(self, today, assets, out, dates, closes):
                first, last = dates[[0, -1], 0]
                assert last == today.asm8
                assert len(dates) == len(closes) == self.window_length
                out[:] = first

        p = Pipeline(columns={'t': TestFactor()})
        results = engine.run_pipeline(p, self.dates[9], self.dates[10])

        # All results are the same, so just grab one column.
        column = results.unstack().iloc[:, 0].values
        check_arrays(column, self.dates[:2].values)
Пример #32
0
    def test_multiple_rolling_factors(self):

        loader = self.loader
        assets = self.assets
        engine = SimplePipelineEngine(lambda column: loader, self.dates, self.asset_finder)
        shape = num_dates, num_assets = (5, len(assets))
        dates = self.dates[10 : 10 + num_dates]

        short_factor = RollingSumDifference(window_length=3)
        long_factor = RollingSumDifference(window_length=5)
        high_factor = RollingSumDifference(window_length=3, inputs=[USEquityPricing.open, USEquityPricing.high])

        pipeline = Pipeline(columns={"short": short_factor, "long": long_factor, "high": high_factor})
        results = engine.run_pipeline(pipeline, dates[0], dates[-1])

        self.assertEqual(set(results.columns), {"short", "high", "long"})
        assert_multi_index_is_product(self, results.index, dates, assets)

        # row-wise sum over an array whose values are all (1 - 2)
        check_arrays(results["short"].unstack().values, full(shape, -short_factor.window_length, dtype=float))
        check_arrays(results["long"].unstack().values, full(shape, -long_factor.window_length, dtype=float))
        # row-wise sum over an array whose values are all (1 - 3)
        check_arrays(results["high"].unstack().values, full(shape, -2 * high_factor.window_length, dtype=float))
Пример #33
0
    def test_compute_with_adjustments(self):
        dates, assets = self.dates, self.assets
        low, high = USEquityPricing.low, USEquityPricing.high
        apply_idxs = [3, 10, 16]

        def apply_date(idx, offset=0):
            return dates[apply_idxs[idx] + offset]

        adjustments = DataFrame.from_records([
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=2.0,
                start_date=None,
                end_date=apply_date(0, offset=-1),
                apply_date=apply_date(0),
            ),
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=3.0,
                start_date=None,
                end_date=apply_date(1, offset=-1),
                apply_date=apply_date(1),
            ),
            dict(
                kind=MULTIPLY,
                sid=assets[1],
                value=5.0,
                start_date=None,
                end_date=apply_date(2, offset=-1),
                apply_date=apply_date(2),
            ),
        ])
        low_base = DataFrame(self.make_frame(30.0))
        low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

        # Pre-apply inverse of adjustments to the baseline.
        high_base = DataFrame(self.make_frame(30.0))
        high_base.iloc[:apply_idxs[0], 1] /= 2.0
        high_base.iloc[:apply_idxs[1], 1] /= 3.0
        high_base.iloc[:apply_idxs[2], 1] /= 5.0

        high_loader = DataFrameLoader(high, high_base, adjustments)

        engine = SimplePipelineEngine(
            {
                low: low_loader,
                high: high_loader
            }.__getitem__,
            self.dates,
            self.asset_finder,
        )

        for window_length in range(1, 4):
            low_mavg = SimpleMovingAverage(
                inputs=[USEquityPricing.low],
                window_length=window_length,
            )
            high_mavg = SimpleMovingAverage(
                inputs=[USEquityPricing.high],
                window_length=window_length,
            )
            bounds = product_upper_triangle(range(window_length, len(dates)))
            for start, stop in bounds:
                results = engine.run_pipeline(
                    Pipeline(columns={
                        'low': low_mavg,
                        'high': high_mavg
                    }),
                    dates[start],
                    dates[stop],
                )
                self.assertEqual(set(results.columns), {'low', 'high'})
                iloc_bounds = slice(start, stop + 1)  # +1 to include end date

                low_results = results.unstack()['low']
                assert_frame_equal(low_results, low_base.iloc[iloc_bounds])

                high_results = results.unstack()['high']
                assert_frame_equal(high_results, high_base.iloc[iloc_bounds])
Пример #34
0
    def test_loader_given_multiple_columns(self):
        class Loader1DataSet1(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        class Loader1DataSet2(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        class Loader2DataSet(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        constants1 = {
            Loader1DataSet1.col1: 1,
            Loader1DataSet1.col2: 2,
            Loader1DataSet2.col1: 3,
            Loader1DataSet2.col2: 4
        }
        loader1 = RecordingConstantLoader(constants=constants1,
                                          dates=self.dates,
                                          assets=self.assets)
        constants2 = {Loader2DataSet.col1: 5, Loader2DataSet.col2: 6}
        loader2 = RecordingConstantLoader(constants=constants2,
                                          dates=self.dates,
                                          assets=self.assets)

        engine = SimplePipelineEngine(
            lambda column: loader2
            if column.dataset == Loader2DataSet else loader1,
            self.dates,
            self.asset_finder,
        )

        pipe_col1 = RollingSumSum(inputs=[
            Loader1DataSet1.col1, Loader1DataSet2.col1, Loader2DataSet.col1
        ],
                                  window_length=2)

        pipe_col2 = RollingSumSum(inputs=[
            Loader1DataSet1.col2, Loader1DataSet2.col2, Loader2DataSet.col2
        ],
                                  window_length=3)

        pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1],
                                  window_length=3)

        columns = OrderedDict([
            ('pipe_col1', pipe_col1),
            ('pipe_col2', pipe_col2),
            ('pipe_col3', pipe_col3),
        ])
        result = engine.run_pipeline(
            Pipeline(columns=columns),
            self.dates[2],  # index is >= the largest window length - 1
            self.dates[-1])
        min_window = min(pip_col.window_length
                         for pip_col in itervalues(columns))
        col_to_val = ChainMap(constants1, constants2)
        vals = {
            name: (sum(col_to_val[col]
                       for col in pipe_col.inputs) * pipe_col.window_length)
            for name, pipe_col in iteritems(columns)
        }

        index = MultiIndex.from_product([self.dates[2:], self.assets])
        expected = DataFrame(data={
            col:
            concatenate((full(
                (columns[col].window_length - min_window) * index.levshape[1],
                nan),
                         full((index.levshape[0] -
                               (columns[col].window_length - min_window)) *
                              index.levshape[1], val)))
            for col, val in iteritems(vals)
        },
                             index=index,
                             columns=columns)

        assert_frame_equal(result, expected)

        self.assertEqual(
            set(loader1.load_calls), {
                ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                        Loader1DataSet2.col1),
                ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                        Loader1DataSet2.col2)
            })
        self.assertEqual(set(loader2.load_calls), {
            ColumnArgs.sorted_by_ds(Loader2DataSet.col1, Loader2DataSet.col2)
        })
Пример #35
0

# Set the trading calendar
trading_calendar = get_calendar('NYSE')

# Create a Pipeline engine
engine = SimplePipelineEngine(get_loader=choose_loader,
                              calendar=trading_calendar.all_sessions,
                              asset_finder=bundle_data.asset_finder)

# Set the start and end dates
start_date = pd.Timestamp('2016-01-05', tz='utc')
end_date = pd.Timestamp('2016-01-05', tz='utc')

# Run our pipeline for the given start and end dates
pipeline_output = engine.run_pipeline(pipeline, start_date, end_date)

# Get the values in index level 1 and save them to a list
universe_tickers = pipeline_output.index.get_level_values(1).values.tolist()

# Create a data portal
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader)


def get_pricing(data_portal,
                trading_calendar,
Пример #36
0
    def test_factor_with_multiple_outputs(self):
        dates = self.dates[5:10]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(lambda column: self.loader, self.dates, self.asset_finder)

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = array(
            [
                [True, True, True, False],
                [True, True, False, False],
                [True, False, False, False],
                [False, False, False, False],
                [False, False, False, False],
            ],
            dtype=bool,
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = array(
            [
                [False, True, False, True],
                [True, False, True, False],
                [False, True, False, True],
                [True, False, True, False],
                [False, True, False, True],
            ],
            dtype=bool,
        )

        expected_no_mask_result = array(
            [
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
                [True, True, True, True],
            ],
            dtype=bool,
        )

        masks = cascading_mask, alternating_mask, NotSpecified
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
            expected_no_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            open_price, close_price = MultipleOutputs(mask=mask)
            pipeline = Pipeline(columns={"open_price": open_price, "close_price": close_price})
            if mask is not NotSpecified:
                pipeline.add(mask, "mask")

            results = engine.run_pipeline(pipeline, dates[0], dates[-1])
            for colname, case_column in (("open_price", open), ("close_price", close)):
                if mask is not NotSpecified:
                    mask_results = results["mask"].unstack()
                    check_arrays(mask_results.values, expected_mask)
                output_results = results[colname].unstack()
                output_expected = create_expected_results(constants[case_column], expected_mask)
                assert_frame_equal(output_results, output_expected)
Пример #37
0
    bundle_data.adjustment_reader,
)


def choose_loader(column):
    if column in USEquityPricing.columns:
        return pipeline_loader
    raise ValueError("No PipelineLoader registered for column %s." % column)


#data_frequency = 'daily',
#capital_base = DEFAULT_CAPITAL_BASE

start = '2015-9-1'  # 必须在国内交易日
end = '2017-9-8'  # 必须在国内交易日

#print Date(tz='utc', as_timestamp=True).parser(start)
#perf_tracker = None
# Pull in the environment's new AssetFinder for quick reference
#print trading_calendar.all_sessions

your_engine = SimplePipelineEngine(
    get_loader=choose_loader,
    calendar=trading_calendar.all_sessions,
    asset_finder=trading_environment.asset_finder)
result = your_engine.run_pipeline(
    my_pipe,
    Date(tz='utc', as_timestamp=True).parser(start),
    Date(tz='utc', as_timestamp=True).parser(end))
print result
Пример #38
0
    def test_compute_with_adjustments(self):
        dates, asset_ids = self.dates, self.asset_ids
        low, high = USEquityPricing.low, USEquityPricing.high
        apply_idxs = [3, 10, 16]

        def apply_date(idx, offset=0):
            return dates[apply_idxs[idx] + offset]

        adjustments = DataFrame.from_records(
            [
                dict(
                    kind=MULTIPLY,
                    sid=asset_ids[1],
                    value=2.0,
                    start_date=None,
                    end_date=apply_date(0, offset=-1),
                    apply_date=apply_date(0),
                ),
                dict(
                    kind=MULTIPLY,
                    sid=asset_ids[1],
                    value=3.0,
                    start_date=None,
                    end_date=apply_date(1, offset=-1),
                    apply_date=apply_date(1),
                ),
                dict(
                    kind=MULTIPLY,
                    sid=asset_ids[1],
                    value=5.0,
                    start_date=None,
                    end_date=apply_date(2, offset=-1),
                    apply_date=apply_date(2),
                ),
            ]
        )
        low_base = DataFrame(self.make_frame(30.0))
        low_loader = DataFrameLoader(low, low_base.copy(), adjustments=None)

        # Pre-apply inverse of adjustments to the baseline.
        high_base = DataFrame(self.make_frame(30.0))
        high_base.iloc[:apply_idxs[0], 1] /= 2.0
        high_base.iloc[:apply_idxs[1], 1] /= 3.0
        high_base.iloc[:apply_idxs[2], 1] /= 5.0

        high_loader = DataFrameLoader(high, high_base, adjustments)

        engine = SimplePipelineEngine(
            {low: low_loader, high: high_loader}.__getitem__,
            self.dates,
            self.asset_finder,
        )

        for window_length in range(1, 4):
            low_mavg = SimpleMovingAverage(
                inputs=[USEquityPricing.low],
                window_length=window_length,
            )
            high_mavg = SimpleMovingAverage(
                inputs=[USEquityPricing.high],
                window_length=window_length,
            )
            bounds = product_upper_triangle(range(window_length, len(dates)))
            for start, stop in bounds:
                results = engine.run_pipeline(
                    Pipeline(
                        columns={'low': low_mavg, 'high': high_mavg}
                    ),
                    dates[start],
                    dates[stop],
                )
                self.assertEqual(set(results.columns), {'low', 'high'})
                iloc_bounds = slice(start, stop + 1)  # +1 to include end date

                low_results = results.unstack()['low']
                assert_frame_equal(low_results, low_base.iloc[iloc_bounds])

                high_results = results.unstack()['high']
                assert_frame_equal(high_results, high_base.iloc[iloc_bounds])
Пример #39
0
    def test_loader_given_multiple_columns(self):

        class Loader1DataSet1(DataSet):
            col1 = Column(float)
            col2 = Column(float32)

        class Loader1DataSet2(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        class Loader2DataSet(DataSet):
            col1 = Column(float32)
            col2 = Column(float32)

        constants1 = {Loader1DataSet1.col1: 1,
                      Loader1DataSet1.col2: 2,
                      Loader1DataSet2.col1: 3,
                      Loader1DataSet2.col2: 4}

        loader1 = RecordingPrecomputedLoader(constants=constants1,
                                             dates=self.dates,
                                             sids=self.assets)
        constants2 = {Loader2DataSet.col1: 5,
                      Loader2DataSet.col2: 6}
        loader2 = RecordingPrecomputedLoader(constants=constants2,
                                             dates=self.dates,
                                             sids=self.assets)

        engine = SimplePipelineEngine(
            lambda column:
            loader2 if column.dataset == Loader2DataSet else loader1,
            self.dates, self.asset_finder,
        )

        pipe_col1 = RollingSumSum(inputs=[Loader1DataSet1.col1,
                                          Loader1DataSet2.col1,
                                          Loader2DataSet.col1],
                                  window_length=2)

        pipe_col2 = RollingSumSum(inputs=[Loader1DataSet1.col2,
                                          Loader1DataSet2.col2,
                                          Loader2DataSet.col2],
                                  window_length=3)

        pipe_col3 = RollingSumSum(inputs=[Loader2DataSet.col1],
                                  window_length=3)

        columns = OrderedDict([
            ('pipe_col1', pipe_col1),
            ('pipe_col2', pipe_col2),
            ('pipe_col3', pipe_col3),
        ])
        result = engine.run_pipeline(
            Pipeline(columns=columns),
            self.dates[2],  # index is >= the largest window length - 1
            self.dates[-1]
        )
        min_window = min(pip_col.window_length
                         for pip_col in itervalues(columns))
        col_to_val = ChainMap(constants1, constants2)
        vals = {name: (sum(col_to_val[col] for col in pipe_col.inputs)
                       * pipe_col.window_length)
                for name, pipe_col in iteritems(columns)}

        index = MultiIndex.from_product([self.dates[2:], self.assets])

        def expected_for_col(col):
            val = vals[col]
            offset = columns[col].window_length - min_window
            return concatenate(
                [
                    full(offset * index.levshape[1], nan),
                    full(
                        (index.levshape[0] - offset) * index.levshape[1],
                        val,
                        float,
                    )
                ],
            )

        expected = DataFrame(
            data={col: expected_for_col(col) for col in vals},
            index=index,
            columns=columns,
        )

        assert_frame_equal(result, expected)

        self.assertEqual(set(loader1.load_calls),
                         {ColumnArgs.sorted_by_ds(Loader1DataSet1.col1,
                                                  Loader1DataSet2.col1),
                          ColumnArgs.sorted_by_ds(Loader1DataSet1.col2,
                                                  Loader1DataSet2.col2)})
        self.assertEqual(set(loader2.load_calls),
                         {ColumnArgs.sorted_by_ds(Loader2DataSet.col1,
                                                  Loader2DataSet.col2)})
Пример #40
0
    def test_masked_factor(self):
        """
        Test that a Custom Factor computes the correct values when passed a
        mask. The mask/filter should be applied prior to computing any values,
        as opposed to computing the factor across the entire universe of
        assets. Any assets that are filtered out should be filled with missing
        values.
        """
        loader = self.loader
        dates = self.dates[5:8]
        assets = self.assets
        asset_ids = self.asset_ids
        constants = self.constants
        open = USEquityPricing.open
        close = USEquityPricing.close
        engine = SimplePipelineEngine(
            lambda column: loader, self.dates, self.asset_finder,
        )

        factor1_value = constants[open]
        factor2_value = 3.0 * (constants[open] - constants[close])

        def create_expected_results(expected_value, mask):
            expected_values = where(mask, expected_value, nan)
            return DataFrame(expected_values, index=dates, columns=assets)

        cascading_mask = AssetIDPlusDay() < (asset_ids[-1] + dates[0].day)
        expected_cascading_mask_result = array(
            [[True,  True,  True, False],
             [True,  True, False, False],
             [True, False, False, False]],
            dtype=bool,
        )

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        expected_alternating_mask_result = array(
            [[False,  True, False,   True],
             [True,  False,  True,  False],
             [False,  True, False,   True]],
            dtype=bool,
        )

        masks = cascading_mask, alternating_mask
        expected_mask_results = (
            expected_cascading_mask_result,
            expected_alternating_mask_result,
        )
        for mask, expected_mask in zip(masks, expected_mask_results):
            # Test running a pipeline with a single masked factor.
            columns = {'factor1': OpenPrice(mask=mask), 'mask': mask}
            pipeline = Pipeline(columns=columns)
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])

            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

            factor1_results = results['factor1'].unstack()
            factor1_expected = create_expected_results(factor1_value,
                                                       mask_results)
            assert_frame_equal(factor1_results, factor1_expected)

            # Test running a pipeline with a second factor. This ensures that
            # adding another factor to the pipeline with a different window
            # length does not cause any unexpected behavior, especially when
            # both factors share the same mask.
            columns['factor2'] = RollingSumDifference(mask=mask)
            pipeline = Pipeline(columns=columns)
            results = engine.run_pipeline(pipeline, dates[0], dates[-1])

            mask_results = results['mask'].unstack()
            check_arrays(mask_results.values, expected_mask)

            factor1_results = results['factor1'].unstack()
            factor2_results = results['factor2'].unstack()
            factor1_expected = create_expected_results(factor1_value,
                                                       mask_results)
            factor2_expected = create_expected_results(factor2_value,
                                                       mask_results)
            assert_frame_equal(factor1_results, factor1_expected)
            assert_frame_equal(factor2_results, factor2_expected)
Пример #41
0
def run_pipeline(pipeline, start_date, end_date=None, bundle=None):
    """
    Compute values for pipeline from start_date to end_date, using the specified
    bundle or the default bundle.

    Parameters
    ----------
    pipeline : Pipeline, required
        The pipeline to run.

    start_date : str (YYYY-MM-DD), required
        First date on which the pipeline should run. If start_date is not a trading
        day, the pipeline will start on the first trading day after start_date.

    end_date : str (YYYY-MM-DD), optional
        Last date on which the pipeline should run. If end_date is not a trading
        day, the pipeline will end on the first trading day after end_date.
        Defaults to today.

    bundle : str, optional
        the bundle code. If omitted, the default bundle will be used (and must be set).

    Returns
    -------
    result : pd.DataFrame
        A frame of computed results. The result columns correspond to the entries
        of pipeline.columns, which should be a dictionary mapping strings to instances
        of zipline.pipeline.term.Term. For each date between start_date and end_date,
        result will contain a row for each asset that passed pipeline.screen. A screen
        of None indicates that a row should be returned for each asset that existed each
        day.

    Examples
    --------
    Get a pipeline of 1-year returns:

    >>> from zipline.pipeline.factors import Returns
    >>> pipeline = Pipeline(                                                                  # doctest: +SKIP
            columns={
                '1Y': Returns(window_length=252),
            })
    >>> factor = run_pipeline(pipeline, '2018-01-01', '2019-02-01', bundle="usstock-1min")    # doctest: +SKIP
    """

    if not bundle:
        bundle = get_default_bundle()
        if not bundle:
            raise ValidationError("you must specify a bundle or set a default bundle")
        bundle = bundle["default_bundle"]

    load_extensions(code=bundle)

    bundle_data = bundles.load(
        bundle,
        os.environ,
        pd.Timestamp.utcnow(),
    )

    calendar_name = bundles.bundles[bundle].calendar_name
    trading_calendar = get_calendar(calendar_name)

    start_date = pd.Timestamp(start_date)

    if start_date.tz:
        start_date = start_date.tz_convert("UTC")
    else:
        start_date = start_date.tz_localize("UTC")

    if end_date:
        end_date = pd.Timestamp(end_date)
    else:
        end_date = pd.Timestamp.now().normalize()

    if end_date.tz:
        end_date = end_date.tz_convert("UTC")
    else:
        end_date = end_date.tz_localize("UTC")

    first_session = max(bundles.bundles[bundle].start_session, trading_calendar.first_session)
    if start_date < first_session:
        raise ValidationError(
            f"start_date cannot be earlier than {first_session.date().isoformat()} for this bundle")

    # Roll-forward start_date to valid session
    for i in range(100):
        if trading_calendar.is_session(start_date):
            break
        start_date += pd.Timedelta(days=1)
    else:
        raise ValidationError(f"start_date is not in {calendar_name} calendar")

    # Roll-forward end_date to valid session
    for i in range(100):
        if trading_calendar.is_session(end_date):
            break
        end_date += pd.Timedelta(days=1)
    else:
        raise ValidationError("end_date is not in calendar")

    if (
        end_date < start_date):
        raise ValidationError("end_date cannot be earlier than start_date")

    default_pipeline_loader = EquityPricingLoader.without_fx(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )
    asset_finder = asset_finder_cache.get(bundle, bundle_data.asset_finder)
    asset_finder_cache[bundle] = asset_finder

    pipeline_loader = QuantRocketPipelineLoaderRouter(
        asset_db_conn=asset_finder.engine,
        calendar=trading_calendar,
        default_loader=default_pipeline_loader,
        default_loader_columns=EquityPricing.columns
    )

    calendar_domain = domain.get_domain_from_calendar(trading_calendar)

    engine = SimplePipelineEngine(
        pipeline_loader,
        asset_finder,
        calendar_domain)

    return engine.run_pipeline(pipeline, start_date, end_date)
Пример #42
0
def initialize(context):
    dates = pd.date_range('2018-01-01', '2018-09-28')
    # assets = bundle_data.asset_finder.lookup_symbols(['A', 'AAL'], as_of_date=None)
    # assets = bundle_data.asset_finder
    sids = bundle_data.asset_finder.sids
    assets = [sid(item) for item in sids]

    # The values for Column A will just be a 2D array of numbers ranging from 1 -> N.
    column_A_frame = pd.DataFrame(
        data=np.arange(len(dates) * len(assets), dtype=float).reshape(len(dates), len(assets)),
        index=dates,
        columns=sids,
    )

    # Column B will always provide True for 0 and False for 1.
    column_B_frame = pd.DataFrame(data={sids[0]: True, sids[1]: False}, index=dates)

    loaders = {
        MyDataSet.column_A: DataFrameLoader(MyDataSet.column_A, column_A_frame),
        MyDataSet.column_B: DataFrameLoader(MyDataSet.column_B, column_B_frame),
    }

    def my_dispatcher(column):
        return loaders[column]

    # Set up pipeline engine

    # Loader for pricing
    pipeline_loader = USEquityPricingLoader(
        bundle_data.equity_daily_bar_reader,
        bundle_data.adjustment_reader,
    )

    def choose_loader(column):
        if column in USEquityPricing.columns:
            return pipeline_loader
        return my_dispatcher(column)

    engine = SimplePipelineEngine(
        get_loader=choose_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder,
    )

    p = Pipeline(
        columns={
            'price': USEquityPricing.close.latest,
            'col_A': MyDataSet.column_A.latest,
            'col_B': MyDataSet.column_B.latest
        },
        screen=StaticAssets(assets)
    )

    df = engine.run_pipeline(
        p,
        pd.Timestamp('2016-01-07', tz='utc'),
        pd.Timestamp('2016-01-07', tz='utc')
    )

    df = df.sort_values(by=['price'], axis=0, ascending=False)

    print(df)