Пример #1
0
    def check_downsampled_term(self, term):
        """
        Compare ``term.downsample`` at yearly, quarterly, monthly and weekly
        frequencies against expectations rebuilt from the term's daily values.

        For each frequency the expectation is ``groupby(...).first()`` of the
        daily values per period, forward-filled over the computed sessions.
        """
        #       June 2014
        # Mo Tu We Th Fr Sa Su
        #                    1
        #  2  3  4  5  6  7  8
        #  9 10 11 12 13 14 15
        # 16 17 18 19 20 21 22
        # 23 24 25 26 27 28 29
        # 30
        sessions = self.nyse_sessions
        session_window = sessions.slice_indexer('2014-06-05', '2015-01-06')
        compute_dates = sessions[session_window]
        start_date, end_date = compute_dates[[0, -1]]

        pipe = Pipeline({
            'year': term.downsample(frequency='year_start'),
            'quarter': term.downsample(frequency='quarter_start'),
            'month': term.downsample(frequency='month_start'),
            'week': term.downsample(frequency='week_start'),
        })

        # Daily (non-downsampled) values of the term, from the start of 2014
        # through the end of the target period.
        daily_results = self.run_pipeline(
            Pipeline({'term': term}),
            start_date=pd.Timestamp('2014-01-02', tz='UTC'),
            end_date=pd.Timestamp('2015-01-06', tz='UTC'),
        )
        raw_term_results = daily_results['term'].unstack()

        def first_in_period(grouper):
            # First value within each period defined by `grouper`, carried
            # forward onto every session we compute.
            per_period = raw_term_results.groupby(grouper).first()
            return per_period.reindex(compute_dates, method='ffill')

        expected_results = {
            'year': first_in_period(pd.TimeGrouper('AS')),
            'quarter': first_in_period(pd.TimeGrouper('QS')),
            'month': first_in_period(pd.TimeGrouper('MS')),
            'week': first_in_period(pd.TimeGrouper('W', label='left')),
        }

        results = self.run_pipeline(pipe, start_date, end_date)

        for frequency, expected in expected_results.items():
            assert_frame_equal(results[frequency].unstack(), expected)
Пример #2
0
    def test_correlation_and_regression_with_bad_asset(self):
        """
        `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and
        `RollingLinearRegressionOfReturns` must each raise
        `NonExistentAssetInTimeFrame` when targeting a nonexistent asset.
        """
        missing_asset = Equity(0, exchange="TEST")
        first_day = self.pipeline_start_date
        last_day = self.pipeline_end_date

        # Which filter we use is arbitrary; what matters is that every factor
        # is exercised both without a mask and with one.
        arbitrary_filter = AssetID().eq(1)

        for mask in (NotSpecified, arbitrary_filter):
            shared_kwargs = dict(
                target=missing_asset,
                returns_length=3,
                mask=mask,
            )
            factors_by_column = (
                ('pearson_factor',
                 RollingPearsonOfReturns(
                     correlation_length=3, **shared_kwargs)),
                ('spearman_factor',
                 RollingSpearmanOfReturns(
                     correlation_length=3, **shared_kwargs)),
                ('regression_factor',
                 RollingLinearRegressionOfReturns(
                     regression_length=3, **shared_kwargs)),
            )

            # Each factor is run in its own single-column pipeline so that
            # every one of them is individually required to raise.
            for column_name, factor in factors_by_column:
                with self.assertRaises(NonExistentAssetInTimeFrame):
                    self.run_pipeline(
                        Pipeline(columns={column_name: factor}),
                        first_day,
                        last_day,
                    )
Пример #3
0
    def test_adding_slice_column(self):
        """
        A slice is rejected as a pipeline column, both when passed to the
        `Pipeline` constructor and when added afterwards with `Pipeline.add`.
        """
        asset = self.asset_finder.retrieve_asset(self.sids[0])
        open_slice = OpenPrice()[asset]

        # Rejected at construction time.
        self.assertRaises(
            UnsupportedPipelineOutput,
            Pipeline,
            columns={'open_slice': open_slice},
        )

        # Rejected when added to an existing, empty pipeline.
        empty_pipe = Pipeline(columns={})
        self.assertRaises(
            UnsupportedPipelineOutput,
            empty_pipe.add,
            open_slice,
            'open_slice',
        )
Пример #4
0
    def test_slice_with_masking(self, unmasked_column, slice_column):
        """
        A mask applied to a factor restricts its regular inputs but must leave
        the data of any slice inputs untouched.
        """
        finder = self.asset_finder
        first_day = self.pipeline_start_date
        last_day = self.pipeline_end_date

        # The mask lets exactly one asset through.
        unmasked_asset = finder.retrieve_asset(self.sids[unmasked_column])
        unmasked_asset_only = AssetID().eq(unmasked_asset.sid)

        # The slice is taken on this asset. When it differs from
        # `unmasked_asset` the slice must still carry real (non-missing) data
        # inside the custom factor, i.e. the mask must not apply to it.
        slice_asset = finder.retrieve_asset(self.sids[slice_column])

        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[slice_asset]

        # Unmasked returns, used as the reference values inside `compute`.
        unmasked_run = self.run_pipeline(
            Pipeline(columns={'returns': returns}),
            first_day,
            last_day,
        )
        returns_results = unmasked_run['returns'].unstack()

        class UsesSlicedInput(CustomFactor):
            window_length = 1
            inputs = [returns, returns_slice]

            def compute(self, today, assets, out, returns, returns_slice):
                # `returns` must be narrowed to the single unmasked asset;
                # `returns_slice` must be unaffected by the mask.
                assert returns.shape == (1, 1)
                assert returns_slice.shape == (1, 1)
                assert (
                    returns[0, 0] == returns_results.loc[today, unmasked_asset]
                )
                assert (
                    returns_slice[0, 0] ==
                    returns_results.loc[today, slice_asset]
                )

        # The actual checks live in `UsesSlicedInput.compute`; running the
        # pipeline is what triggers them.
        masked_pipeline = Pipeline(
            columns={'masked': UsesSlicedInput(mask=unmasked_asset_only)},
        )
        self.run_pipeline(masked_pipeline, first_day, last_day)
Пример #5
0
    def test_slice(self, my_asset_column, window_length_):
        """
        Indexing a term by an asset yields a slice which, used as an input,
        has a single column matching that asset's column in the source term.
        """
        sids = self.sids
        my_asset = self.asset_finder.retrieve_asset(sids[my_asset_column])

        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[my_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = window_length_
            inputs = [returns, returns_slice]

            def compute(self, today, assets, out, returns, returns_slice):
                # The slice must be one column wide, the full factor one
                # column per sid, and the slice values must equal the
                # corresponding column of the full factor.
                num_rows = self.window_length
                assert returns_slice.shape == (num_rows, 1)
                assert returns.shape == (num_rows, len(sids))
                check_arrays(returns_slice[:, 0], returns[:, my_asset_column])

        # The checks are performed inside `compute`; the pipeline run below
        # is what invokes it.
        pipeline = Pipeline(columns={'uses_sliced_input': UsesSlicedInput()})
        self.run_pipeline(
            pipeline,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
Пример #6
0
    def test_load_properly_forward_fills(self):
        """
        Run the event columns over only the second half of the trading days
        and check each column with the matching next/previous-value checker.
        """
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            self.trading_days,
            self.asset_finder,
        )

        # Only the second half of the calendar is requested, so some values
        # have to be forward-filled from data lying before the window. The
        # output should match computing over the whole calendar and slicing
        # afterwards.
        all_days = self.trading_days
        dates = all_days[len(all_days) // 2:]

        latest_by_name = {c.name: c.latest for c in EventDataSet.columns}
        results = engine.run_pipeline(
            Pipeline(latest_by_name),
            start_date=dates[0],
            end_date=dates[-1],
        )

        for column in EventDataSet.columns:
            # Choose the checker first, then make a single call.
            if column in self.next_value_columns:
                check = self.check_next_value_results
            elif column in self.previous_value_columns:
                check = self.check_previous_value_results
            else:
                raise AssertionError("Unexpected column %s." % column)

            check(column, results[column.name].unstack(), dates)
Пример #7
0
    def test_load_with_trading_calendar(self):
        """
        Run the event columns over every trading day and check each column
        with the matching next/previous-value checker.
        """
        trading_days = self.trading_days
        engine = SimplePipelineEngine(
            lambda x: self.loader,
            trading_days,
            self.asset_finder,
        )

        latest_by_name = {c.name: c.latest for c in EventDataSet.columns}
        results = engine.run_pipeline(
            Pipeline(latest_by_name),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        for column in EventDataSet.columns:
            # Choose the checker first, then make a single call.
            if column in self.next_value_columns:
                check = self.check_next_value_results
            elif column in self.previous_value_columns:
                check = self.check_previous_value_results
            else:
                raise AssertionError("Unexpected column %s." % column)

            check(column, results[column.name].unstack(), self.trading_days)
Пример #8
0
        def initialize(context):
            """
            Attach a pipeline named 'test' with a 10-day VWAP column, screened
            on that VWAP being negative.
            """
            pipeline = attach_pipeline(Pipeline(), 'test')

            ten_day_vwap = VWAP(window_length=10)
            pipeline.add(ten_day_vwap, 'vwap')

            # No asset is expected to have a price below 0, so this screen is
            # not expected to let anything through.
            negative_vwap = ten_day_vwap < 0
            pipeline.set_screen(negative_vwap)
Пример #9
0
    def check_equivalent_terms(self, terms):
        """
        Run every term in ``terms`` (a mapping of column name to term) over
        the span from the tenth-to-last to the last trading day and assert
        that every output column equals the first one.
        """
        self.assertTrue(len(terms) > 1, "Need at least two terms to compare")
        pipe = Pipeline(terms)

        first_day, last_day = self.trading_days[[-10, -1]]
        results = self.pipeline_engine.run_pipeline(pipe, first_day, last_day)

        # Every column, the reference included, is compared to the first.
        reference = results.iloc[:, 0]
        for column_name in terms:
            column = results.loc[:, column_name]
            assert_equal(column, reference, check_names=False)
Пример #10
0
    def test_factor_correlation_methods(self,
                                        returns_length,
                                        correlation_length):
        """
        `Factor.pearsonr` and `Factor.spearmanr` must agree with the built-in
        `RollingPearsonOfReturns` and `RollingSpearmanOfReturns` factors.
        """
        target_asset = self.my_asset

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[target_asset]

        pearson = returns.pearsonr(
            target=returns_slice,
            correlation_length=correlation_length,
        )
        spearman = returns.spearmanr(
            target=returns_slice,
            correlation_length=correlation_length,
        )

        builtin_kwargs = dict(
            target=target_asset,
            returns_length=returns_length,
            correlation_length=correlation_length,
        )
        expected_pearson = RollingPearsonOfReturns(**builtin_kwargs)
        expected_spearman = RollingSpearmanOfReturns(**builtin_kwargs)

        # The built-ins create their own Returns factor as input, so the only
        # way to substitute ours is to overwrite `inputs` afterwards. This is
        # not something to do in practice; it is needed here because Returns
        # should read our random data rather than its default,
        # USEquityPricing.close.
        for builtin in (expected_pearson, expected_spearman):
            builtin.inputs = [returns, returns_slice]

        pipeline = Pipeline(columns={
            'pearson': pearson,
            'spearman': spearman,
            'expected_pearson': expected_pearson,
            'expected_spearman': expected_spearman,
        })
        results = self.run_pipeline(
            pipeline,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )

        actual_frames = {
            name: results[name].unstack()
            for name in ('pearson', 'spearman')
        }
        expected_frames = {
            'pearson': results['expected_pearson'].unstack(),
            'spearman': results['expected_spearman'].unstack(),
        }

        assert_frame_equal(
            actual_frames['pearson'], expected_frames['pearson'],
        )
        assert_frame_equal(
            actual_frames['spearman'], expected_frames['spearman'],
        )
Пример #11
0
    def test_load_empty(self):
        """
        With an empty raw events frame, the output must still contain a row
        for every sid, have the right dimensions, and hold each column's
        missing value.
        """
        raw_column_names = [
            "sid",
            "timestamp",
            "event_date",
            "float",
            "int",
            "datetime",
            "string",
        ]
        empty_events = pd.DataFrame(columns=raw_column_names)

        # Dataset column -> name of the raw column it is loaded from.
        next_value_columns = {
            EventDataSet.next_datetime: 'datetime',
            EventDataSet.next_event_date: 'event_date',
            EventDataSet.next_float: 'float',
            EventDataSet.next_int: 'int',
            EventDataSet.next_string: 'string',
            EventDataSet.next_string_custom_missing: 'string'
        }
        previous_value_columns = {
            EventDataSet.previous_datetime: 'datetime',
            EventDataSet.previous_event_date: 'event_date',
            EventDataSet.previous_float: 'float',
            EventDataSet.previous_int: 'int',
            EventDataSet.previous_string: 'string',
            EventDataSet.previous_string_custom_missing: 'string'
        }

        loader = EventsLoader(
            empty_events, next_value_columns, previous_value_columns
        )
        engine = SimplePipelineEngine(
            lambda x: loader,
            self.trading_days,
            self.asset_finder,
        )

        latest_by_name = {c.name: c.latest for c in EventDataSet.columns}
        results = engine.run_pipeline(
            Pipeline(latest_by_name),
            start_date=self.trading_days[0],
            end_date=self.trading_days[-1],
        )

        # Expected output: one row per (date, asset) pair, all missing.
        assets = self.asset_finder.retrieve_all(self.ASSET_FINDER_EQUITY_SIDS)
        dates = self.trading_days
        full_index = pd.MultiIndex.from_product([dates, assets])
        expected = self.frame_containing_all_missing_values(
            index=full_index,
            columns=EventDataSet.columns,
        )

        assert_equal(results, expected)
Пример #12
0
        def initialize(context):
            """
            Build and attach a pipeline named 'test' with one VWAP column per
            length in the enclosing `vwaps`, plus a 'filter' column that is
            also used as the screen when the enclosing `set_screen` is truthy.
            """
            pipeline = Pipeline()
            context.vwaps = []
            for window in vwaps:
                column_name = vwap_key(window)
                vwap_factor = VWAP(window_length=window)
                # Keep each factor on the context as well as in the pipeline.
                context.vwaps.append(vwap_factor)
                pipeline.add(vwap_factor, name=column_name)

            close_above_300 = USEquityPricing.close.latest > 300
            pipeline.add(close_above_300, 'filter')
            if set_screen:
                pipeline.set_screen(close_above_300)

            attach_pipeline(pipeline, 'test')
Пример #13
0
 def test_simple_beta_matches_regression(self):
     """
     `SimpleBeta` must equal the `beta` output of
     `RollingLinearRegressionOfReturns` with 2-day returns, both using a
     regression length of 10.
     """
     target = self.my_asset
     simple_beta = SimpleBeta(target=target, regression_length=10)
     full_regression = RollingLinearRegressionOfReturns(
         target=target,
         returns_length=2,
         regression_length=10,
     )
     columns = {'simple': simple_beta, 'complex': full_regression.beta}
     results = self.run_pipeline(
         Pipeline(columns),
         self.pipeline_start_date,
         self.pipeline_end_date,
     )
     simple_column = results['simple']
     complex_column = results['complex']
     assert_equal(simple_column, complex_column, check_names=False)
Пример #14
0
    def test_masked_single_column_output(self):
        """
        Tests for masking custom factors that compute a 1D out.

        For each of two masks, a masked 1D factor sums `self.col` over the
        assets passing the mask; a second factor then checks that sum against
        a value recomputed from the unmasked column and the mask itself.
        """
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + start_date.day)
        # The masks are also passed as `inputs` to the factors below.
        # NOTE(review): presumably they are flagged window-safe because they
        # are used as inputs -- confirm.
        alternating_mask.window_safe = True
        cascading_mask.window_safe = True

        for mask in (alternating_mask, cascading_mask):

            # The classes are redefined on every iteration because their
            # `inputs` lists capture the current `mask` at definition time.
            class SingleColumnOutput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask]
                window_safe = True
                ndim = 1

                def compute(self, today, assets, out, col, mask):
                    # Because we specified ndim as 1, `out` should always be a
                    # singleton array but `col` should be sized based on the
                    # mask we passed.
                    assert out.shape == (1, )
                    assert col.shape == (1, mask.sum())
                    out[:] = col.sum()

            # Since we cannot add single column output factors as pipeline
            # columns, we have to test its output through another factor.
            class UsesSingleColumnInput(CustomFactor):
                window_length = 1
                inputs = [self.col, mask, SingleColumnOutput(mask=mask)]

                def compute(self, today, assets, out, col, mask,
                            single_column_output):
                    # Make sure that `single_column_output` has the correct
                    # value based on the mask it used.
                    assert single_column_output.shape == (1, 1)
                    single_column_output_value = single_column_output[0][0]
                    # Sum of `col` with the entries where `mask` is false
                    # replaced by 0.
                    expected_value = where(mask, col, 0).sum()
                    assert single_column_output_value == expected_value

            columns = {'uses_single_column_input': UsesSingleColumnInput()}

            # Assertions about the expected shapes of our data are made in the
            # `compute` function of our custom factors above.
            self.run_pipeline(Pipeline(columns=columns), start_date, end_date)
Пример #15
0
    def test_single_column_output(self):
        """
        Check the array shapes seen by a custom factor that produces a 1D out
        and by a factor that consumes it, with and without a mask.
        """
        first_day = self.pipeline_start_date
        last_day = self.pipeline_end_date

        alternating_mask = (AssetIDPlusDay() % 2).eq(0)
        cascading_mask = AssetIDPlusDay() < (self.sids[-1] + first_day.day)

        class SingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [self.col]
            window_safe = True
            ndim = 1

            def compute(self, today, assets, out, col):
                # With ndim set to 1, `out` must be a singleton array while
                # `col` keeps its regular width (asserted to be 3 columns).
                assert out.shape == (1, )
                assert col.shape == (1, 3)
                out[:] = col.sum()

        # Single column output factors cannot be pipeline columns themselves,
        # so the output is observed through a second factor.
        class UsesSingleColumnOutput(CustomFactor):
            window_length = 1
            inputs = [SingleColumnOutput()]

            def compute(self, today, assets, out, single_column_output):
                # The input must always be exactly one column wide, whatever
                # mask was given to `UsesSingleColumnOutput`.
                assert single_column_output.shape == (1, 1)

        for mask in (alternating_mask, cascading_mask):
            unmasked_factor = UsesSingleColumnOutput()
            masked_factor = UsesSingleColumnOutput(mask=mask)
            pipeline = Pipeline(columns={
                'uses_single_column_output': unmasked_factor,
                'uses_single_column_output_masked': masked_factor,
            })

            # The shape checks run inside the `compute` methods above; running
            # the pipeline is what triggers them.
            self.run_pipeline(pipeline, first_day, last_day)
Пример #16
0
    def test_latest(self):
        """
        `.latest` of every `TDS` column, computed over calendar positions
        20 through 39, must match `self.expected_latest` for that column.
        """
        columns = TDS.columns
        pipe = Pipeline(columns={c.name: c.latest for c in columns})

        cal_slice = slice(20, 40)
        dates_to_test = self.calendar[cal_slice]
        first_day, last_day = dates_to_test[0], dates_to_test[-1]
        result = self.engine.run_pipeline(pipe, first_day, last_day)

        for column in columns:
            # Only the unstack is wrapped in the warning-suppressing context.
            with ignore_pandas_nan_categorical_warning():
                actual = result[column.name].unstack()

            expected = self.expected_latest(column, cal_slice)
            assert_frame_equal(actual, expected)
Пример #17
0
    def _check_filters(self, evens, odds, first_five, last_three):
        """
        Run the four given filters next to a `SidFactor` column over the span
        from the tenth-to-last to the last trading day, and check each filter
        against the predicate on sids that its name describes.
        """
        pipe = Pipeline(
            columns={
                'sid': SidFactor(),
                'evens': evens,
                'odds': odds,
                'first_five': first_five,
                'last_three': last_three,
            },
        )

        first_day, last_day = self.trading_days[[-10, -1]]
        results = self.run_pipeline(pipe, first_day, last_day).unstack()

        sids = results.sid.astype(int64_dtype)
        sid_is_odd = (sids % 2).astype(bool)

        expectations = (
            (results.evens, ~sid_is_odd),
            (results.odds, sid_is_odd),
            (results.first_five, sids < 5),
            (results.last_three, sids >= 7),
        )
        for actual, expected in expectations:
            assert_equal(actual, expected)
Пример #18
0
    def test_downsampled_rank(self):
        """
        A month-start downsampled rank must give the same rows whether the
        pipeline starts at `START_DATE` or one day after `HALF_WAY_POINT`.
        """
        monthly_rank = self.factor.rank().downsample('month_start')
        pipeline = Pipeline({'rank': monthly_rank})
        engine = self.pipeline_engine

        full_run = engine.run_pipeline(
            pipeline, self.START_DATE, self.END_DATE,
        )

        late_start = self.HALF_WAY_POINT + pd.Timedelta(days=1)
        late_run = engine.run_pipeline(pipeline, late_start, self.END_DATE)

        # Trim the full run to the rows from `late_start` onwards before
        # comparing it with the run that began there.
        assert_frame_equal(full_run.loc[late_start:], late_run)
Пример #19
0
    def test_non_existent_asset(self):
        """
        Running a pipeline whose factor takes a slice on a non-existent asset
        must raise `NonExistentAssetInTimeFrame`.
        """
        missing_asset = Asset(0, exchange="TEST")
        returns = Returns(window_length=2, inputs=[self.col])
        returns_slice = returns[missing_asset]

        class UsesSlicedInput(CustomFactor):
            window_length = 1
            inputs = [returns_slice]

            def compute(self, today, assets, out, returns_slice):
                # Intentionally empty.
                pass

        first_day = self.pipeline_start_date
        last_day = self.pipeline_end_date
        with self.assertRaises(NonExistentAssetInTimeFrame):
            pipeline = Pipeline(
                columns={'uses_sliced_input': UsesSlicedInput()},
            )
            self.run_pipeline(pipeline, first_day, last_day)
Пример #20
0
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        `Factor.linear_regression` must agree with the built-in factor
        `RollingLinearRegressionOfReturns`.
        """
        target_asset = self.my_asset

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[target_asset]

        regression = returns.linear_regression(
            target=returns_slice,
            regression_length=regression_length,
        )
        expected_regression = RollingLinearRegressionOfReturns(
            target=target_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # The built-in creates its own Returns factor as input, so the only
        # way to substitute ours is to overwrite `inputs` afterwards. This is
        # not something to do in practice; it is needed here because Returns
        # should read our random data rather than its default,
        # USEquityPricing.close.
        expected_regression.inputs = [returns, returns_slice]

        pipeline = Pipeline(columns={
            'regression': regression,
            'expected_regression': expected_regression,
        })
        results = self.run_pipeline(
            pipeline,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )

        actual_frame = results['regression'].unstack()
        expected_frame = results['expected_regression'].unstack()
        assert_frame_equal(actual_frame, expected_frame)
Пример #21
0
    def test_correlation_factors(self, returns_length, correlation_length):
        """
        Tests for the built-in factors `RollingPearsonOfReturns` and
        `RollingSpearmanOfReturns`.

        For each of three mask settings (cascading, alternating, none) the
        factors are run through a pipeline and compared with correlations
        recomputed here with `pearsonr` / `spearmanr` over rolling windows of
        `correlation_length` rows of `Returns(window_length=returns_length)`.
        Positions excluded by the mask are expected to be NaN.
        """
        assets = self.assets
        my_asset = self.my_asset
        my_asset_column = self.my_asset_column
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        returns = Returns(window_length=returns_length)
        # Each mask is paired positionally with its expected boolean result.
        masks = (self.cascading_mask, self.alternating_mask, NotSpecified)
        expected_mask_results = (
            self.expected_cascading_mask_result,
            self.expected_alternating_mask_result,
            self.expected_no_mask_result,
        )

        for mask, expected_mask in zip(masks, expected_mask_results):
            pearson_factor = RollingPearsonOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )
            spearman_factor = RollingSpearmanOfReturns(
                target=my_asset,
                returns_length=returns_length,
                correlation_length=correlation_length,
                mask=mask,
            )

            columns = {
                'pearson_factor': pearson_factor,
                'spearman_factor': spearman_factor,
            }
            pipeline = Pipeline(columns=columns)
            # When a real mask is in use, also output it as a column so that
            # the mask itself can be checked against its expected values.
            if mask is not NotSpecified:
                pipeline.add(mask, 'mask')

            results = run_pipeline(pipeline, start_date, end_date)
            pearson_results = results['pearson_factor'].unstack()
            spearman_results = results['spearman_factor'].unstack()
            if mask is not NotSpecified:
                mask_results = results['mask'].unstack()
                check_arrays(mask_results.values, expected_mask)

            # Run a separate pipeline that calculates returns starting
            # (correlation_length - 1) days prior to our start date. This is
            # because we need (correlation_length - 1) extra days of returns to
            # compute our expected correlations.
            # NOTE: `results` is rebound here; the factor outputs needed
            # later were already extracted above.
            results = run_pipeline(
                Pipeline(columns={'returns': returns}),
                dates[start_date_index - (correlation_length - 1)],
                dates[end_date_index],
            )
            returns_results = results['returns'].unstack()

            # On each day, calculate the expected correlation coefficients
            # between the asset we are interested in and each other asset. Each
            # correlation is calculated over `correlation_length` days.
            expected_pearson_results = full_like(pearson_results, nan)
            expected_spearman_results = full_like(spearman_results, nan)
            for day in range(num_days):
                # Rows day .. day + correlation_length - 1 of the extended
                # returns frame form the window for output row `day`.
                todays_returns = returns_results.iloc[
                    day:day + correlation_length
                ]
                my_asset_returns = todays_returns.iloc[:, my_asset_column]
                for asset, other_asset_returns in todays_returns.iteritems():
                    # NOTE(review): maps an asset to its column position as
                    # int(asset) - 1; presumably sids are 1, 2, 3, ... in
                    # column order -- confirm against the fixture.
                    asset_column = int(asset) - 1
                    # Only element [0] of each result is used.
                    expected_pearson_results[day, asset_column] = pearsonr(
                        my_asset_returns, other_asset_returns,
                    )[0]
                    expected_spearman_results[day, asset_column] = spearmanr(
                        my_asset_returns, other_asset_returns,
                    )[0]

            # Replace entries where `expected_mask` is false with NaN before
            # comparing with the factor output.
            expected_pearson_results = DataFrame(
                data=where(expected_mask, expected_pearson_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(pearson_results, expected_pearson_results)

            expected_spearman_results = DataFrame(
                data=where(expected_mask, expected_spearman_results, nan),
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(spearman_results, expected_spearman_results)
Пример #22
0
    def test_factor_regression_method_two_factors(self, regression_length):
        """
        Tests for `Factor.linear_regression` when passed another 2D factor
        instead of a Slice.

        First checks that regressing two factors with different masks raises
        `IncompatibleTerms`. Then regresses 5-day returns on 10-day returns
        per asset and compares every output of the regression factor with
        values recomputed here using `linregress` over rolling windows of
        `regression_length` rows.
        """
        assets = self.assets
        dates = self.dates
        start_date = self.pipeline_start_date
        end_date = self.pipeline_end_date
        start_date_index = self.start_date_index
        end_date_index = self.end_date_index
        num_days = self.num_days
        run_pipeline = self.run_pipeline

        # The order of these is meant to align with the output of `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        # Ensure that the `linear_regression` method cannot be called with two
        # 2D factors which have different masks.
        returns_masked_1 = Returns(
            window_length=5, inputs=[self.col], mask=AssetID().eq(1),
        )
        returns_masked_2 = Returns(
            window_length=5, inputs=[self.col], mask=AssetID().eq(2),
        )
        with self.assertRaises(IncompatibleTerms):
            returns_masked_1.linear_regression(
                target=returns_masked_2, regression_length=regression_length,
            )

        returns_5 = Returns(window_length=5, inputs=[self.col])
        returns_10 = Returns(window_length=10, inputs=[self.col])

        regression_factor = returns_5.linear_regression(
            target=returns_10, regression_length=regression_length,
        )

        # One pipeline column per regression output, read as an attribute of
        # the regression factor.
        columns = {
            output: getattr(regression_factor, output)
            for output in outputs
        }
        pipeline = Pipeline(columns=columns)

        results = run_pipeline(pipeline, start_date, end_date)

        # Actual results per output, plus NaN-filled arrays of the same shape
        # that the loop further down fills with the expected values.
        output_results = {}
        expected_output_results = {}
        for output in outputs:
            output_results[output] = results[output].unstack()
            expected_output_results[output] = full_like(
                output_results[output], nan,
            )

        # Run a separate pipeline that calculates returns starting
        # (regression_length - 1) days prior to our start date. This is because
        # we need (regression_length - 1) extra days of returns to compute our
        # expected regressions.
        # NOTE: `columns` and `results` are rebound here; the regression
        # outputs needed later were already stored in `output_results`.
        columns = {'returns_5': returns_5, 'returns_10': returns_10}
        results = run_pipeline(
            Pipeline(columns=columns),
            dates[start_date_index - (regression_length - 1)],
            dates[end_date_index],
        )
        returns_5_results = results['returns_5'].unstack()
        returns_10_results = results['returns_10'].unstack()

        # On each day, for each asset, calculate the expected regression
        # results of Y ~ X where Y is the asset's rolling 5 day returns and X
        # is the asset's rolling 10 day returns. Each regression is calculated
        # over `regression_length` days of data.
        for day in range(num_days):
            todays_returns_5 = returns_5_results.iloc[
                day:day + regression_length
            ]
            todays_returns_10 = returns_10_results.iloc[
                day:day + regression_length
            ]
            for asset, asset_returns_5 in todays_returns_5.iteritems():
                # NOTE(review): maps an asset to its column position as
                # int(asset) - 1; presumably sids are 1, 2, 3, ... in column
                # order -- confirm against the fixture.
                asset_column = int(asset) - 1
                asset_returns_10 = todays_returns_10[asset]
                expected_regression_results = linregress(
                    y=asset_returns_5, x=asset_returns_10,
                )
                # Element i of the `linregress` result is stored under
                # outputs[i].
                for i, output in enumerate(outputs):
                    expected_output_results[output][day, asset_column] = \
                        expected_regression_results[i]

        for output in outputs:
            output_result = output_results[output]
            expected_output_result = DataFrame(
                expected_output_results[output],
                index=dates[start_date_index:end_date_index + 1],
                columns=assets,
            )
            assert_frame_equal(output_result, expected_output_result)
Пример #23
0
    def test_factor_correlation_methods_two_factors(self, correlation_length):
        """
        Check `Factor.pearsonr` and `Factor.spearmanr` when the target is
        another 2D factor rather than a Slice.

        The pipeline's correlation outputs are compared against coefficients
        computed directly with `pearsonr` and `spearmanr` over
        `correlation_length`-day windows of 5-day and 10-day returns.
        """
        assets = self.assets
        all_dates = self.dates
        pipeline_start = self.pipeline_start_date
        pipeline_end = self.pipeline_end_date
        first_idx = self.start_date_index
        last_idx = self.end_date_index
        num_days = self.num_days
        run = self.run_pipeline

        # Correlating one 2D factor against another must fail when the two
        # factors were built with different masks.
        masked_first, masked_second = (
            Returns(
                window_length=5, inputs=[self.col], mask=AssetID().eq(sid),
            )
            for sid in (1, 2)
        )
        for method_name in ('pearsonr', 'spearmanr'):
            with self.assertRaises(IncompatibleTerms):
                getattr(masked_first, method_name)(
                    target=masked_second,
                    correlation_length=correlation_length,
                )

        short_returns = Returns(window_length=5, inputs=[self.col])
        long_returns = Returns(window_length=10, inputs=[self.col])

        pearson_factor = short_returns.pearsonr(
            target=long_returns, correlation_length=correlation_length,
        )
        spearman_factor = short_returns.spearmanr(
            target=long_returns, correlation_length=correlation_length,
        )

        correlation_frame = run(
            Pipeline(
                columns={
                    'pearson_factor': pearson_factor,
                    'spearman_factor': spearman_factor,
                },
            ),
            pipeline_start,
            pipeline_end,
        )
        pearson_results = correlation_frame['pearson_factor'].unstack()
        spearman_results = correlation_frame['spearman_factor'].unstack()

        # The expected correlations need (correlation_length - 1) extra days
        # of returns ahead of the start date, so compute the raw returns in a
        # second pipeline that begins that much earlier.
        lookback = correlation_length - 1
        returns_frame = run(
            Pipeline(
                columns={
                    'returns_5': short_returns,
                    'returns_10': long_returns,
                },
            ),
            all_dates[first_idx - lookback],
            all_dates[last_idx],
        )
        short_results = returns_frame['returns_5'].unstack()
        long_results = returns_frame['returns_10'].unstack()

        # For every day and asset, correlate the asset's 5-day returns with
        # its 10-day returns over a `correlation_length`-day window.
        expected_pearson = full_like(pearson_results, nan)
        expected_spearman = full_like(spearman_results, nan)
        for day in range(num_days):
            window = slice(day, day + correlation_length)
            short_window = short_results.iloc[window]
            long_window = long_results.iloc[window]
            for asset, asset_short_returns in short_window.iteritems():
                column = int(asset) - 1
                asset_long_returns = long_window[asset]
                pearson_coef = pearsonr(
                    asset_short_returns, asset_long_returns,
                )[0]
                spearman_coef = spearmanr(
                    asset_short_returns, asset_long_returns,
                )[0]
                expected_pearson[day, column] = pearson_coef
                expected_spearman[day, column] = spearman_coef

        expected_index = all_dates[first_idx:last_idx + 1]
        comparisons = (
            (pearson_results, expected_pearson),
            (spearman_results, expected_spearman),
        )
        for actual_frame, expected_values in comparisons:
            expected_frame = DataFrame(
                data=expected_values, index=expected_index, columns=assets,
            )
            assert_frame_equal(actual_frame, expected_frame)
Пример #24
0
 def initialize(context):
     """Attach an empty pipeline named 'test' and add a 'close' column."""
     empty_pipeline = Pipeline()
     # `chunks` comes from the enclosing scope.
     attached = attach_pipeline(empty_pipeline, 'test', chunks=chunks)
     attached.add(USEquityPricing.close.latest, 'close')
Пример #25
0
        def initialize(context):
            """
            Attach two separately named pipelines, then give each of them
            one column: 'close' on 'test_close', 'volume' on 'test_volume'.
            """
            close_pipe = attach_pipeline(Pipeline(), 'test_close')
            volume_pipe = attach_pipeline(Pipeline(), 'test_volume')

            latest_close = USEquityPricing.close.latest
            close_pipe.add(latest_close, 'close')

            latest_volume = USEquityPricing.volume.latest
            volume_pipe.add(latest_volume, 'volume')
Пример #26
0
    def test_regression_of_returns_factor(self,
                                          returns_length,
                                          regression_length):
        """
        Check the built-in factor `RollingLinearRegressionOfReturns`.

        For each of the cascading mask, the alternating mask, and no mask,
        the factor's outputs are compared against `linregress` results
        computed directly from raw returns, with NaN wherever the expected
        mask is False.
        """
        assets = self.assets
        target_asset = self.my_asset
        target_column = self.my_asset_column
        all_dates = self.dates
        pipeline_start = self.pipeline_start_date
        pipeline_end = self.pipeline_end_date
        first_idx = self.start_date_index
        last_idx = self.end_date_index
        num_days = self.num_days
        run = self.run_pipeline

        # Listed in the same order as the values returned by `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        returns = Returns(window_length=returns_length)

        # Each mask paired with the boolean layout it is expected to produce.
        mask_cases = (
            (self.cascading_mask, self.expected_cascading_mask_result),
            (self.alternating_mask, self.expected_alternating_mask_result),
            (NotSpecified, self.expected_no_mask_result),
        )

        lookback = regression_length - 1
        expected_index = all_dates[first_idx:last_idx + 1]

        for mask, expected_mask in mask_cases:
            has_mask = mask is not NotSpecified

            regression = RollingLinearRegressionOfReturns(
                target=target_asset,
                returns_length=returns_length,
                regression_length=regression_length,
                mask=mask,
            )

            pipeline = Pipeline(
                columns={name: getattr(regression, name) for name in outputs},
            )
            if has_mask:
                pipeline.add(mask, 'mask')

            regression_frame = run(pipeline, pipeline_start, pipeline_end)
            if has_mask:
                # Confirm the mask itself came out as expected before
                # relying on it below.
                computed_mask = regression_frame['mask'].unstack()
                check_arrays(computed_mask.values, expected_mask)

            # One date-by-asset frame per output, plus a NaN-filled array of
            # the same shape that is filled with expected values below.
            actual = {
                name: regression_frame[name].unstack() for name in outputs
            }
            expected = {
                name: full_like(actual[name], nan) for name in outputs
            }

            # The expected regressions need (regression_length - 1) extra
            # days of returns ahead of the start date, so compute the raw
            # returns in a second pipeline that begins that much earlier.
            returns_frame = run(
                Pipeline(columns={'returns': returns}),
                all_dates[first_idx - lookback],
                all_dates[last_idx],
            )
            returns_results = returns_frame['returns'].unstack()

            # For every day, regress each asset's returns (Y) on the target
            # asset's returns (X) over a `regression_length`-day window.
            for day in range(num_days):
                window = returns_results.iloc[day:day + regression_length]
                target_returns = window.iloc[:, target_column]
                for asset, asset_returns in window.iteritems():
                    column = int(asset) - 1
                    fit = linregress(y=asset_returns, x=target_returns)
                    for name, value in zip(outputs, fit):
                        expected[name][day, column] = value

            for name in outputs:
                # Cells excluded by the mask are expected to be NaN.
                expected_frame = DataFrame(
                    where(expected_mask, expected[name], nan),
                    index=expected_index,
                    columns=assets,
                )
                assert_frame_equal(actual[name], expected_frame)
Пример #27
0
 def initialize(context):
     """Attach a fresh empty pipeline under the name 'test' twice."""
     # The second iteration reuses the name already taken by the first.
     for _ in range(2):
         attach_pipeline(Pipeline(), 'test')
Пример #28
0
 def initialize(context):
     """
     Attach a pipeline and immediately request its output.

     Raises AssertionError if `pipeline_output` returns instead of raising.
     """
     empty_pipeline = Pipeline()
     attach_pipeline(empty_pipeline, 'test')
     pipeline_output('test')
     # Only reached if the call above returned normally.
     raise AssertionError("Shouldn't make it past pipeline_output()")
Пример #29
0
def make_pipeline():
    """
    Build a pipeline with two columns derived from a default `RSI` term:
    'longs' is `top(3)` and 'shorts' is `bottom(3)`.
    """
    rsi_factor = RSI()
    columns = {
        'longs': rsi_factor.top(3),
        'shorts': rsi_factor.bottom(3),
    }
    return Pipeline(columns=columns)
Пример #30
0
 def late_attach(context, data):
     """
     Try to attach a pipeline named 'test'.

     Raises AssertionError if `attach_pipeline` returns instead of raising.
     """
     empty_pipeline = Pipeline()
     attach_pipeline(empty_pipeline, 'test')
     # Only reached if the call above returned normally.
     raise AssertionError("Shouldn't make it past attach_pipeline!")