示例#1
0
    def test_require_length_greater_than_one(self):
        """
        Each rolling built-in must raise `ValueError` when constructed with a
        correlation/regression length of 1.
        """
        exchange = ExchangeInfo('TEST', 'TEST FULL', 'US')
        target_asset = Equity(0, exchange_info=exchange)

        # (factor class, name of the keyword that carries its window length)
        cases = (
            (RollingPearsonOfReturns, 'correlation_length'),
            (RollingSpearmanOfReturns, 'correlation_length'),
            (RollingLinearRegressionOfReturns, 'regression_length'),
        )
        for factor_cls, length_kwarg in cases:
            with self.assertRaises(ValueError):
                factor_cls(
                    target=target_asset,
                    returns_length=3,
                    **{length_kwarg: 1}
                )
示例#2
0
    def pipeline_columns_and_mask(self):
        """
        Build the pipeline columns and the screening mask.

        Returns
        -------
        (columns, universe) : tuple
            `columns` maps 'market_beta', 'sector' and 'predict' to pipeline
            terms; `universe` is the mask produced by `__make_factors`.
        """
        from collections import OrderedDict

        factors, universe = self.__make_factors()

        # Ordered so the returns term is always the first regression input,
        # followed by the ranked factors in the order `factors` yields them.
        regress_inputs = OrderedDict()
        regress_inputs['Returns'] = Returns(
            inputs=[USEquityPricing.close],
            mask=universe,
            window_length=5,
        )
        for name, factor in factors.items():
            # Flag the factor as window-safe before ranking it.
            factor.window_safe = True
            # Rank uses relative order rather than absolute values
            # (per the original author: avoids self-similarity).
            regress_inputs[name] = factor.rank(mask=universe)

        # Prediction term (per the original author: price 5 days ahead).
        predict = BasicFactorRegress(
            inputs=regress_inputs.values(),
            window_length=42,
            mask=universe,
        )

        raw_beta = RollingLinearRegressionOfReturns(
            target=symbol(risk_benchmark),
            returns_length=6,
            regression_length=21,
            mask=universe,
        ).beta
        # Shrink the raw beta toward 1.0.
        # NOTE(review): the weights sum to 0.99, not 1.0 -- confirm intended.
        risk_beta = 0.66 * raw_beta + 0.33 * 1.0

        sector = get_sector()

        columns = {
            'market_beta': risk_beta,
            'sector': sector,
            'predict': predict,
        }
        return columns, universe
    def test_correlation_and_regression_with_bad_asset(self):
        """
        `RollingPearsonOfReturns`, `RollingSpearmanOfReturns` and
        `RollingLinearRegressionOfReturns` must each raise
        `NonExistentAssetInTimeFrame` when run against a nonexistent target
        asset, with and without a mask.
        """
        missing_asset = Equity(
            0,
            real_sid='0',
            currency='USD',
            exchange_info=ExchangeInfo('TEST', 'TEST FULL', 'US'),
        )
        first_day = self.pipeline_start_date
        last_day = self.pipeline_end_date
        run = self.run_pipeline

        # The filter itself is arbitrary; what matters is that every factor
        # is exercised both with and without an explicit mask.
        arbitrary_filter = AssetID().eq(1)

        for mask in (NotSpecified, arbitrary_filter):
            # Build all three factors up front, then run each one in its own
            # single-column pipeline.
            named_factors = (
                ('pearson_factor', RollingPearsonOfReturns(
                    target=missing_asset,
                    returns_length=3,
                    correlation_length=3,
                    mask=mask,
                )),
                ('spearman_factor', RollingSpearmanOfReturns(
                    target=missing_asset,
                    returns_length=3,
                    correlation_length=3,
                    mask=mask,
                )),
                ('regression_factor', RollingLinearRegressionOfReturns(
                    target=missing_asset,
                    returns_length=3,
                    regression_length=3,
                    mask=mask,
                )),
            )

            for column_name, factor in named_factors:
                with self.assertRaises(NonExistentAssetInTimeFrame):
                    run(
                        Pipeline(columns={column_name: factor}),
                        first_day,
                        last_day,
                    )
示例#4
0
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        Check that `Factor.linear_regression` yields the same results as the
        built-in `RollingLinearRegressionOfReturns` factor.
        """
        target_asset = self.my_asset

        returns = Returns(window_length=returns_length, inputs=[self.col])
        target_returns = returns[target_asset]

        method_regression = returns.linear_regression(
            target=target_returns,
            regression_length=regression_length,
        )
        builtin_regression = RollingLinearRegressionOfReturns(
            target=target_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # The built-in creates its own Returns input (USEquityPricing.close by
        # default). Overwriting `inputs` after construction is the only way to
        # make it consume our random data; this is a test-only trick and
        # should not be done in practice.
        builtin_regression.inputs = [returns, target_returns]

        pipeline = Pipeline(columns={
            'regression': method_regression,
            'expected_regression': builtin_regression,
        })
        results = self.run_pipeline(
            pipeline,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )

        assert_frame_equal(
            results['regression'].unstack(),
            results['expected_regression'].unstack(),
        )
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        Verify that the `Factor.linear_regression` method agrees with the
        built-in factor `RollingLinearRegressionOfReturns`.
        """
        asset = self.my_asset
        base_returns = Returns(window_length=returns_length, inputs=[self.col])
        asset_returns = base_returns[asset]

        via_method = base_returns.linear_regression(
            target=asset_returns,
            regression_length=regression_length,
        )
        via_builtin = RollingLinearRegressionOfReturns(
            target=asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # By default the built-in computes Returns from USEquityPricing.close.
        # Replace its inputs after the fact so it regresses on our random
        # data instead. Never do this outside of a test.
        via_builtin.inputs = [base_returns, asset_returns]

        terms = {}
        terms['regression'] = via_method
        terms['expected_regression'] = via_builtin

        frame = self.run_pipeline(
            Pipeline(columns=terms),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
        actual = frame['regression'].unstack()
        expected = frame['expected_regression'].unstack()

        assert_frame_equal(actual, expected)
示例#6
0
 def test_simple_beta_matches_regression(self):
     """
     `SimpleBeta` over a 10-day window must equal the `beta` output of
     `RollingLinearRegressionOfReturns` with `returns_length=2` and the
     same regression length.
     """
     run = self.run_pipeline
     target = self.my_asset

     simple = SimpleBeta(target=target, regression_length=10)
     regression = RollingLinearRegressionOfReturns(
         target=target,
         returns_length=2,
         regression_length=10,
     )
     pipeline = Pipeline({'simple': simple, 'complex': regression.beta})

     frame = run(
         pipeline,
         self.pipeline_start_date,
         self.pipeline_end_date,
     )
     # Column names differ by construction, so only the values are compared.
     assert_equal(frame['simple'], frame['complex'], check_names=False)
    def pipeline_columns_and_mask(self):
        """
        Build the pipeline columns and the screening mask.

        Returns
        -------
        (columns, universe) : tuple
            `columns` maps 'hurst', 'price_pct_slope', 'volume_pct_slope',
            'sector', 'market_beta' and 'returns' to pipeline terms;
            `universe` is the filter all of the masked terms were built with.
        """
        base_universe = make_china_equity_universe(
            target_size=2000,
            mask=default_china_equity_universe_mask([risk_benchmark]),
            max_group_weight=0.01,
            smoothing_func=lambda f: f.downsample('month_start'),
        )
        # Mask derived from the current portfolio, so held stocks stay in the
        # universe.
        held_assets = private_universe_mask(self.portfolio.index)

        # Latest close of at least 1.0.
        price_at_least_one = USEquityPricing.close.latest >= 1.0

        # `&` binds tighter than `|`: held assets are OR-ed in after the
        # price screen, so they bypass it.
        universe = (base_universe & price_at_least_one) | held_assets

        # Per the original author: indicator of momentum vs. reversal
        # behaviour.
        hurst = HurstExp(window_length=int(252 * 0.25), mask=universe)
        sector = get_sector()
        # Per the original author: volume and price acceleration.
        pct_slope = Slope(window_length=21, mask=universe)

        raw_beta = RollingLinearRegressionOfReturns(
            target=symbol(risk_benchmark),
            returns_length=5,
            regression_length=21,
            mask=universe,
        ).beta
        # Shrink the raw beta toward 1.0.
        # NOTE(review): the weights sum to 0.99, not 1.0 -- confirm intended.
        risk_beta = 0.66 * raw_beta + 0.33 * 1.0

        returns = Returns(
            inputs=[USEquityPricing.close],
            mask=universe,
            window_length=2,
        )

        columns = dict(
            hurst=hurst.downsample('week_start'),
            price_pct_slope=pct_slope.pslope,
            volume_pct_slope=pct_slope.vslope,
            sector=sector.downsample('month_start'),
            market_beta=risk_beta,
            returns=returns,
        )
        return columns, universe
示例#8
0
    def pipeline_columns_and_mask(self):
        """
        Build the pipeline columns and the screening mask.

        Returns
        -------
        (columns, universe) : tuple
            `columns` maps 'predict' and 'weights' to pipeline terms;
            `universe` is the top-20 filter taken from the prediction term.
        """
        base_universe = make_china_equity_universe(
            target_size=3000,
            mask=default_china_equity_universe_mask([risk_benchmark]),
            max_group_weight=0.01,
            smoothing_func=lambda f: f.downsample('month_start'),
        )
        held_assets = private_universe_mask(self.portfolio.index)
        # Latest close of at least 1.0.
        price_at_least_one = USEquityPricing.close.latest >= 1.0

        # NOTE(review): the original comment said the current portfolio's
        # stocks are *included* in the universe, but `~` excludes them here.
        # Confirm which is intended.
        universe = base_universe & price_at_least_one & ~held_assets

        returns = Returns(
            inputs=[USEquityPricing.close],
            mask=universe,
            window_length=2,
        )
        raw_beta = RollingLinearRegressionOfReturns(
            target=symbol(risk_benchmark),
            returns_length=5,
            regression_length=21,
            mask=universe,
        ).beta
        # Shrink the raw beta toward 1.0.
        # NOTE(review): the weights sum to 0.99, not 1.0 -- confirm intended.
        risk_beta = 0.66 * raw_beta + 0.33 * 1.0

        # Both terms are fed to Markowitz as windowed inputs below, so they
        # are flagged window-safe first.
        returns.window_safe = True
        risk_beta.window_safe = True

        predict = RNNPredict(universe, trigger_date=self.predict_time)

        # From here on the universe is narrowed to the 20 highest predictions.
        universe = predict.top(20)
        weights = Markowitz(
            inputs=[returns, risk_beta],
            window_length=4,
            mask=universe,
            trigger_date=self.predict_time,
        )

        columns = dict(predict=predict, weights=weights)
        return columns, universe
    def pipeline_columns_and_mask(self):
        """
        Build the pipeline columns and the screening mask.

        The universe is every asset whose latest close is at least 1.0 and
        whose sector value is not 0.0.

        Returns
        -------
        (columns, universe) : tuple
            `columns` maps 'predict', 'market_beta' and 'sector' to pipeline
            terms; `universe` is the filter described above.
        """
        # Latest close of at least 1.0.
        price_at_least_one = USEquityPricing.close.latest >= 1.0
        sector = get_sector()
        universe = price_at_least_one & (sector != 0.0)

        returns = Returns(
            inputs=[USEquityPricing.close],
            mask=universe,
            window_length=2,
        )
        raw_beta = RollingLinearRegressionOfReturns(
            target=symbol(RISK_BENCHMARK),
            returns_length=5,
            regression_length=21,
            mask=universe,
        ).beta
        # Shrink the raw beta toward 1.0.
        # NOTE(review): the weights sum to 0.99, not 1.0 -- confirm intended.
        risk_beta = 0.66 * raw_beta + 0.33 * 1.0

        returns.window_safe = True
        risk_beta.window_safe = True

        predict = RNNPredict(
            universe,
            source='predict.csv',
            trigger_date=self.predict_time,
        )

        columns = dict(
            predict=predict,
            market_beta=risk_beta,
            sector=sector,
        )
        return columns, universe
示例#10
0
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        Ensure that `Factor.linear_regression` is consistent with the built-in
        factor `RollingLinearRegressionOfReturns`, and that it rejects
        factors or slices of dtype `datetime64[ns]` with a `TypeError`.

        Parameters
        ----------
        returns_length : int
            Window length used for the `Returns` factor.
        regression_length : int
            Number of observations in each rolling regression.
        """
        my_asset = self.asset_finder.retrieve_asset(self.sids[0])

        returns = Returns(window_length=returns_length, inputs=[self.col])
        returns_slice = returns[my_asset]

        regression = returns.linear_regression(
            target=returns_slice,
            regression_length=regression_length,
        )
        expected_regression = RollingLinearRegressionOfReturns(
            target=my_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # These built-ins construct their own Returns factor to use as inputs,
        # so the only way to set our own inputs is to do so after the fact.
        # This should not be done in practice. It is necessary here because we
        # want Returns to use our random data as an input, but by default it is
        # using USEquityPricing.close.
        expected_regression.inputs = [returns, returns_slice]

        columns = {
            'regression': regression,
            'expected_regression': expected_regression,
        }

        results = self.run_pipeline(
            Pipeline(columns=columns),
            self.pipeline_start_date,
            self.pipeline_end_date,
        )
        regression_results = results['regression'].unstack()
        expected_regression_results = results['expected_regression'].unstack()

        assert_frame_equal(regression_results, expected_regression_results)

        # Make sure we cannot call the linear regression method on factors or
        # slices of dtype `datetime64[ns]`.
        class DateFactor(CustomFactor):
            window_length = 1
            inputs = []
            dtype = datetime64ns_dtype
            window_safe = True

            def compute(self, today, assets, out):
                # Never executed: the factor exists only for its dtype.
                pass

        date_factor = DateFactor()
        date_factor_slice = date_factor[my_asset]

        # A datetime factor as the dependent variable is rejected...
        with self.assertRaises(TypeError):
            date_factor.linear_regression(
                target=returns_slice,
                regression_length=regression_length,
            )
        # ...and so is a datetime slice as the regression target.
        with self.assertRaises(TypeError):
            returns.linear_regression(
                target=date_factor_slice,
                regression_length=regression_length,
            )
示例#11
0
    def test_regression_of_returns_factor(self, returns_length,
                                          regression_length):
        """
        Tests for the built-in factor `RollingLinearRegressionOfReturns`.

        For each mask case, every output of the factor is compared against
        `linregress` applied to rolling windows of a separately computed
        returns frame, with masked-out cells expected to be NaN.
        """
        target_asset = self.my_asset
        target_column = self.my_asset_column
        dates = self.dates
        first_idx = self.start_date_index
        last_idx = self.end_date_index
        run = self.run_pipeline

        # Ordered to line up with the tuple returned by `linregress`.
        outputs = ['beta', 'alpha', 'r_value', 'p_value', 'stderr']

        returns = Returns(window_length=returns_length)

        # (mask passed to the factor, expected boolean mask result)
        mask_cases = (
            (self.cascading_mask, self.expected_cascading_mask_result),
            (self.alternating_mask, self.expected_alternating_mask_result),
            (NotSpecified, self.expected_no_mask_result),
        )

        for mask, expected_mask in mask_cases:
            has_mask = mask is not NotSpecified

            regression_factor = RollingLinearRegressionOfReturns(
                target=target_asset,
                returns_length=returns_length,
                regression_length=regression_length,
                mask=mask,
            )

            pipeline = Pipeline(columns={
                name: getattr(regression_factor, name) for name in outputs
            })
            if has_mask:
                pipeline.add(mask, 'mask')

            results = run(
                pipeline,
                self.pipeline_start_date,
                self.pipeline_end_date,
            )
            if has_mask:
                check_arrays(results['mask'].unstack().values, expected_mask)

            actual = {name: results[name].unstack() for name in outputs}
            # NaN-filled buffers, one per output, shaped like the actual
            # results; filled in cell by cell below.
            expected = {
                name: full_like(frame, nan) for name, frame in actual.items()
            }

            # Run a separate pipeline that computes returns starting
            # (regression_length - 1) days before our start date, because
            # that many extra days of returns are needed to compute the
            # expected regressions.
            returns_frame = run(
                Pipeline(columns={'returns': returns}),
                dates[first_idx - (regression_length - 1)],
                dates[last_idx],
            )['returns'].unstack()

            # On each day, regress every asset's returns (Y) on the target
            # asset's returns (X) over `regression_length` days of data.
            for day in range(self.num_days):
                window = returns_frame.iloc[day:day + regression_length]
                target_returns = window.iloc[:, target_column]
                # NOTE(review): `DataFrame.iteritems` was removed in pandas
                # 2.0; switch to `.items()` if the pinned pandas allows it.
                for asset, asset_returns in window.iteritems():
                    # Presumably sids are 1-based and contiguous, so the
                    # column position is sid - 1 -- confirm against fixture.
                    column = int(asset) - 1
                    fit = linregress(y=asset_returns, x=target_returns)
                    for name, value in zip(outputs, fit):
                        expected[name][day, column] = value

            result_index = dates[first_idx:last_idx + 1]
            for name in outputs:
                expected_frame = DataFrame(
                    where(expected_mask, expected[name], nan),
                    index=result_index,
                    columns=self.assets,
                )
                assert_frame_equal(actual[name], expected_frame)
示例#12
0
    def test_factor_regression_method(self, returns_length, regression_length):
        """
        Check that `Factor.linear_regression` matches the built-in factor
        `RollingLinearRegressionOfReturns`, and that it raises `TypeError`
        for factors or slices of dtype `datetime64[ns]`.
        """
        target_asset = self.asset_finder.retrieve_asset(self.sids[0])

        returns = Returns(window_length=returns_length, inputs=[self.col])
        target_returns = returns[target_asset]

        method_regression = returns.linear_regression(
            target=target_returns,
            regression_length=regression_length,
        )
        builtin_regression = RollingLinearRegressionOfReturns(
            target=target_asset,
            returns_length=returns_length,
            regression_length=regression_length,
        )

        # The built-in creates its own Returns input (USEquityPricing.close by
        # default). Overwriting `inputs` after construction is the only way to
        # make it consume our random data; this is a test-only trick and
        # should not be done in practice.
        builtin_regression.inputs = [returns, target_returns]

        pipeline = Pipeline(columns={
            'regression': method_regression,
            'expected_regression': builtin_regression,
        })
        results = self.run_pipeline(
            pipeline,
            self.pipeline_start_date,
            self.pipeline_end_date,
        )

        assert_frame_equal(
            results['regression'].unstack(),
            results['expected_regression'].unstack(),
        )

        # The linear regression method must reject factors or slices of
        # dtype `datetime64[ns]`.
        class DateFactor(CustomFactor):
            window_length = 1
            inputs = []
            dtype = datetime64ns_dtype
            window_safe = True

            def compute(self, today, assets, out):
                pass

        date_factor = DateFactor()
        date_slice = date_factor[target_asset]

        # (factor to call the method on, target to regress against)
        invalid_pairs = (
            (date_factor, target_returns),
            (returns, date_slice),
        )
        for factor, target in invalid_pairs:
            with self.assertRaises(TypeError):
                factor.linear_regression(
                    target=target,
                    regression_length=regression_length,
                )