Exemplo n.º 1
0
    def _prepare_data_to_plot(self, tail=False):
        """
        Prepares the scatter data and the fitted regression line of strategy returns vs benchmark returns.

        Parameters
        ----------
        tail
            if True, the regression is fitted only on the "tail" observations, i.e. the dates on which
            the strategy return is lower than the mean strategy return minus one standard deviation;
            if False, all common observations are used. The returned data points are never filtered.

        Returns
        -------
        datapoints_tms
            benchmark and strategy simple returns on their common dates, concatenated column-wise
            (benchmark column first)
        regression_line
            QFSeries with 20 points of the line y = beta * x + alpha, indexed by x values evenly spaced
            over [-max_ret, max_ret]
        beta
            slope of the fitted line
        alpha
            intercept of the fitted line
        r_squared
            square of the r_value returned by beta_and_alpha_full_stats
        max_ret
            largest absolute return found in datapoints_tms
        """
        strategy_rets = self.strategy_tms.to_simple_returns()
        benchmark_rets = self.benchmark_tms.to_simple_returns()

        strategy_rets, benchmark_rets = get_values_for_common_dates(strategy_rets, benchmark_rets)
        datapoints_tms = pd.concat((benchmark_rets, strategy_rets), axis=1)

        if tail:
            # Tail events are < the avg portfolio returns minus one std
            tail_threshold = strategy_rets.mean() - strategy_rets.std()
            regressed_strategy_rets = strategy_rets.loc[strategy_rets < tail_threshold]
        else:
            regressed_strategy_rets = strategy_rets

        # p-value and standard error are not needed for the plot
        beta, alpha, r_value, _, _ = beta_and_alpha_full_stats(
            strategy_tms=regressed_strategy_rets, benchmark_tms=benchmark_rets)

        max_ret = datapoints_tms.abs().max().max()  # take max element from the whole data-frame
        x = np.linspace(-max_ret, max_ret, 20)
        y = beta * x + alpha
        # pd.Float64Index was removed in pandas 2.0; pd.Index with an explicit float dtype
        # works on both older and newer pandas versions.
        regression_line = QFSeries(data=y, index=pd.Index(x, dtype="float64"))

        return datapoints_tms, regression_line, beta, alpha, r_value ** 2, max_ret
Exemplo n.º 2
0
    def _use_regression_to_fill_missing_data(self, benchmark_tms, columns_type, result_dataframe, empty_values_idx):
        """
        Fills the flagged entries of every column of result_dataframe with values computed from a linear
        fit against the benchmark: filled_value = beta * benchmark_value + alpha.

        Parameters
        ----------
        benchmark_tms
            benchmark series used as the explanatory variable
        columns_type
            passed through unchanged to _get_beta_and_alpha
        result_dataframe
            data frame whose columns are filled; the same object is returned
        empty_values_idx
            frame with one mask column per column of result_dataframe (matched by position);
            presumably True marks a missing value - verify against the caller

        Returns
        -------
        result_dataframe
            the data frame passed in
        """
        for col_position in range(result_dataframe.shape[1]):
            series_to_fill = result_dataframe.iloc[:, col_position]
            missing_mask = empty_values_idx.iloc[:, col_position]

            beta, alpha = self._get_beta_and_alpha(
                benchmark_tms, series_to_fill, columns_type, missing_mask)

            # Only dates present in both the benchmark and the mask can be proxied.
            aligned_benchmark, aligned_mask = get_values_for_common_dates(
                benchmark_tms, missing_mask)
            benchmark_on_missing_dates = aligned_benchmark[aligned_mask]

            # NOTE(review): the write goes through the Series obtained with iloc, so it reaches
            # result_dataframe only if that Series is a view of the frame - confirm for the pandas
            # version in use.
            series_to_fill[missing_mask] = beta * benchmark_on_missing_dates + alpha

        return result_dataframe
Exemplo n.º 3
0
    def _preprocess_data(self, analysed_tms, regressors_df):
        """
        Prepares the input data for further processing.

        The regressors are passed through DataCleaner.proxy_using_regression (with the analysed series
        and SimpleReturnsSeries as the columns type), then the regressors and the analysed series are
        restricted to their common dates. Input and output sizes are logged at debug level.

        Parameters
        ----------
        analysed_tms
            the analysed timeseries
        regressors_df
            data frame of regressors

        Returns
        -------
        common_regressors_df, common_analysed_tms
            cleaned regressors and the analysed series, both restricted to common dates
        """
        input_length_msg = "Length of input timeseries: {:d} \n".format(len(analysed_tms))
        self.logger.debug(input_length_msg)

        proxied_regressors_df = DataCleaner(regressors_df).proxy_using_regression(
            analysed_tms, columns_type=SimpleReturnsSeries)
        aligned_regressors_df, aligned_analysed_tms = get_values_for_common_dates(
            proxied_regressors_df, analysed_tms)

        output_length_msg = "Length of preprocessed timeseries: {:d}".format(aligned_analysed_tms.size)
        self.logger.debug(output_length_msg)
        regressors_count_msg = "Number of regressors: {:d}".format(aligned_regressors_df.shape[1])
        self.logger.debug(regressors_count_msg)

        return aligned_regressors_df, aligned_analysed_tms
Exemplo n.º 4
0
    def _prepare_data_to_plot(self):
        """
        Prepares the scatter data and the fitted regression line of strategy returns vs benchmark returns.

        Returns
        -------
        datapoints_tms
            benchmark and strategy simple returns on their common dates, concatenated column-wise
            (benchmark column first)
        regression_line
            QFSeries with 20 points of the line y = beta * x + alpha, indexed by x values evenly spaced
            over [-max_ret, max_ret]
        beta
            slope of the fitted line
        alpha
            intercept of the fitted line
        r_squared
            square of the r_value returned by beta_and_alpha_full_stats
        max_ret
            largest absolute return found in datapoints_tms
        """
        strategy_rets = self.strategy_tms.to_simple_returns()
        benchmark_rets = self.benchmark_tms.to_simple_returns()

        strategy_rets, benchmark_rets = get_values_for_common_dates(
            strategy_rets, benchmark_rets)
        datapoints_tms = pd.concat((benchmark_rets, strategy_rets), axis=1)

        # p-value and standard error are not needed for the plot
        beta, alpha, r_value, _, _ = beta_and_alpha_full_stats(
            strategy_tms=strategy_rets, benchmark_tms=benchmark_rets)

        max_ret = datapoints_tms.abs().max().max()  # take max element from the whole data-frame
        x = np.linspace(-max_ret, max_ret, 20)
        y = beta * x + alpha
        # pd.Float64Index was removed in pandas 2.0; pd.Index with an explicit float dtype
        # works on both older and newer pandas versions.
        regression_line = QFSeries(data=y, index=pd.Index(x, dtype="float64"))

        return datapoints_tms, regression_line, beta, alpha, r_value**2, max_ret
Exemplo n.º 5
0
def beta_and_alpha_full_stats(
        strategy_tms: QFSeries,
        benchmark_tms: QFSeries) -> Tuple[float, float, float, float, float]:
    """
    Calculates alpha and beta of the series versus the benchmark series.

    Both series are converted to simple returns and restricted to their common dates (with NaNs
    removed) before an ordinary least-squares line is fitted with the benchmark returns as x and
    the strategy returns as y.

    Parameters
    ----------
    strategy_tms
        Series of portfolio's returns/values
    benchmark_tms
        Series of benchmark returns/values

    Returns
    -------
    beta
        slope of the linear fit
    alpha
        intercept of the linear fit
        (y = beta * x + alpha, where x is the benchmark return and y is the portfolio's return)
    r_value
        correlation coefficient. NOTE: this is not r_squared, r_squared = r_value**2
    p_value
        two-sided p-value for a hypothesis test whose null hypothesis is that the slope is zero
    std_err
        standard error of the estimated slope
    """
    strategy_tms = strategy_tms.to_simple_returns()
    benchmark_tms = benchmark_tms.to_simple_returns()

    # Imported inside the function - presumably to avoid a circular import; TODO confirm.
    from qf_lib.common.utils.dateutils.get_values_common_dates import get_values_for_common_dates
    strategy_tms, benchmark_tms = get_values_for_common_dates(strategy_tms,
                                                              benchmark_tms,
                                                              remove_nans=True)

    # linregress(x, y): the benchmark is the explanatory variable, the strategy the response.
    fit = stats.linregress(benchmark_tms.values, strategy_tms.values)

    # Read the result fields by name so that slope/intercept cannot be mixed up.
    beta = fit.slope
    alpha = fit.intercept

    return beta, alpha, fit.rvalue, fit.pvalue, fit.stderr
Exemplo n.º 6
0
    def test_get_values_for_common_dates(self):
        """
        Checks that get_values_for_common_dates keeps, for two series and one data frame, only the rows
        whose dates appear in all of the inputs.
        """
        # --- inputs: the two date indices overlap on four dates only ---
        data = range(6)
        first_dates = DatetimeIndex(
            ['2014-12-31', '2015-01-02', '2015-01-04', '2015-01-05', '2015-01-09', '2015-01-10'])
        second_dates = DatetimeIndex(
            ['2015-02-01', '2015-01-02', '2015-01-03', '2015-01-04', '2015-01-05', '2015-01-10'])
        frame_columns = ['DataFrame Col. A', 'DataFrame Col. B']

        series1 = QFSeries(data=data, index=first_dates, name='Series 1')
        series2 = QFSeries(data=data, index=second_dates, name='Series 2')
        dataframe1 = QFDataFrame(
            data=array([data, data]).transpose(), index=second_dates, columns=frame_columns)

        # --- expectations: values located at the common dates in each input ---
        common_dates = DatetimeIndex(['2015-01-02', '2015-01-04', '2015-01-05', '2015-01-10'])
        values_from_first = [1, 2, 3, 5]
        values_from_second = [1, 3, 4, 5]

        expected_series1 = QFSeries(data=values_from_first, index=common_dates, name='Series 1')
        expected_series2 = QFSeries(data=values_from_second, index=common_dates, name='Series 2')
        expected_dataframe = QFDataFrame(
            data=array([values_from_second, values_from_second]).transpose(),
            index=common_dates,
            columns=frame_columns)

        # --- exercise ---
        actual_series1, actual_series2, actual_dataframe = get_values_for_common_dates(
            series1, series2, dataframe1)

        # --- verify ---
        assert_series_equal(expected_series1, actual_series1)
        assert_series_equal(expected_series2, actual_series2)
        assert_dataframes_equal(expected_dataframe, actual_dataframe)