Exemplo n.º 1
0
def cal_factor_group_return(factor_data, periods=(20,), prices=None, group_by=None,
    quantiles=5, freq='1d', **kwargs):
    """Build the alphalens factor / forward-return table for a factor.

    The factor is first restricted to the index returned by
    ``stockFilter.typical``. Prices are then loaded or normalised, and both
    are passed to ``get_clean_factor_and_forward_returns``.

    Parameters
    ----------
    factor_data : pandas object
        Factor values indexed by a MultiIndex that has a ``'date'`` level.
    periods : sequence of int
        Forward-return horizons; ``max(periods) + 1`` also sets how far past
        the last factor date the price window extends.
    prices : pandas.DataFrame or None
        ``None`` loads ``'adj_close'`` from ``data_source`` for the computed
        window. A DataFrame with a two-level index has its first column
        unstacked; any other DataFrame is used unchanged.
    group_by : optional
        Forwarded as the alphalens ``groupby`` argument. When it is not
        ``None``, ``binning_by_group`` is switched on.
    quantiles : int, sequence or None
        Forwarded to alphalens. Pass ``quantiles=None`` together with
        ``bins=...`` in ``kwargs`` to bin by value instead.
    freq : str
        When not ``'1d'``, prices are reindexed onto
        ``tc.get_trade_days(start, end, freq)``.
    **kwargs
        Extra keyword arguments for ``get_clean_factor_and_forward_returns``.

    Returns
    -------
    The object returned by ``get_clean_factor_and_forward_returns``.

    Raises
    ------
    ValueError
        If ``prices`` is neither ``None`` nor a DataFrame.
    """
    stocklist = pd.DataFrame(np.ones(len(factor_data)), index=factor_data.index, columns=['stocklist'])
    stocklist = stockFilter.typical(stocklist)
    factor_data = factor_data.reindex(stocklist.index)

    # Price window: 5 trade days before the first factor date up to
    # max(periods) + 1 offsets (in `freq` units) after the last one.
    factor_dates = factor_data.index.get_level_values('date')
    start = tc.tradeDayOffset(factor_dates.min(), -5)
    end = tc.tradeDayOffset(factor_dates.max(), max(periods)+1, freq=freq)

    if prices is None:
        prices = data_source.load_factor('adj_close', '/stocks/', start_date=start,
                                         end_date=end)['adj_close'].unstack()
    elif isinstance(prices, pd.DataFrame):
        if prices.index.nlevels == 2:
            # Long format: pivot the first column into a date x asset table.
            prices = prices.iloc[:, 0].unstack()
    else:
        raise ValueError('prices 格式非法!')

    if freq != '1d':
        date_index = tc.get_trade_days(start, end, freq, retstr=None)
        prices = prices.reindex(date_index, copy=False)

    if_groupby = group_by is not None
    # `quantiles` used to be accepted but never forwarded, so alphalens always
    # fell back to its own default; it is now passed through explicitly.
    merge_data = get_clean_factor_and_forward_returns(factor_data, prices, group_by,
                                                      periods=periods, binning_by_group=if_groupby,
                                                      quantiles=quantiles,
                                                      **kwargs)
    return merge_data
    def test_getFactorsAlphalens(self):
        """Smoke-test building alphalens factor data from the fetched matrices.

        Fetches the factor frames and prices through
        ``self.getDataDictOfMatrixAlphalens``, picks the first ratio in
        ``self.ratio_list`` and checks that
        ``get_clean_factor_and_forward_returns`` returns something.
        """
        factor_frames, price_frame = self.getDataDictOfMatrixAlphalens(
            instrumentList=self.instrumentList,
            ratioList=self.ratio_list,
            fromDate=self.fromDate,
            toDate=self.toDate)

        self.assertIsNotNone(factor_frames)
        self.assertIsNotNone(price_frame)

        first_ratio = self.ratio_list[0]
        first_factor = factor_frames[first_ratio]
        self.assertIsNotNone(first_factor)

        # Value binning (bins=2) instead of quantiles, a single 3-period
        # horizon, no z-score filtering, and max_loss=1 (the original notes
        # .35 as an alternative value).
        alphalens_options = dict(
            quantiles=None,
            bins=2,
            periods=[3],
            filter_zscore=None,
            max_loss=1,
        )
        clean_data = get_clean_factor_and_forward_returns(
            first_factor, price_frame, **alphalens_options)

        self.assertIsNotNone(clean_data)
Exemplo n.º 3
0
def test_performance(factor, prices):
    """Print and plot IC and per-quantile return diagnostics for a factor.

    Builds the alphalens factor/forward-return table for holding periods
    1, 5 and 10 with 5 quantiles, then shows the IC histogram, the IC time
    series, the monthly IC heatmap and the cumulative return by quantile
    for each period. Returns nothing.
    """
    import matplotlib.pyplot as plt
    from alphalens import utils, performance, plotting

    holding_periods = (1, 5, 10)

    # Per-stock holding returns.
    factor_returns = utils.get_clean_factor_and_forward_returns(
        factor, prices, quantiles=5, periods=holding_periods)

    print("因子的IC值:")
    ic = performance.factor_information_coefficient(factor_returns)
    print(ic)
    plotting.plot_ic_hist(ic)
    plt.show()
    plotting.plot_ic_ts(ic)
    plt.show()

    print("平均IC值-月:")
    monthly_ic = performance.mean_information_coefficient(
        factor_returns, by_time="M")
    plotting.plot_monthly_ic_heatmap(monthly_ic)
    plt.show()

    # Mean holding return per quantile and date, demeaned; only the first
    # element of the returned pair is used.
    quantile_stats = performance.mean_return_by_quantile(
        factor_returns, by_date=True, demeaned=True)
    quantile_returns = quantile_stats[0]

    # Cumulative holding return per quantile, one figure per period.
    for period in holding_periods:
        plotting.plot_cumulative_returns_by_quantile(quantile_returns,
                                                     period=period)
        plt.show()
Exemplo n.º 4
0
def cal_ic_by_alphalens(factor_data, prices=None, group_by=None, periods=(20,), **kwargs):
    """Compute the factor information coefficient (IC) through alphalens.

    ``factor_data`` is copied; a DataFrame is reduced to its first column and
    the index levels are renamed to ``['date', 'asset']``. ``prices`` may be
    ``None`` (``'adj_close'`` is loaded from ``data_source``) or a DataFrame
    (its first column is unstacked when the index has two levels). Any other
    value raises ``ValueError``. Returns the result of
    ``factor_information_coefficient``, grouped when ``group_by`` is given.
    """
    factor = factor_data.copy()
    if isinstance(factor, pd.DataFrame):
        factor = factor.iloc[:, 0]
    factor.index.names = ['date', 'asset']

    # Reject unsupported price inputs before doing any loading.
    if prices is not None and not isinstance(prices, pd.DataFrame):
        raise ValueError('prices 格式非法!')

    if prices is None:
        # Window: 5 trade days before the first date, max(periods) after the last.
        factor_dates = factor.index.get_level_values('date')
        start = tc.tradeDayOffset(factor_dates.min(), -5)
        end = tc.tradeDayOffset(factor_dates.max(), max(periods))
        loaded = data_source.load_factor('adj_close', '/stocks/', start_date=start,
                                         end_date=end)
        prices = loaded['adj_close'].unstack()
    elif prices.index.nlevels == 2:
        prices = prices.iloc[:, 0].unstack()

    merged = get_clean_factor_and_forward_returns(factor, prices,
                                                  group_by, periods=periods, **kwargs)
    return factor_information_coefficient(merged, group_adjust=False,
                                          by_group=group_by is not None)
Exemplo n.º 5
0
    def _get_clean_factor_and_fwd_return(self, factor_score, freq):
        """Fetch prices for the rebalance window and build the alphalens table.

        Prices are requested from the first to the last entry of
        ``self._tiaocang_date`` (first 10 characters of each entry's string
        form) at frequency ``freq``, then combined with ``factor_score`` by
        ``get_clean_factor_and_forward_returns``.
        """
        window_start = str(self._tiaocang_date[0])[:10]
        window_end = str(self._tiaocang_date[-1])[:10]
        price = get_sec_price(start_date=window_start,
                              end_date=window_end,
                              sec_ids=self._sec_ID,
                              data_source=self._data_source,
                              freq=freq)

        # NOTE(review): groupby_labels is passed without a groupby argument --
        # confirm that is intended.
        call_kwargs = {
            'factor': factor_score,
            'prices': price,
            'groupby_labels': IndexComp.get_industry_name_dict(),
            'periods': self._periods,
        }
        return get_clean_factor_and_forward_returns(**call_kwargs)
Exemplo n.º 6
0
def get_IC_score(all_data, price):
    """Score every factor column of ``all_data`` by its information coefficient.

    For each column, the alphalens factor/forward-return table is built with
    ``max_loss=0.99``. The IC series of the *second* forward-return column is
    then summarised. With alphalens' default periods that is presumably the
    5-period horizon -- TODO confirm.

    Parameters
    ----------
    all_data : pandas.DataFrame
        One factor per column, in the layout
        ``get_clean_factor_and_forward_returns`` expects.
    price : pandas.DataFrame
        Price table passed to alphalens unchanged.

    Returns
    -------
    (IC_score, IC_) : tuple
        ``IC_score`` has one row per factor with the flat columns
        ``IC_mean``, ``perc_above_0.02``, ``average_return``, ``IR`` and
        ``factor``. ``IC_`` is the row-wise concatenation of the per-factor
        IC series. Both are empty when ``all_data`` has no columns.
    """
    score_columns = ['IC_mean', 'perc_above_0.02', 'average_return', 'IR']
    score_rows = []
    IC_ = pd.DataFrame()
    for factor in all_data.columns:
        factor_return = utils.get_clean_factor_and_forward_returns(
            all_data[factor], price, max_loss=0.99)
        ic_frame = performance.factor_information_coefficient(factor_return)
        # Second forward-return column, selected by position.
        ic_series = ic_frame.iloc[:, 1]
        IC_ = pd.concat([IC_, ic_series])

        ic_mean = ic_series.mean()
        score_rows.append([
            ic_mean,
            len(ic_series[ic_series > 0.02]) / len(ic_series),
            performance.factor_returns(factor_return).iloc[:, 1].mean(),
            ic_mean / ic_series.std(),
        ])

    # Build the frame once: DataFrame.append was removed in pandas 2.0, and a
    # flat list of labels avoids the accidental one-level MultiIndex that
    # `columns = [[...]]` created.
    IC_score = pd.DataFrame(score_rows, columns=score_columns)
    IC_score['factor'] = list(all_data.columns)

    return IC_score, IC_
Exemplo n.º 7
0
    def factor_and_forward_returns(self):
        """Return the alphalens factor/forward-return table for ``self.feature``.

        The feature table is re-indexed by (UTC-localized ``date``, ``code``)
        and a deep copy of ``self.stock_data.closepanel`` gets a UTC-localized
        datetime index. Both are then passed to
        ``get_clean_factor_and_forward_returns`` with 10 quantiles and
        periods (1, 5, 10).
        """
        flat = self.feature.reset_index()
        dated = flat.assign(date=pd.to_datetime(flat.date)).set_index('date')
        dated.index = dated.index.tz_localize('UTC')
        factor = dated.reset_index().set_index(['date', 'code'])

        # Work on a copy so the stored close panel keeps its original index.
        close = deepcopy(self.stock_data.closepanel)
        close.index = pd.to_datetime(close.index).tz_localize('UTC')

        options = dict(
            groupby=None,
            binning_by_group=False,
            quantiles=10,
            bins=None,
            periods=(1, 5, 10),
            filter_zscore=20,
            groupby_labels=None,
            max_loss=0.15,
            zero_aware=False,
            cumulative_returns=True,
        )
        return get_clean_factor_and_forward_returns(factor, close, **options)
Exemplo n.º 8
0
    2: 'small_MC',
    3: 'mid_MC',
    4: 'big_MC',
    5: 'very_big_MC'
}
mc_group.tail()

# #### The analysis starts here; it has four parts:
# ##### Quantiles Statistics, Returns Analysis, Information Analysis, Turnover Analysis

# #### Arrange the data in the required format:

get_ipython().magic('pinfo utils.get_clean_factor_and_forward_returns')

# In short: pass the data prepared above (factor values, prices, market-cap groups and the market-cap group labels, all in the expected layout) to get_clean_factor_and_forward_returns. It returns a multi-indexed DataFrame with the alpha (the 'factor' column), the forward returns for each period (1, 5, 10), the factor quantile number (factor_quantile) and, optionally, the group from another factor (here market cap) in 'group'.
facs_data_analysis = utils.get_clean_factor_and_forward_returns(
    series_facs_datas, price, groupby=mc_group, groupby_labels=mc_label)

# The 'factor' column has object dtype, so convert it to float for the analysis below.
# NOTE(review): np.float128 is not available on every platform -- confirm the target environment.
facs_data_analysis['factor'] = np.float128(facs_data_analysis['factor'])

# 1. Look at the summary
get_ipython().magic('pinfo tears.create_summary_tear_sheet')

# Returns a short summary covering Quantiles Statistics, Returns Analysis, Information Analysis and Turnover Analysis.

tears.create_summary_tear_sheet(facs_data_analysis)

# 2. Returns Analysis
get_ipython().magic('pinfo tears.create_returns_tear_sheet')

# Returns analysis:
Exemplo n.º 9
0
pipe.add(Alpha1(), 'alpha1')

from zipline_extensions_cn.research import *

start_date = '2019-01-04'
end_date = '2020-06-01'
results = run_pipeline(pipe, start_date, end_date)

# Price every asset in level 1 of the pipeline result index, i.e. every
# asset that appears at least once in the factor output.
# NOTE(review): the original notes say the pricing end date must be after
# run_pipeline()'s end date and that open pricing is generally preferable
# ("explained more in lesson 4"). This call reuses end_date and requests
# 'close' -- confirm that is intended.
pricing_data = get_pricing(
    tickers=results.index.levels[1],
    start_date=start_date,
    end_date=end_date,
    field='close',
)
from alphalens.utils import get_clean_factor_and_forward_returns

# Merge factor values with forward returns for 1-, 2- and 5-period horizons.
merged_data = get_clean_factor_and_forward_returns(
    factor=results,
    prices=pricing_data,
    periods=(1, 2, 5),
    max_loss=0.6,
    quantiles=3,
)

from alphalens.tears import *

create_full_tear_sheet(merged_data)
# In[10]:

# Read the 'close' column of the 'sp500/prices' table from the local HDF5 store.
with pd.HDFStore('../../data/assets.h5') as store:
    sp500 = store['sp500/prices'].close
# Resample to daily frequency with forward fill, localize to UTC, then keep
# only the labels found in level 0 of the `prices` index.
sp500 = sp500.resample('D').ffill().tz_localize('utc').filter(
    prices.index.get_level_values(0))
sp500.head()

# We can create the alphalens input data in the required format using the `get_clean_factor_and_forward_returns` utility function, which also returns the signal quantiles and the forward returns for the given holding periods:

# In[11]:

HOLDING_PERIODS = (5, 10, 21, 42)
QUANTILES = 5
alphalens_data = get_clean_factor_and_forward_returns(factor=factor_data,
                                                      prices=prices,
                                                      periods=HOLDING_PERIODS,
                                                      quantiles=QUANTILES)

# The `alphalens_data` `DataFrame` contains the returns on an investment in the given asset on a given date for the indicated holding period, as well as the factor value, that is, the asset's `MeanReversion` ranking on that date, and the corresponding quantile value:

# In[12]:

alphalens_data.head()

# The forward returns and the signal quantiles are the basis for evaluating the predictive power of the signal. Typically, a factor should deliver markedly different returns for distinct quantiles, such as negative returns for the bottom quintile of the factor values and positive returns for the top quantile.

# ## Summary Tear Sheet

# In[13]:

create_summary_tear_sheet(alphalens_data)
Exemplo n.º 11
0
6. 收益指标:回测收益,回测年化收益,基准收益,基准年化收益
   风险指标:最大回撤越小越好(30%以内), 夏普比率越大越好(1以上)
"""
import pandas as pd
import numpy as np
import scipy.stats as st
from alphalens import tears, performance, plotting, utils

df = pd.DataFrame([[1, 2], [4, 5]], columns=["A", "B"])

# Compute the Spearman rank correlation (Rank IC); its value lies in [-1, 1]
print(st.spearmanr(df["A"], df["B"]))

"""使用alphalens更简易的做因子分析"""
# Feed the factor table and the close-price table in; the forward returns are computed and merged with the factor table into one combined factor-data table
# NOTE(review): the string literals "factor" and "price" look like placeholders for the real factor and price objects -- confirm before running.
factor_data = utils.get_clean_factor_and_forward_returns("factor", "price")
# Compute the factor IC
IC = performance.factor_information_coefficient(factor_data)
# IC time series with moving average: shows whether the factor's sign is stable over time
plotting.plot_ic_ts(IC)
# Histogram of the IC distribution, with IC mean and standard deviation
plotting.plot_ic_hist(IC)
# Heatmap
mean_monthly_ic = performance.mean_information_coefficient(factor_data, by_time="1m")
plotting.plot_monthly_ic_heatmap(mean_monthly_ic)
# Full information (IC) analysis tear sheet
tears.create_information_tear_sheet(factor_data)

# Returns analysis
tears.create_returns_tear_sheet(factor_data)
# Per-period factor return
Exemplo n.º 12
0
def alpha_factor_function():
    """Return the placeholder factor value, the constant 10."""
    placeholder_value = 10
    return placeholder_value


def make_pipeline():
    """Build a Pipeline whose only column, 'column_name', holds alpha_factor_function()'s value."""
    pipeline_columns = {'column_name': alpha_factor_function()}
    return Pipeline(columns=pipeline_columns)


my_pipe = make_pipeline()

# Run the pipeline over the two-year window and drop rows with missing values.
pipeline_data = run_pipeline(
    my_pipe,
    start_date='2014-1-1',
    end_date='2016-1-1',
).dropna()

# Alphalens (second cell)

from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# Price every asset in level 1 of the pipeline index, i.e. every asset that
# appears at least once in the pipeline output. The pricing window ends at
# '2016-2-1', later than the pipeline's end date; the original note
# describes this as "1 trading day after end date of pipeline".
pricing_data = get_pricing(
    symbols=pipeline_data.index.levels[1],
    start_date='2014-1-1',
    end_date='2016-2-1',
    fields='open_price',
)

merged_data = get_clean_factor_and_forward_returns(
    factor=pipeline_data['column_name'],
    prices=pricing_data,
)

create_full_tear_sheet(merged_data)
    return Pipeline(columns={
        'factor to analyze': factor_to_analyze,
        'sector': sector
    },
                    screen=base_universe & sector.notnull()
                    & factor_to_analyze.notnull())


# Run the pipeline for 2012-2019 and fetch open prices through mid-2020, so
# the pricing window extends beyond the end of the factor data.
factor_data = run_pipeline(make_pipeline(), '2012-1-1', '2019-1-1')
pricing_data = get_pricing(factor_data.index.levels[1],
                           '2012-1-1',
                           '2020-6-1',
                           fields='open_price')

# Assignment targets are bound left to right: sector_labels becomes the dict
# first, then its key -1 is set to "Unknown".
sector_labels, sector_labels[-1] = dict(Sector.SECTOR_NAMES), "Unknown"

# Quantiles are computed within each sector (binning_by_group=True).
merged_data = get_clean_factor_and_forward_returns(
    factor=factor_data['factor to analyze'],
    prices=pricing_data,
    quantiles=5,
    groupby=factor_data['sector'],
    groupby_labels=sector_labels,
    binning_by_group=True,
    periods=(198, 252))  # week = 5, month = 21, quarter = 63, year = 252

mean_information_coefficient(merged_data).plot(title="IC Decay")

# Sector-level tear sheets, followed by the full tear sheet.
create_information_tear_sheet(merged_data, by_group=True, group_neutral=True)
create_returns_tear_sheet(merged_data, by_group=True, group_neutral=True)
create_full_tear_sheet(merged_data)
Exemplo n.º 14
0
 def _get_clean_factor_and_fwd_return(self):
     """Build the alphalens factor/forward-return table grouped by industry.

     Uses ``self._factor``, ``self._price`` and ``self._industry`` with the
     labels returned by ``IndexComp.get_industry_name_dict()``.
     """
     call_kwargs = {
         'factor': self._factor,
         'prices': self._price,
         'groupby': self._industry,
         'groupby_labels': IndexComp.get_industry_name_dict(),
     }
     return get_clean_factor_and_forward_returns(**call_kwargs)
Exemplo n.º 15
0
# stock_factor_1 = stock_factor_1.groupby('trade_date').apply(winsorize_series)
# stock_factor_1 = stock_factor_1.groupby('trade_date').apply(standardize_series)
# stock_factor_2 = stock_factor_2.groupby('trade_date').apply(winsorize_series)
# stock_factor_2 = stock_factor_2.groupby('trade_date').apply(standardize_series)
# Only the first factor is analysed here; it is cast to float before being passed to alphalens.
stock_factor = stock_factor_1
stock_factor = stock_factor.astype(float)

#   Shenwan (SW) industry classification
# code -> index_code is used for grouping; index_code -> index_name gives the labels.
industry_dict = sw_industry.set_index('code')['index_code'].to_dict()
industry_labels = sw_industry.set_index('index_code')['index_name'].to_dict()

#   Analyze the factor
# NOTE(review): industry_labels is built above but not passed as groupby_labels here -- confirm that is intended.
factor_data = get_clean_factor_and_forward_returns(stock_factor,
                                                   stock_close,
                                                   groupby=industry_dict,
                                                   quantiles=5,
                                                   binning_by_group=False,
                                                   periods=(1, 5, 10),
                                                   filter_zscore=None)

create_full_tear_sheet(factor_data,
                       long_short=False,
                       group_neutral=False,
                       by_group=False)
# create_summary_tear_sheet(factor_data, long_short=True, group_neutral=True)
# create_event_returns_tear_sheet(factor_data, stock_close, avgretplot=(3, 11),
#                                 long_short=False, group_neutral=True, by_group=True)
# create_event_study_tear_sheet(factor_data, stock_close)
# returns, positions, benchmark_rets = create_pyfolio_input(factor_data,
#                                                           period='1D',
#                                                           capital=None,
Exemplo n.º 16
0
import pandas as pd
import tushare as ts
from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# Download daily bars for two stocks from tushare and index them by trade date.
pro = ts.pro_api()
df = pro.daily(ts_code='000001.SZ,600982.SH',
               start_date='20200101',
               end_date='20211122')
df.index = pd.to_datetime(df['trade_date'])
df.index.name = None
df.sort_index(inplace=True)

# Multi-indexed factor table: the first index level is the date, the second is the stock code
assets = df.set_index([df.index, df['ts_code']], drop=True)

# Columns are stock codes, the index is the date, and the values are closing prices
close = df.pivot_table(index='trade_date', columns='ts_code', values='close')
close.index = pd.to_datetime(close.index)

from alphalens.utils import get_clean_factor_and_forward_returns
from alphalens.tears import create_full_tear_sheet

# The daily percentage change ('pct_chg') is used as the factor.
ret = get_clean_factor_and_forward_returns(assets[['pct_chg']], close)
create_full_tear_sheet(ret, long_short=False)

# https://github.com/Ckend/alphalens
Exemplo n.º 17
0
    def get_factors_ic_df(self,
                          factors_dict,
                          pool,
                          start,
                          end,
                          periods=(1, 5, 10),
                          quantiles=None,
                          bins=None,
                          price=None):
        """
        Build, for every holding period, the matrix of IC time series of several factors.

        :param factors_dict: dict of factors, e.g.
                             {"factor_name_1": factor_1, "factor_name_2": factor_2}.
                             Each factor is a MultiIndex Series indexed by date (level 0)
                             and asset (level 1) holding the factor values.
        :param pool: stock universe, e.g. ["000001.XSHE", "600300.XSHG", ...]. A plain list
                     or any object with a ``tolist()`` method is accepted. Only used when
                     ``price`` is None.
        :param start: start time (datetime). Only used when ``price`` is None.
        :param end: end time (datetime). Only used when ``price`` is None.
        :param periods: holding periods (tuple of int).
        :param quantiles: number of quantiles used to split the pool by factor value (int);
                          forwarded to alphalens.
        :param bins: forwarded to alphalens as ``bins``.
        :param price (optional): price time series for all stocks in the pool (pd.DataFrame),
                                 indexed by datetime with one column per stock code. When
                                 None, daily candles are fetched through ``DataAPI.candle``
                                 and the "close" slice is used.
        :return: ic_df_dict, a dict keyed by period (int) whose values are IC matrices (ic_df),
                 e.g. {1: ic_df_1, 5: ic_df_5, 10: ic_df_10}.
                 Each ic_df is a pd.DataFrame indexed by datetime with one column per factor
                 name from factors_dict, for example:

                                  BP       CFP        EP  ILLIQUIDITY    REVS20      SRMI     VOL20
                date
                2016-06-24  0.165260  0.002198  0.085632    -0.078074  0.173832  0.214377  0.068445
                2016-06-27  0.165537  0.003583  0.063299    -0.048674  0.180890  0.202724  0.081748
                2016-06-28  0.135215  0.010403  0.059038    -0.034879  0.111691  0.122554  0.042489
                2016-06-29  0.068774  0.019848  0.058476    -0.049971  0.042805  0.053339  0.079592
                2016-06-30  0.039431  0.012271  0.037432    -0.027272  0.010902  0.077293 -0.050667
        """

        from fxdayu_data import DataAPI
        import datetime
        import numpy as np
        import pandas as pd
        from alphalens import utils, performance

        def get_price_data(pool, start, end, max_window=10):
            # Pad the window by max_window calendar days on both sides.
            data = DataAPI.candle(tuple(pool), "D",
                                  start=start - datetime.timedelta(days=max_window),
                                  end=end + datetime.timedelta(days=max_window))
            # Zero prices are replaced by NaN. np.nan is used because the
            # np.NaN alias no longer exists in NumPy 2.0.
            data = data.replace(to_replace=0, value=np.nan)
            return data

        if price is None:
            # `pool` is documented as a list, which has no .tolist(); accept
            # both plain lists and array-likes.
            pool_list = pool.tolist() if hasattr(pool, "tolist") else list(pool)
            price_data = get_price_data(pool_list, start, end, max_window=max(periods))
            price = price_data.minor_xs("close")

        ic_dict = {}
        for factor_name, factor_value in factors_dict.items():
            # Per-stock holding returns.
            stocks_holding_return = utils.get_clean_factor_and_forward_returns(factor_value, price, quantiles=quantiles,
                                                                               bins=bins, periods=periods)
            ic_dict[factor_name] = performance.factor_information_coefficient(stocks_holding_return)

        # Union of all factor dates (index level 0); used as the index of every IC matrix.
        times = sorted(pd.concat([pd.Series(factor_value.index.levels[0])
                                  for factor_value in factors_dict.values()]).unique())
        ic_df_dict = {}
        for period in periods:
            ic_table = []
            for factor_name, ic in ic_dict.items():
                ic_by_period = pd.DataFrame(ic[period])
                ic_by_period.columns = [factor_name, ]
                ic_table.append(ic_by_period)
            # Keep only dates where every factor has an IC, then align to the full date list.
            ic_df_dict[period] = pd.concat(ic_table, axis=1).dropna()
            ic_df_dict[period] = ic_df_dict[period].reindex(times)

        return ic_df_dict
# Build the factor from the 'predicted' column of the predictions after swapping its two index levels.
lr_factor = get_factor(lr_predictions.predicted.swaplevel())
lr_factor.head()

# In[10]:

# Unique values of the 'symbol' index level of the factor.
tickers = lr_factor.index.get_level_values('symbol').unique()

# In[11]:

# NOTE(review): 2014 and 2017 are presumably the first and last year of the price window -- confirm against get_trade_prices.
trade_prices = get_trade_prices(tickers, 2014, 2017)
trade_prices.info()

# In[12]:

# Merge the factor with forward returns for 1-, 5-, 10- and 21-period horizons, using 5 quantiles.
lr_factor_data = get_clean_factor_and_forward_returns(factor=lr_factor,
                                                      prices=trade_prices,
                                                      quantiles=5,
                                                      periods=(1, 5, 10, 21))
lr_factor_data.info()

# In[13]:

create_summary_tear_sheet(lr_factor_data)

# ## Ridge Regression

# In[14]:

# Keep only the predictions whose alpha equals the selected best alpha, then drop the 'alpha' column.
best_ridge_alpha = get_best_alpha(ridge_scores)
ridge_predictions = ridge_predictions[ridge_predictions.alpha ==
                                      best_ridge_alpha].drop('alpha', axis=1)