Example #1
def get_momentum(stock_list, date, market_cap_on_current_day):

    trading_date_525_before = rqdatac.get_trading_dates(date - timedelta(days=1000), date, country='cn')[-525]

    trading_date_21_before = rqdatac.get_trading_dates(date - timedelta(days=40), date, country='cn')[-21]

    # A total of 525 - 21 = 504 trading days of returns are needed

    exp_weight = get_exponential_weight(half_life=126, length=504)

    # Fetch price data; for delisted stocks, forward-fill the price (so the daily return is 0)

    daily_return = rqdatac.get_price(stock_list, trading_date_525_before, trading_date_21_before, frequency='1d',
                                     fields='close').fillna(method='ffill').pct_change()[1:]

    # Drop stocks whose return series contains missing values

    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index

    daily_return = daily_return.drop(inds, axis=1)

    # Convert the compounded (annualized) risk-free rate into a daily rate

    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=trading_date_525_before, end_date=date, tenor='0S')

    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[daily_return.index]

    relative_strength = np.log(1 + daily_return).T.subtract(np.log(1 + risk_free_return.iloc[:, 0])).dot(exp_weight)

    processed_relative_strength = winsorization_and_market_cap_weighed_standardization(relative_strength,
                                                                                       market_cap_on_current_day[
                                                                                           relative_strength.index])

    return processed_relative_strength
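Examples #1 and #4 call a get_exponential_weight(half_life, length) helper that is not shown on this page. A minimal sketch of what such a helper might look like, assuming the weights halve every half_life observations, run from oldest to newest, and are normalized to sum to one (ordering and normalization are assumptions, not confirmed by the source):

import numpy as np

def get_exponential_weight(half_life, length):
    # Assumed behaviour: exponentially decaying weights with the given half-life,
    # ordered oldest (smallest weight) to newest (largest), normalized to sum to 1.
    decay = 0.5 ** (1.0 / half_life)
    weights = decay ** np.arange(length - 1, -1, -1)
    return weights / weights.sum()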
Example #2
def gen_yield_curve(d):
    yield_curve = rqdatac.get_yield_curve(start_date=START_DATE,
                                          end_date=datetime.date.today())
    yield_curve.index = [
        convert_date_to_date_int(d) for d in yield_curve.index
    ]
    yield_curve.index.name = 'date'
    with h5py.File(os.path.join(d, 'yield_curve.h5'), 'w') as f:
        f.create_dataset('data', data=yield_curve.to_records())
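For reference, the HDF5 file written by gen_yield_curve can be read back with h5py; this sketch assumes the file sits in the current working directory under the name used above:

import h5py
import pandas as pd

# Load the records written by gen_yield_curve and rebuild the yield curve DataFrame.
with h5py.File('yield_curve.h5', 'r') as f:
    records = f['data'][:]

yield_curve = pd.DataFrame.from_records(records, index='date')
print(yield_curve.head())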
Example #3
def get_cumulative_range(stock_list, date, market_cap_on_current_day):

    trading_date_253_before = rqdatac.get_trading_dates(date -
                                                        timedelta(days=500),
                                                        date,
                                                        country='cn')[-253]

    daily_return = rqdatac.get_price(
        stock_list,
        trading_date_253_before,
        date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    # Drop stocks whose return series contains missing values

    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index

    daily_return = daily_return.drop(inds, axis=1)

    # Convert the compounded (annualized) risk-free rate into a daily rate

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=trading_date_253_before, end_date=date, tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[daily_return.index]

    # Each window covers 21 trading days

    spliting_points = np.arange(0, 273, 21)

    cumulative_return = pd.DataFrame()

    for period in range(1, len(spliting_points)):

        compounded_return = (
            (1 + daily_return.iloc[spliting_points[0]:spliting_points[period]]
             ).cumprod() - 1).iloc[-1]

        compounded_risk_free_return = (
            (1 +
             risk_free_return.iloc[spliting_points[0]:spliting_points[period]]
             ).cumprod() - 1).iloc[-1]

        cumulative_return[period] = np.log(1 + compounded_return).subtract(
            np.log(1 + compounded_risk_free_return.iloc[0]))

    cumulative_return = cumulative_return.cumsum(axis=1)

    processed_cumulative_range = winsorization_and_market_cap_weighed_standardization(
        cumulative_return.T.max() - cumulative_return.T.min(),
        market_cap_on_current_day)

    return processed_cumulative_range
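The function above splits 252 trading days into twelve 21-day periods, accumulates log returns over the first 1, 2, ..., 12 periods, and takes the spread between the largest and smallest accumulation. A simplified toy illustration of that range on simulated returns (it omits the risk-free adjustment, the extra cumulative sum, and the market-cap-weighted standardization applied in the example):

import numpy as np
import pandas as pd

np.random.seed(0)
daily_excess = pd.Series(np.random.normal(0.0005, 0.02, 252))  # simulated daily excess returns

split_points = np.arange(0, 273, 21)  # 0, 21, ..., 252 -> twelve nested windows
cumulative_log_returns = [np.log((1 + daily_excess.iloc[:p]).prod()) for p in split_points[1:]]
cumulative_range = max(cumulative_log_returns) - min(cumulative_log_returns)
print(cumulative_range)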
Example #4
def get_stock_beta(stock_list, stock_excess_return, benchmark,
                   latest_trading_date, market_cap_on_current_day):

    trading_date_253_before = rqdatac.get_trading_dates(latest_trading_date -
                                                        timedelta(days=500),
                                                        latest_trading_date,
                                                        country='cn')[-253]

    exp_weight = get_exponential_weight(half_life=63, length=252)

    weighted_stock_excess_return = stock_excess_return.T.multiply(exp_weight).T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=trading_date_253_before,
        end_date=latest_trading_date,
        tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[stock_excess_return.index]

    market_portfolio_daily_return = rqdatac.get_price(
        benchmark,
        trading_date_253_before,
        latest_trading_date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    market_portfolio_excess_return = market_portfolio_daily_return.subtract(
        risk_free_return.iloc[:, 0])

    weighted_market_portfolio_excess_return = market_portfolio_excess_return.multiply(
        exp_weight).T

    weighted_market_portfolio_variance = weighted_market_portfolio_excess_return.var(
    )

    beta = [
        weighted_market_portfolio_excess_return.cov(
            weighted_stock_excess_return[stock]) /
        weighted_market_portfolio_variance
        for stock in stock_excess_return.columns
    ]

    stock_beta = pd.Series(beta, index=stock_excess_return.columns)

    # Impute missing beta values with a regression-based approach

    imputed_stock_beta = individual_factor_imputation(
        stock_list, stock_beta, market_cap_on_current_day,
        latest_trading_date.strftime('%Y-%m-%d'))

    return imputed_stock_beta
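The beta above is the covariance of the exponentially weighted stock excess returns with the weighted market excess returns, divided by the weighted market variance. A toy version on simulated data, mirroring the computation in the example (weights applied to the return series before taking covariance, not a textbook weighted regression) and reusing the get_exponential_weight sketch given after Example #1:

import numpy as np
import pandas as pd

np.random.seed(1)
n_days = 252
market = pd.Series(np.random.normal(0.0003, 0.01, n_days))
stock = 1.2 * market + pd.Series(np.random.normal(0.0, 0.005, n_days))  # true beta around 1.2

exp_weight = get_exponential_weight(half_life=63, length=n_days)  # helper sketched after Example #1
weighted_stock = stock * exp_weight
weighted_market = market * exp_weight

beta = weighted_market.cov(weighted_stock) / weighted_market.var()
print(round(beta, 2))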
Example #5
def get_daily_excess_return(stock_list, start_date, end_date):

    # Fetch price data; for delisted stocks, forward-fill the price (so the daily return is 0)

    stock_daily_return = rqdatac.get_price(
        stock_list,
        rqdatac.get_previous_trading_date(start_date),
        end_date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    # Drop stocks whose return series contains missing values

    inds = stock_daily_return.isnull().sum()[
        stock_daily_return.isnull().sum() > 0].index

    filtered_stock_daily_return = stock_daily_return.drop(inds, axis=1)

    # Testing shows that the CSI All Share Index (000985) works best as the market portfolio

    market_portfolio_daily_return = rqdatac.get_price(
        '000985.XSHG',
        rqdatac.get_previous_trading_date(start_date),
        end_date,
        frequency='1d',
        fields='close').pct_change()[1:]

    # Compute the daily risk-free rate

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=start_date, end_date=end_date, tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[filtered_stock_daily_return.index]

    daily_excess_return = filtered_stock_daily_return.T.subtract(
        risk_free_return.iloc[:, 0]).T

    market_portfolio_daily_excess_return = market_portfolio_daily_return.subtract(
        risk_free_return.iloc[:, 0])

    return daily_excess_return, market_portfolio_daily_excess_return
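A minimal usage sketch of get_daily_excess_return; the order book IDs and dates are illustrative only, and rqdatac must be initialised with credentials appropriate to your environment:

import rqdatac

rqdatac.init()  # connection/authentication arguments depend on your RQData setup

stock_list = ['000001.XSHE', '600000.XSHG']  # illustrative order book IDs
excess_return, market_excess_return = get_daily_excess_return(
    stock_list, '2018-01-02', '2018-06-29')

print(excess_return.tail())
print(market_excess_return.tail())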
Example #6
def factor_return_estimation(latest_trading_date, factor_exposure):

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Compute the daily risk-free rate

    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Use the square root of market cap as the weight for weighted least squares

    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    missing_market_cap_stock = market_cap[market_cap.isnull() ==
                                          True].index.tolist()

    if len(missing_market_cap_stock) > 0:

        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T

        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T

        market_cap[market_cap.isnull() == True] = (
            price * shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Total market cap per industry, used in the industry-return constraint

    if str(previous_trading_date) > '2014-01-01':

        industry_factors = ['农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料', '纺织服装', '轻工制造',\
                            '医药生物', '公用事业', '交通运输', '房地产', '商业贸易', '休闲服务','综合', '建筑材料',  '建筑装饰', '电气设备',\
                            '国防军工', '计算机', '传媒', '通信', '银行', '非银金融', '汽车', '机械设备']
    else:

        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器',
            '交运设备', '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备',
            '纺织服装', '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    #style_factor = ['beta', 'momentum', 'earnings_yield', 'residual_volatility', 'growth', 'book_to_price',
    #                'leverage', 'liquidity','size','non_linear_size']

    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    factor_return_series = pd.DataFrame()

    # No constraints on the 10 style factors; constraints on the 32 GICS industries

    factor_return_series['whole_market'] = constrainted_weighted_least_square(Y = daily_excess_return[market_cap.index].values[0], X = factor_exposure.loc[market_cap.index], weight = normalized_regression_weight,\
                                                                     industry_total_market_cap = industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(industry_total_market_cap))

    # CSI 300

    csi_300_components = rqdatac.index_components(index_name='000300.XSHG',
                                                  date=previous_trading_date)

    csi_300_components = list(
        set(market_cap.index.tolist()).intersection(set(csi_300_components)))

    # Total market cap per industry, used in the industry-return constraint

    csi_300_industry_total_market_cap = market_cap[csi_300_components].dot(
        factor_exposure[industry_factors].loc[csi_300_components])

    # If an industry's total market cap is below 100, the benchmark is treated as having no allocation to that industry

    missing_industry = csi_300_industry_total_market_cap[
        csi_300_industry_total_market_cap < 100].index

    csi_300_industry_total_market_cap = csi_300_industry_total_market_cap.drop(
        missing_industry)

    # Orthogonalize the non-linear size exposure against size within the CSI 300 universe and recompute it from its definition

    csi_300_factor_exposure = factor_exposure.loc[csi_300_components]

    csi_300_factor_exposure['non_linear_size'] = orthogonalize(
        target_variable=np.power(csi_300_factor_exposure['size'], 3),
        reference_variable=csi_300_factor_exposure['size'],
        regression_weight=np.sqrt(market_cap[csi_300_components]) /
        (np.sqrt(market_cap[csi_300_components]).sum()))

    factor_return_series['csi_300'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][csi_300_components].values[0], X = csi_300_factor_exposure.drop(missing_industry, axis=1), weight = normalized_regression_weight[factor_exposure.index][csi_300_components],\
                                                                industry_total_market_cap = csi_300_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_300_industry_total_market_cap))

    # CSI 500

    csi_500_components = rqdatac.index_components(index_name='000905.XSHG',
                                                  date=previous_trading_date)

    csi_500_components = list(
        set(market_cap.index.tolist()).intersection(set(csi_500_components)))

    csi_500_industry_total_market_cap = market_cap[csi_500_components].dot(
        factor_exposure[industry_factors].loc[csi_500_components])

    missing_industry = csi_500_industry_total_market_cap[
        csi_500_industry_total_market_cap < 100].index

    csi_500_industry_total_market_cap = csi_500_industry_total_market_cap.drop(
        missing_industry)

    # Orthogonalize the non-linear size exposure against size within the CSI 500 universe and recompute it from its definition

    csi_500_factor_exposure = factor_exposure.loc[csi_500_components]

    csi_500_factor_exposure['non_linear_size'] = orthogonalize(
        target_variable=np.power(csi_500_factor_exposure['size'], 3),
        reference_variable=csi_500_factor_exposure['size'],
        regression_weight=np.sqrt(market_cap[csi_500_components]) /
        (np.sqrt(market_cap[csi_500_components]).sum()))

    factor_return_series['csi_500'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][csi_500_components].values[0], X = csi_500_factor_exposure.drop(missing_industry, axis=1), weight = normalized_regression_weight[factor_exposure.index][csi_500_components],\
                                                                industry_total_market_cap = csi_500_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_500_industry_total_market_cap))

    ### CSI 800

    csi_800_components = rqdatac.index_components(index_name='000906.XSHG',
                                                  date=previous_trading_date)

    csi_800_components = list(
        set(market_cap.index.tolist()).intersection(set(csi_800_components)))

    csi_800_industry_total_market_cap = market_cap[csi_800_components].dot(
        factor_exposure[industry_factors].loc[csi_800_components])

    missing_industry = csi_800_industry_total_market_cap[
        csi_800_industry_total_market_cap < 100].index

    csi_800_industry_total_market_cap = csi_800_industry_total_market_cap.drop(
        missing_industry)

    # Orthogonalize the non-linear size exposure against size within the CSI 800 universe and recompute it from its definition

    csi_800_factor_exposure = factor_exposure.loc[csi_800_components]

    csi_800_factor_exposure['non_linear_size'] = orthogonalize(
        target_variable=np.power(csi_800_factor_exposure['size'], 3),
        reference_variable=csi_800_factor_exposure['size'],
        regression_weight=np.sqrt(market_cap[csi_800_components]) /
        (np.sqrt(market_cap[csi_800_components]).sum()))

    factor_return_series['csi_800'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][csi_800_components].values[0], X = csi_800_factor_exposure.drop(missing_industry, axis =1), weight = normalized_regression_weight[factor_exposure.index][csi_800_components],\
                                                                industry_total_market_cap = csi_800_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_800_industry_total_market_cap))

    # If the index holds no stocks in a given industry, that factor return is set to 0

    return factor_return_series.replace(np.nan, 0)
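constrainted_weighted_least_square is not defined on this page. Setting the industry constraint aside, the core of the estimation is a cross-sectional weighted least squares with square-root-of-market-cap weights; a bare-bones sketch of that unconstrained step is below (the constrained version additionally forces the cap-weighted sum of industry factor returns to zero, which this sketch does not do):

import numpy as np

def weighted_least_square(Y, X, weight):
    # Unconstrained WLS: minimize sum_i w_i * (y_i - x_i @ b)**2 via the normal equations.
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    W = np.diag(np.asarray(weight, dtype=float))
    return np.linalg.solve(X.T @ W @ X, X.T @ W @ Y)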
Example #7
def black_litterman_prep(order_book_ids,
                         start_date,
                         investors_views,
                         investors_views_indicate_M,
                         investors_views_uncertainty=None,
                         asset_type=None,
                         market_weight=None,
                         risk_free_rate_tenor=None,
                         risk_aversion_coefficient=None,
                         excess_return_cov_uncertainty=None,
                         confidence_of_views=None,
                         windows=None,
                         data_freq=None):
    """
    Generate expected return and expected return covariance matrix with Black-Litterman model. Suppose we have N assets
    and K views. The method can only support daily data so far.
    It's highly recommended to use your own ways to create investors_views_uncertainty, risk_aversion_coefficient and
    excess_return_cov_uncertainty beforehand to get the desired distribution parameters.
    :param order_book_ids: str list. A group of assets;
    :param asset_type: str. "fund" or "stock";
    :param start_date: str. The first day of backtest period;
    :param windows: int. Interval length of sample; Default: 132;
    :param investors_views: K*1 numpy matrix. Each row represents one view;
    :param investors_views_indicate_M: K*N numpy matrix. Each row corresponds to one view. Indicate which view is
    involved during calculation;
    :param investors_views_uncertainty: K*K diagonal matrix, optional. If it is skipped, He and Litterman's method will
    be called to generate diagonal matrix if confidence_of_view is also skipped; Idzorek's method will be called if
    confidence_of_view is passed in; Has to be non-singular;
    :param market_weight: floats list, optional. Weights for market portfolio; Default: Equal weights portfolio;
    :param risk_free_rate_tenor: str, optional. Risk free rate term. Default: "0S"; Support input: "0S", "1M", "3M",
    "6M", "1Y";
    :param risk_aversion_coefficient: float, optional. If no risk_aversion_coefficient is passed in, then
    risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility;
    :param excess_return_cov_uncertainty: float, optional. Default: 1/T where T is the time length of sample;
    :param confidence_of_views: floats list, optional. Represent investors' confidence levels on each view.
    :param data_freq: str. Support input: "D": daily data; "W": weekly data; "M": monthly data.
    Weekly data means the close price at the end of each week is taken; monthly means the close price at the end of each
    month. When weekly or monthly data are used, trading suspensions are not handled, nor are public holidays on which
    there is no trading; users should therefore request a slightly larger window to obtain the desired data length.
    Be very careful when using weekly or monthly data to avoid ending up with too few observations.
    :return:
    numpy matrix. Expected return vector;
    numpy matrix. Covariance matrix of expected return;
    float. risk_aversion_coefficient;
    numpy ndarray. investors_views_uncertainty.
    """

    risk_free_rate_dict = {'0S': 1, '1M': 30, '3M': 92, '6M': 183, '1Y': 365}

    if market_weight is None:
        market_weight = pd.DataFrame([1 / len(order_book_ids)] *
                                     len(order_book_ids),
                                     index=order_book_ids)
    if windows is None:
        windows = 132
    if data_freq is None:
        data_freq = "D"
    if asset_type is None:
        asset_type = "fund"
    if risk_free_rate_tenor is None:
        risk_free_rate_tenor = "0S"

    # Clean data
    end_date = rqdatac.get_previous_trading_date(start_date)
    end_date = pd.to_datetime(end_date)
    clean_period_prices, reset_start_date = (data_process(
        order_book_ids, asset_type, start_date, windows, data_freq)[i]
                                             for i in [0, 2])

    if excess_return_cov_uncertainty is None:
        excess_return_cov_uncertainty = 1 / clean_period_prices.shape[0]

    # Fetch risk free rate data
    reset_start_date = rqdatac.get_next_trading_date(reset_start_date)
    risk_free_rate = rqdatac.get_yield_curve(reset_start_date,
                                             end_date,
                                             tenor=risk_free_rate_tenor,
                                             country='cn')
    if data_freq != "D":
        risk_free_rate = risk_free_rate.asfreq(data_freq, method="pad")
    risk_free_rate[data_freq] = pd.Series(
        np.power(1 + risk_free_rate.iloc[:, 0],
                 risk_free_rate_dict[risk_free_rate_tenor] / 365) - 1,
        index=risk_free_rate.index)

    # Calculate risk premium for each equity
    clean_period_prices_pct_change = clean_period_prices.pct_change()
    clean_period_excess_return = clean_period_prices_pct_change.subtract(
        risk_free_rate[data_freq], axis=0)

    # Wash out the ones in kick_out_list
    clean_market_weight = market_weight.loc[clean_period_prices.columns.values]
    temp_sum_weight = clean_market_weight.sum()
    clean_market_weight = clean_market_weight.div(temp_sum_weight)

    # If no risk_aversion_coefficient is passed in, then
    # risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility
    if risk_aversion_coefficient is None:
        market_portfolio_return = np.dot(clean_period_prices_pct_change,
                                         clean_market_weight)
        risk_aversion_coefficient = ((market_portfolio_return[1:].mean() -
                                      risk_free_rate[data_freq].mean()) /
                                     market_portfolio_return[1:].var())

    clean_period_excess_return_cov = clean_period_excess_return[1:].cov()
    equilibrium_return = np.multiply(
        np.dot(clean_period_excess_return_cov, clean_market_weight),
        risk_aversion_coefficient)

    # Generate the investors_views_uncertainty matrix if none is passed in
    if investors_views_uncertainty is None:
        if confidence_of_views is None:
            # He and Litterman's (1999) method to generate the uncertainty diagonal matrix; confidence levels on the
            # views are not needed.
            Omeg_diag = list()
            for i in range(investors_views_indicate_M.shape[0]):
                temp = np.dot(
                    np.dot(investors_views_indicate_M[i, :],
                           clean_period_excess_return_cov),
                    investors_views_indicate_M[
                        i, :].transpose()) * excess_return_cov_uncertainty
                Omeg_diag.append(temp.item(0))
            investors_views_uncertainty = np.diag(Omeg_diag)
        else:
            # Idzorek's (2002) method: users can specify their confidence level on each view.
            Omeg_diag = list()
            for i in range(len(investors_views)):
                part1 = excess_return_cov_uncertainty * np.dot(
                    clean_period_excess_return_cov,
                    investors_views_indicate_M[i, :].transpose())
                part2 = 1 / (excess_return_cov_uncertainty * np.dot(
                    investors_views_indicate_M[i, :],
                    np.dot(clean_period_excess_return_cov,
                           investors_views_indicate_M[i, :].T)))
                part3 = investors_views[i] - np.dot(
                    investors_views_indicate_M[i, :], equilibrium_return)
                return_with_full_confidence = equilibrium_return + np.multiply(
                    part2 * part3, part1)
                weights_with_full_confidence = np.dot(
                    np.linalg.inv(
                        np.multiply(risk_aversion_coefficient,
                                    clean_period_excess_return_cov)),
                    return_with_full_confidence)
                temp1 = weights_with_full_confidence - clean_market_weight
                temp2 = np.multiply(
                    confidence_of_views[i],
                    np.absolute(investors_views_indicate_M[i, :].transpose()))
                tilt = np.multiply(temp1, temp2)
                weights_with_partial_confidence = clean_market_weight.values + tilt

                def objective_fun(x):
                    temp1 = np.linalg.inv(
                        np.multiply(risk_aversion_coefficient,
                                    clean_period_excess_return_cov))
                    temp2 = np.linalg.inv(
                        np.linalg.inv(
                            np.multiply(excess_return_cov_uncertainty,
                                        clean_period_excess_return_cov)) +
                        np.multiply(
                            np.reciprocal(x),
                            np.dot(
                                investors_views_indicate_M[i, :].transpose(),
                                investors_views_indicate_M[i, :])))
                    temp3 = (np.dot(
                        np.linalg.inv(
                            np.multiply(excess_return_cov_uncertainty,
                                        clean_period_excess_return_cov)),
                        equilibrium_return) + np.multiply(
                            investors_views[i] * np.reciprocal(x),
                            investors_views_indicate_M[i, :].transpose()))
                    wk = np.dot(temp1, np.dot(temp2, temp3))
                    return np.linalg.norm(
                        np.subtract(weights_with_partial_confidence, wk))

                # Upper bound should be consistent with the magnitude of return
                upper_bound = abs(equilibrium_return.mean()) * 100
                omega_k = sc_opt.minimize_scalar(objective_fun,
                                                 bounds=(10**-8, upper_bound),
                                                 method="bounded",
                                                 options={"xatol": 10**-8})
                Omeg_diag.append(omega_k.x.item(0))
            investors_views_uncertainty = np.diag(Omeg_diag)

    # Combine all the information above to get the distribution of expected return with given views
    combined_return_covar = np.linalg.inv(
        np.linalg.inv(
            np.multiply(excess_return_cov_uncertainty,
                        clean_period_excess_return_cov)) + np.dot(
                            np.dot(investors_views_indicate_M.transpose(),
                                   np.linalg.inv(investors_views_uncertainty)),
                            investors_views_indicate_M))
    temp1 = np.dot(
        np.linalg.inv(
            np.multiply(excess_return_cov_uncertainty,
                        clean_period_excess_return_cov)), equilibrium_return)
    temp2 = np.dot(
        np.dot(investors_views_indicate_M.transpose(),
               np.linalg.inv(investors_views_uncertainty)), investors_views)
    temp = temp1 + temp2

    combined_return_mean = np.dot(combined_return_covar, temp)

    return combined_return_mean, combined_return_covar, risk_aversion_coefficient, investors_views_uncertainty
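A sketch of how black_litterman_prep might be called for three assets and a single view ("asset 1 outperforms asset 2 by 2%"). The fund codes are placeholders, rqdatac must be initialised first, and the data_process helper used inside the function must be importable:

import numpy as np

order_book_ids = ['fund_a', 'fund_b', 'fund_c']  # placeholders; replace with real fund codes
investors_views = np.matrix([[0.02]])            # K = 1 view
investors_views_indicate_M = np.matrix([[1.0, -1.0, 0.0]])  # view is on asset 1 minus asset 2

mean, covar, risk_aversion, omega = black_litterman_prep(
    order_book_ids,
    start_date='2018-06-01',
    investors_views=investors_views,
    investors_views_indicate_M=investors_views_indicate_M,
    asset_type='fund',
    confidence_of_views=[0.5])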
Example #8
def black_litterman_prep(order_book_ids, start_date, investors_views, investors_views_indicate_M,
                         investors_views_uncertainty=None, asset_type=None, market_weight=None,
                         risk_free_rate_tenor=None, risk_aversion_coefficient=None, excess_return_cov_uncertainty=None,
                         confidence_of_views=None):

    risk_free_rate_dict = ['0S', '1M', '2M', '3M', '6M', '9M', '1Y', '2Y', '3Y', '4Y', '5Y', '6Y', '7Y', '8Y',
                           '9Y', '10Y', '15Y', '20Y', '30Y', '40Y', '50Y']
    windows = 132
    if market_weight is None:
        market_weight = pd.DataFrame([1/len(order_book_ids)] * len(order_book_ids), index=order_book_ids)

    # Clean data
    if asset_type is None:
        asset_type = "fund"
    end_date = rqdatac.get_previous_trading_date(start_date)
    end_date = pd.to_datetime(end_date)
    clean_period_prices, reset_start_date = (pf.data_process(order_book_ids, asset_type, start_date, windows)[i]
                                             for i in [0, 2])
    
    if excess_return_cov_uncertainty is None:
        excess_return_cov_uncertainty = 1 / clean_period_prices.shape[0]

    reset_start_date = rqdatac.get_next_trading_date(reset_start_date)
    # Take daily risk free rate
    if risk_free_rate_tenor is None:
        risk_free_rate = rqdatac.get_yield_curve(reset_start_date, end_date, tenor='0S', country='cn')
    elif risk_free_rate_tenor in risk_free_rate_dict:
        risk_free_rate = rqdatac.get_yield_curve(reset_start_date, end_date, tenor=risk_free_rate_tenor,
                                                 country='cn')
    risk_free_rate['Daily'] = pd.Series(np.power(1 + risk_free_rate.iloc[:, 0], 1 / 365) - 1, index=risk_free_rate.index)

    # Calculate daily risk premium for each equity
    clean_period_prices_pct_change = clean_period_prices.pct_change()
    clean_period_excess_return = clean_period_prices_pct_change.subtract(risk_free_rate['Daily'], axis=0)

    # Wash out the ones in kick_out_list
    clean_market_weight = market_weight.loc[clean_period_prices.columns.values]
    temp_sum_weight = clean_market_weight.sum()
    clean_market_weight = clean_market_weight.div(temp_sum_weight)

    # If no risk_aversion_coefficient is passed in, then
    # risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility
    if risk_aversion_coefficient is None:
        market_portfolio_return = np.dot(clean_period_prices_pct_change, clean_market_weight)
        risk_aversion_coefficient = ((market_portfolio_return[1:].mean()-risk_free_rate["Daily"].mean()) /
                                     market_portfolio_return[1:].var())

    equilibrium_return = np.multiply(np.dot(clean_period_excess_return[1:].cov(), clean_market_weight),
                                     risk_aversion_coefficient)

    clean_period_excess_return_cov = clean_period_excess_return[1:].cov()
    # Generate the investors_views_uncertainty matrix if none is passed in
    if investors_views_uncertainty is None:
        if confidence_of_views is None:
            # He and Litterman's (1999) method to generate the uncertainty diagonal matrix; confidence levels on the
            # views are not needed.
            Omeg_diag = list()
            for i in range(investors_views_indicate_M.shape[0]):
                temp = np.dot(np.dot(investors_views_indicate_M[i, :], clean_period_excess_return_cov),
                              investors_views_indicate_M[i, :].transpose()) * excess_return_cov_uncertainty
                Omeg_diag.append(temp.item(0))
            investors_views_uncertainty = np.diag(Omeg_diag)
        else:
            # Idzorek's (2002) method: users can specify their confidence level on each view.
            Omeg_diag = list()
            for i in range(len(investors_views)):
                part1 = excess_return_cov_uncertainty * np.dot(clean_period_excess_return_cov,
                                                               investors_views_indicate_M[i, :].transpose())
                part2 = 1 / (excess_return_cov_uncertainty*np.dot(investors_views_indicate_M[i, :],
                                                                  np.dot(clean_period_excess_return_cov,
                                                                         investors_views_indicate_M[i, :].transpose())))
                part3 = investors_views[i]-np.dot(investors_views_indicate_M[i, :], equilibrium_return)
                return_with_full_confidence = equilibrium_return + np.multiply(part2 * part3, part1)
                weights_with_full_confidence = np.dot(np.linalg.inv(np.multiply(risk_aversion_coefficient,
                                                                    clean_period_excess_return_cov)),
                                                      return_with_full_confidence)
                temp1 = weights_with_full_confidence-clean_market_weight
                temp2 = np.multiply(confidence_of_views[i], np.absolute(investors_views_indicate_M[i, :].transpose()))
                tilt = np.multiply(temp1, temp2)
                weights_with_partial_confidence = clean_market_weight.values + tilt

                def objective_fun(x):
                    temp1 = np.linalg.inv(np.multiply(risk_aversion_coefficient, clean_period_excess_return_cov))
                    temp2 = np.linalg.inv(np.linalg.inv(np.multiply(excess_return_cov_uncertainty,
                                                                    clean_period_excess_return_cov)) +
                                          np.multiply(np.reciprocal(x), np.dot(investors_views_indicate_M[i, :].transpose(),
                                                     investors_views_indicate_M[i, :])))
                    temp3 = (np.dot(np.linalg.inv(np.multiply(excess_return_cov_uncertainty,
                                                             clean_period_excess_return_cov)), equilibrium_return) +
                             np.multiply(investors_views[i]*np.reciprocal(x),
                                         investors_views_indicate_M[i, :].transpose()))
                    wk = np.dot(temp1, np.dot(temp2, temp3))
                    return np.linalg.norm(np.subtract(weights_with_partial_confidence, wk))

                # Upper bound should be consistent with the magnitude of return
                upper_bound = abs(equilibrium_return.mean()) * 100
                omega_k = sc_opt.minimize_scalar(objective_fun, bounds=(10**-8, upper_bound), method="bounded",
                                                 options={"xatol": 10**-8})
                Omeg_diag.append(omega_k.x.item(0))
            investors_views_uncertainty = np.diag(Omeg_diag)

    # Combine all the information above to get the distribution of expected return with given views
    combined_return_covar = np.linalg.inv(np.linalg.inv(np.multiply(excess_return_cov_uncertainty, 
                                                                      clean_period_excess_return_cov))
                                            + np.dot(np.dot(investors_views_indicate_M.transpose(),
                                                            np.linalg.inv(investors_views_uncertainty)),
                                                     investors_views_indicate_M))
    temp1 = np.dot(np.linalg.inv(np.multiply(excess_return_cov_uncertainty, clean_period_excess_return_cov)), 
                   equilibrium_return)
    temp2 = np.dot(np.dot(investors_views_indicate_M.transpose(), np.linalg.inv(investors_views_uncertainty)),
                   investors_views)
    temp = temp1 + temp2

    combined_return_mean = np.dot(combined_return_covar, temp)
    return combined_return_mean, combined_return_covar, risk_aversion_coefficient, investors_views_uncertainty
Example #9
def factor_return_estimation(date, factor_exposure, industry_factors):

    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Compute the daily risk-free rate

    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Use the square root of market cap as the weight for weighted least squares

    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    if market_cap.isnull().sum() >= 30:

        market_cap_df = rqdatac.get_fundamentals(
            rqdatac.query(rqdatac.fundamentals.eod_derivative_indicator.
                          a_share_market_val),
            entry_date=previous_trading_date,
            interval='1d').major_xs(previous_trading_date)[
                'a_share_market_val'].loc[factor_exposure.index]

        if market_cap_df.isnull().sum() >= 30:

            raise ValueError('Too many missing market cap values')

        else:

            market_cap = market_cap_df
    else:
        market_cap = market_cap.dropna()

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Total market cap per industry, used in the industry-return constraint

    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    #factor_return_series = pd.DataFrame()

    # No constraints on the 10 style factors; constraints on the 32 GICS industries

    factor_return_series = constrainted_weighted_least_square(Y = daily_excess_return[market_cap.index].values[0], X = factor_exposure.loc[market_cap.index], weight = normalized_regression_weight,\
                                                                     industry_total_market_cap = industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(industry_total_market_cap))

    return factor_return_series.replace(np.nan, 0)
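Example #9 falls back to the fundamentals table when too many market-cap values are missing, while Examples #6, #10 and #12 rebuild missing values as close price times total A-share count. A condensed sketch of that second fallback, wrapped in a hypothetical helper (fill_missing_market_cap is not part of the original code) and assuming rqdatac is initialised and date is a trading date:

import rqdatac

def fill_missing_market_cap(market_cap, date):
    # Rebuild missing a_share_market_val entries as close price * total A shares,
    # mirroring the inline fallback used in Examples #6, #10 and #12.
    missing = market_cap[market_cap.isnull()].index.tolist()
    if missing:
        price = rqdatac.get_price(missing, date, date, fields='close', frequency='1d').T
        shares = rqdatac.get_shares(missing, date, date, fields='total_a').T
        market_cap[market_cap.isnull()] = (price * shares)[date]
    return market_cap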
Example #10
def customized_factor_return_estimation(date, factor_exposure, stock_list):

    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Compute the daily risk-free rate

    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Use the square root of market cap as the weight for weighted least squares

    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    missing_market_cap_stock = market_cap[market_cap.isnull() ==
                                          True].index.tolist()

    if len(missing_market_cap_stock) > 0:

        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T

        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T

        market_cap[market_cap.isnull() == True] = (
            price * shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Total market cap per industry, used in the industry-return constraint

    if str(previous_trading_date) > '2014-01-01':

        industry_factors = ['农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料', '纺织服装', '轻工制造',\
                            '医药生物', '公用事业', '交通运输', '房地产', '商业贸易', '休闲服务','综合', '建筑材料',  '建筑装饰', '电气设备',\
                            '国防军工', '计算机', '传媒', '通信', '银行', '非银金融', '汽车', '机械设备']
    else:

        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器',
            '交运设备', '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备',
            '纺织服装', '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    style_factor = [
        'beta', 'momentum', 'earnings_yield', 'residual_volatility', 'growth',
        'book_to_price', 'leverage', 'liquidity'
    ]

    stock_list = list(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry, used in the industry-return constraint

    customized_industry_total_market_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # If an industry's total market cap is below 100, the benchmark is treated as having no allocation to that industry

    missing_industry = customized_industry_total_market_cap[
        customized_industry_total_market_cap < 100].index

    csi_300_industry_total_market_cap = customized_industry_total_market_cap.drop(
        missing_industry)

    # Recompute the size and non-linear size factor exposures for the CSI 300 stock pool

    size_exposure = get_size(market_cap[stock_list])

    non_linear_size_exposure = get_non_linear_size(size_exposure,
                                                   market_cap[stock_list])

    # Apply market-cap-weighted standardization to the remaining style factors

    factors_exposure = factor_exposure.drop(missing_industry,
                                            axis=1).loc[stock_list]

    market_cap_mean = market_cap[stock_list].dot(
        factors_exposure[style_factor]) / market_cap[stock_list].sum()

    style_exposure = (factors_exposure[style_factor] -
                      market_cap_mean) / (factors_exposure[style_factor].std())

    # Concatenate the recomputed size and non-linear size exposures with the remaining factor exposures

    style_exposure = pd.concat(
        [style_exposure, size_exposure, non_linear_size_exposure], axis=1)

    style_exposure.columns = style_factor + ['size', 'non_linear_size']

    factor_exposure = pd.concat([
        style_exposure,
        factor_exposure.drop(missing_industry,
                             axis=1).loc[stock_list][industry_factors]
    ],
                                axis=1)

    factor_exposure['comovement'] = 1

    factor_return_series = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][stock_list].values[0], X=factor_exposure.drop(missing_industry, axis =1), weight = normalized_regression_weight[factor_exposure.index][stock_list],\
                                                                industry_total_market_cap = csi_300_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_300_industry_total_market_cap))

    # If the index holds no stocks in a given industry, that factor return is set to 0

    return factor_return_series.replace(np.nan, 0)
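The style-factor treatment above subtracts a market-cap-weighted mean and divides by the equal-weighted cross-sectional standard deviation. A small self-contained sketch of that standardization as a hypothetical helper (related to, but not necessarily identical to, the winsorization_and_market_cap_weighed_standardization helper used elsewhere on this page):

import pandas as pd

def market_cap_weighted_standardize(exposure, market_cap):
    # exposure: DataFrame (stocks x factors); market_cap: Series indexed by the same stocks.
    # Subtract the cap-weighted mean of each factor and divide by its cross-sectional std.
    cap_weighted_mean = market_cap.dot(exposure) / market_cap.sum()
    return (exposure - cap_weighted_mean) / exposure.std()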
Example #11
def black_litterman_prep(order_book_ids,
                         start_date,
                         investors_views,
                         investors_views_indicate_M,
                         investors_views_uncertainty=None,
                         asset_type=None,
                         market_weight=None,
                         risk_free_rate_tenor=None,
                         risk_aversion_coefficient=None,
                         excess_return_cov_uncertainty=None,
                         confidence_of_views=None,
                         windows=None):
    """
    Generate expected return and expected return covariance matrix with Black-Litterman model. Suppose we have N assets
    and K views.
    It's highly recommended to use your own ways to create investors_views_uncertainty, risk_aversion_coefficient and
    excess_return_cov_uncertainty beforehand to get the desired distribution parameters.
    :param order_book_ids: str list. A group of assets;
    :param asset_type: str. "fund" or "stock";
    :param start_date: str. The first day of backtest period;
    :param windows: int. Interval length of sample; Default: 132;
    :param investors_views: K*1 numpy matrix. Each row represents one view;
    :param investors_views_indicate_M: K*N numpy matrix. Each row corresponds to one view. Indicate which view is
    involved during calculation;
    :param investors_views_uncertainty: K*K diagonal matrix, optional. If it is skipped, He and Litterman's method will
    be called to generate diagonal matrix if confidence_of_view is also skipped; Idzorek's method will be called if
    confidence_of_view is passed in; Has to be non-singular;
    :param market_weight: floats list, optional. Weights for market portfolio; Default: Equal weights portfolio;
    :param risk_free_rate_tenor: str, optional. The tenor of the risk-free rate to be used. Default: "0S";
    :param risk_aversion_coefficient: float, optional. If no risk_aversion_coefficient is passed in, then
    risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility;
    :param excess_return_cov_uncertainty: float, optional. Default: 1/T where T is the time length of sample;
    :param confidence_of_views: floats list, optional. Represent investors' confidence levels on each view.
    :return: expected return vector, covariance matrix of expected return, risk_aversion_coefficient,
    investors_views_uncertainty.
    """

    risk_free_rate_dict = [
        '0S', '1M', '2M', '3M', '6M', '9M', '1Y', '2Y', '3Y', '4Y', '5Y', '6Y',
        '7Y', '8Y', '9Y', '10Y', '15Y', '20Y', '30Y', '40Y', '50Y'
    ]

    if market_weight is None:
        market_weight = pd.DataFrame([1 / len(order_book_ids)] *
                                     len(order_book_ids),
                                     index=order_book_ids)
    if windows is None:
        windows = 132

    # Clean data
    if asset_type is None:
        asset_type = "fund"
    end_date = rqdatac.get_previous_trading_date(start_date)
    end_date = pd.to_datetime(end_date)
    clean_period_prices, reset_start_date = (data_process(
        order_book_ids, asset_type, start_date, windows)[i] for i in [0, 2])

    if excess_return_cov_uncertainty is None:
        excess_return_cov_uncertainty = 1 / clean_period_prices.shape[0]

    reset_start_date = rqdatac.get_next_trading_date(reset_start_date)
    # Take daily risk free rate
    if risk_free_rate_tenor is None:
        risk_free_rate = rqdatac.get_yield_curve(reset_start_date,
                                                 end_date,
                                                 tenor='0S',
                                                 country='cn')
    elif risk_free_rate_tenor in risk_free_rate_dict:
        risk_free_rate = rqdatac.get_yield_curve(reset_start_date,
                                                 end_date,
                                                 tenor=risk_free_rate_tenor,
                                                 country='cn')
    risk_free_rate['Daily'] = pd.Series(
        np.power(1 + risk_free_rate.iloc[:, 0], 1 / 365) - 1,
        index=risk_free_rate.index)

    # Calculate daily risk premium for each equity
    clean_period_prices_pct_change = clean_period_prices.pct_change()
    clean_period_excess_return = clean_period_prices_pct_change.subtract(
        risk_free_rate['Daily'], axis=0)

    # Wash out the ones in kick_out_list
    clean_market_weight = market_weight.loc[clean_period_prices.columns.values]
    temp_sum_weight = clean_market_weight.sum()
    clean_market_weight = clean_market_weight.div(temp_sum_weight)

    # If no risk_aversion_coefficient is passed in, then
    # risk_aversion_coefficient = market portfolio risk premium / market portfolio volatility
    if risk_aversion_coefficient is None:
        market_portfolio_return = np.dot(clean_period_prices_pct_change,
                                         clean_market_weight)
        risk_aversion_coefficient = ((market_portfolio_return[1:].mean() -
                                      risk_free_rate["Daily"].mean()) /
                                     market_portfolio_return[1:].var())

    equilibrium_return = np.multiply(
        np.dot(clean_period_excess_return[1:].cov(), clean_market_weight),
        risk_aversion_coefficient)

    clean_period_excess_return_cov = clean_period_excess_return[1:].cov()
    # Generate the investors_views_uncertainty matrix if none is passed in
    if investors_views_uncertainty is None:
        if confidence_of_views is None:
            # He and Litterman's (1999) method to generate the uncertainty diagonal matrix; confidence levels on the
            # views are not needed.
            Omeg_diag = list()
            for i in range(investors_views_indicate_M.shape[0]):
                temp = np.dot(
                    np.dot(investors_views_indicate_M[i, :],
                           clean_period_excess_return_cov),
                    investors_views_indicate_M[
                        i, :].transpose()) * excess_return_cov_uncertainty
                Omeg_diag.append(temp.item(0))
            investors_views_uncertainty = np.diag(Omeg_diag)
        else:
            # Idzorek's (2002) method: users can specify their confidence level on each view.
            Omeg_diag = list()
            for i in range(len(investors_views)):
                part1 = excess_return_cov_uncertainty * np.dot(
                    clean_period_excess_return_cov,
                    investors_views_indicate_M[i, :].transpose())
                part2 = 1 / (excess_return_cov_uncertainty * np.dot(
                    investors_views_indicate_M[i, :],
                    np.dot(clean_period_excess_return_cov,
                           investors_views_indicate_M[i, :].transpose())))
                part3 = investors_views[i] - np.dot(
                    investors_views_indicate_M[i, :], equilibrium_return)
                return_with_full_confidence = equilibrium_return + np.multiply(
                    part2 * part3, part1)
                weights_with_full_confidence = np.dot(
                    np.linalg.inv(
                        np.multiply(risk_aversion_coefficient,
                                    clean_period_excess_return_cov)),
                    return_with_full_confidence)
                temp1 = weights_with_full_confidence - clean_market_weight
                temp2 = np.multiply(
                    confidence_of_views[i],
                    np.absolute(investors_views_indicate_M[i, :].transpose()))
                tilt = np.multiply(temp1, temp2)
                weights_with_partial_confidence = clean_market_weight.values + tilt

                def objective_fun(x):
                    temp1 = np.linalg.inv(
                        np.multiply(risk_aversion_coefficient,
                                    clean_period_excess_return_cov))
                    temp2 = np.linalg.inv(
                        np.linalg.inv(
                            np.multiply(excess_return_cov_uncertainty,
                                        clean_period_excess_return_cov)) +
                        np.multiply(
                            np.reciprocal(x),
                            np.dot(
                                investors_views_indicate_M[i, :].transpose(),
                                investors_views_indicate_M[i, :])))
                    temp3 = (np.dot(
                        np.linalg.inv(
                            np.multiply(excess_return_cov_uncertainty,
                                        clean_period_excess_return_cov)),
                        equilibrium_return) + np.multiply(
                            investors_views[i] * np.reciprocal(x),
                            investors_views_indicate_M[i, :].transpose()))
                    wk = np.dot(temp1, np.dot(temp2, temp3))
                    return np.linalg.norm(
                        np.subtract(weights_with_partial_confidence, wk))

                # Upper bound should be consistent with the magnitude of return
                upper_bound = abs(equilibrium_return.mean()) * 100
                omega_k = sc_opt.minimize_scalar(objective_fun,
                                                 bounds=(10**-8, upper_bound),
                                                 method="bounded",
                                                 options={"xatol": 10**-8})
                Omeg_diag.append(omega_k.x.item(0))
            investors_views_uncertainty = np.diag(Omeg_diag)

    # Combine all the information above to get the distribution of expected return with given views
    combined_return_covar = np.linalg.inv(
        np.linalg.inv(
            np.multiply(excess_return_cov_uncertainty,
                        clean_period_excess_return_cov)) + np.dot(
                            np.dot(investors_views_indicate_M.transpose(),
                                   np.linalg.inv(investors_views_uncertainty)),
                            investors_views_indicate_M))
    temp1 = np.dot(
        np.linalg.inv(
            np.multiply(excess_return_cov_uncertainty,
                        clean_period_excess_return_cov)), equilibrium_return)
    temp2 = np.dot(
        np.dot(investors_views_indicate_M.transpose(),
               np.linalg.inv(investors_views_uncertainty)), investors_views)
    temp = temp1 + temp2

    combined_return_mean = np.dot(combined_return_covar, temp)

    return combined_return_mean, combined_return_covar, risk_aversion_coefficient, investors_views_uncertainty
Example #12
def customized_factor_return_estimation(date, factor_exposure, stock_list):

    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Compute the daily risk-free rate

    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Use the square root of market cap as the weight for weighted least squares

    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    missing_market_cap_stock = market_cap[market_cap.isnull() ==
                                          True].index.tolist()

    if len(missing_market_cap_stock) > 0:

        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T

        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T

        market_cap[market_cap.isnull() == True] = (
            price * shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Total market cap per industry, used in the industry-return constraint

    if str(previous_trading_date) > '2014-01-01':

        industry_factors = ['农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料', '纺织服装', '轻工制造',\
                            '医药生物', '公用事业', '交通运输', '房地产', '商业贸易', '休闲服务','综合', '建筑材料',  '建筑装饰', '电气设备',\
                            '国防军工', '计算机', '传媒', '通信', '银行', '非银金融', '汽车', '机械设备']
    else:

        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器',
            '交运设备', '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备',
            '纺织服装', '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    stock_list = list(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry, used in the industry-return constraint

    customized_industry_total_market_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # If an industry's total market cap is below 100, the benchmark is treated as having no allocation to that industry

    missing_industry = customized_industry_total_market_cap[
        customized_industry_total_market_cap < 100].index

    csi_300_industry_total_market_cap = customized_industry_total_market_cap.drop(
        missing_industry)

    factor_return_series = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][stock_list].values[0], X = factor_exposure.drop(missing_industry, axis =1).loc[stock_list], weight = normalized_regression_weight[factor_exposure.index][stock_list],\
                                                                industry_total_market_cap = csi_300_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_300_industry_total_market_cap))

    # If the index holds no stocks in a given industry, that factor return is set to 0

    return factor_return_series.replace(np.nan, 0)
Example #13
def factor_return_estimation(stock_list, date, factor_exposure):

    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Compute the daily risk-free rate

    daily_return = rqdatac.get_price(order_book_ids=stock_list,
                                     start_date=previous_trading_date,
                                     end_date=latest_trading_date,
                                     fields='close').pct_change()[-1:].T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # Use the square root of market cap as the weight for weighted least squares

    market_cap = rqdatac.get_factor(id_or_symbols=stock_list,
                                    factor='a_share_market_val',
                                    start_date=latest_trading_date,
                                    end_date=latest_trading_date)

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # Total market cap per industry, used in the industry-return constraint

    industry_factors = ['CNE5S_ENERGY', 'CNE5S_CHEM', 'CNE5S_CONMAT', 'CNE5S_MTLMIN', 'CNE5S_MATERIAL', 'CNE5S_AERODEF',\
                        'CNE5S_BLDPROD', 'CNE5S_CNSTENG', 'CNE5S_ELECEQP', 'CNE5S_INDCONG', 'CNE5S_MACH','CNE5S_TRDDIST',\
                        'CNE5S_COMSERV', 'CNE5S_AIRLINE', 'CNE5S_MARINE', 'CNE5S_RDRLTRAN', 'CNE5S_AUTO', 'CNE5S_HOUSEDUR',\
                        'CNE5S_LEISLUX', 'CNE5S_CONSSERV', 'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV',\
                        'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE']

    industry_total_market_cap = market_cap.dot(
        factor_exposure[industry_factors])

    factor_return_series = pd.DataFrame()

    # No constraints on the 10 style factors; constraints on the 32 GICS industries

    factor_return_series['whole_market'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index].values[0], X = factor_exposure, weight = normalized_regression_weight[factor_exposure.index],\
                                                                     industry_total_market_cap = industry_total_market_cap, unconstrained_variables = 10, constrained_variables = 32)
    ### CSI 300

    csi_300_components = rqdatac.index_components(index_name='000300.XSHG',
                                                  date=previous_trading_date)

    # Total market cap per industry, used in the industry-return constraint

    csi_300_industry_total_market_cap = market_cap[csi_300_components].dot(
        factor_exposure[industry_factors].loc[csi_300_components])

    # If an industry's total market cap is below 100, the benchmark is treated as having no allocation to that industry

    missing_industry = csi_300_industry_total_market_cap[
        csi_300_industry_total_market_cap < 100].index

    csi_300_industry_total_market_cap = csi_300_industry_total_market_cap.drop(
        missing_industry)

    factor_return_series['csi_300'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][csi_300_components].values[0], X = factor_exposure.drop(missing_industry, axis =1).loc[csi_300_components], weight = normalized_regression_weight[factor_exposure.index][csi_300_components],\
                                                                industry_total_market_cap = csi_300_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_300_industry_total_market_cap))

    ### CSI 500

    csi_500_components = rqdatac.index_components(index_name='000905.XSHG',
                                                  date=previous_trading_date)

    csi_500_industry_total_market_cap = market_cap[csi_500_components].dot(
        factor_exposure[industry_factors].loc[csi_500_components])

    missing_industry = csi_500_industry_total_market_cap[
        csi_500_industry_total_market_cap < 100].index

    csi_500_industry_total_market_cap = csi_500_industry_total_market_cap.drop(
        missing_industry)

    factor_return_series['csi_500'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][csi_500_components].values[0], X = factor_exposure.drop(missing_industry, axis =1).loc[csi_500_components], weight = normalized_regression_weight[factor_exposure.index][csi_500_components],\
                                                                industry_total_market_cap = csi_500_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_500_industry_total_market_cap))

    ### CSI 800

    csi_800_components = rqdatac.index_components(index_name='000906.XSHG',
                                                  date=previous_trading_date)

    csi_800_industry_total_market_cap = market_cap[csi_800_components].dot(
        factor_exposure[industry_factors].loc[csi_800_components])

    missing_industry = csi_800_industry_total_market_cap[
        csi_800_industry_total_market_cap < 100].index

    csi_800_industry_total_market_cap = csi_800_industry_total_market_cap.drop(
        missing_industry)

    factor_return_series['csi_800'] = constrainted_weighted_least_square(Y = daily_excess_return[factor_exposure.index][csi_800_components].values[0], X = factor_exposure.drop(missing_industry, axis =1).loc[csi_800_components], weight = normalized_regression_weight[factor_exposure.index][csi_800_components],\
                                                                industry_total_market_cap = csi_800_industry_total_market_cap, unconstrained_variables = 10, constrained_variables = len(csi_800_industry_total_market_cap))

    # If the index holds no stocks in a given industry, that factor return is set to 0

    return factor_return_series.replace(np.nan, 0)
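For reference, the daily risk-free rate in Examples #6 through #13 is derived from the annualized compounded 3M yield with a 1/252 trading-day exponent (the earlier examples use 1/365 calendar days instead). The conversion itself is just:

# Convert an annualized compounded yield (illustrative 3%) to a per-trading-day rate.
annualized_yield = 0.03
daily_rate = (1 + annualized_yield) ** (1 / 252) - 1
print(daily_rate)  # roughly 0.000117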