Exemplo n.º 1
0
def get_cash_earnings_to_price_ratio(latest_trading_date, recent_report_type,
                                     market_cap_on_current_day):
    """Return the standardized cash-earnings-to-price descriptor.

    The numerator is the value ``get_ttm_sum`` produces for operating cash
    flow; the denominator is unadjusted close price times total shares on
    ``latest_trading_date``.

    :param latest_trading_date: date used for both the price and share query
    :param recent_report_type: forwarded unchanged to ``get_ttm_sum``
    :param market_cap_on_current_day: market caps, indexed so that they can be
        selected by the ratio's index
    :return: output of ``winsorization_and_market_cap_weighed_standardization``
    """
    operating_cash_flow_ttm = get_ttm_sum(
        rqdatac.financials.cash_flow_statement.
        cash_flow_from_operating_activities, recent_report_type)

    order_book_ids = operating_cash_flow_ttm.index.tolist()

    # Both queries cover the single day `latest_trading_date`.
    single_day = dict(start_date=latest_trading_date,
                      end_date=latest_trading_date)

    close_price = rqdatac.get_price(order_book_ids,
                                    fields='close',
                                    adjust_type='none',
                                    **single_day).T

    total_shares = rqdatac.get_shares(order_book_ids,
                                      fields='total',
                                      **single_day).T

    # After the transpose the date is selected as a column label.
    total_market_value = (close_price * total_shares)[str(latest_trading_date)]

    raw_ratio = operating_cash_flow_ttm / total_market_value

    return winsorization_and_market_cap_weighed_standardization(
        raw_ratio, market_cap_on_current_day[raw_ratio.index])
Exemplo n.º 2
0
def get_earnings_to_price_ratio(latest_trading_date, recent_report_type,
                                market_cap_on_current_day):
    """Return the standardized earnings-to-price descriptor.

    The numerator is the value ``get_ttm_sum`` produces for the income
    statement's ``profit_before_tax`` field; the denominator is unadjusted
    close price times total shares on ``latest_trading_date``.

    :param latest_trading_date: date used for both the price and share query
    :param recent_report_type: forwarded unchanged to ``get_ttm_sum``
    :param market_cap_on_current_day: market caps, indexed so that they can be
        selected by the ratio's index
    :return: output of ``winsorization_and_market_cap_weighed_standardization``
    """
    # NOTE(review): the field queried is profit_before_tax, although this value
    # was previously named "net profit" -- confirm which one is intended.
    profit_ttm = get_ttm_sum(
        rqdatac.financials.income_statement.profit_before_tax,
        recent_report_type)

    order_book_ids = profit_ttm.index.tolist()

    # Both queries cover the single day `latest_trading_date`.
    single_day = dict(start_date=latest_trading_date,
                      end_date=latest_trading_date)

    close_price = rqdatac.get_price(order_book_ids,
                                    fields='close',
                                    adjust_type='none',
                                    **single_day).T

    total_shares = rqdatac.get_shares(order_book_ids,
                                      fields='total',
                                      **single_day).T

    # After the transpose the date is selected as a column label.
    total_market_value = (close_price * total_shares)[str(latest_trading_date)]

    raw_ratio = profit_ttm / total_market_value

    return winsorization_and_market_cap_weighed_standardization(
        raw_ratio, market_cap_on_current_day[raw_ratio.index])
Exemplo n.º 3
0
def get_shares(
    order_book_ids,  # type: Union[str, List[str]]
    count=1,  # type: Optional[int]
    fields=None,  # type: Optional[str]
    expect_df=False  # type: Optional[bool]
):  # type: (...) -> Union[pd.DataFrame, pd.Series]
    """
    :param order_book_ids: an order_book_id, a list of order_book_id, a symbol, or a list of symbols
    :param count: number of data points to look back. Defaults to the most recent data currently available
    :param fields: fields to return; defaults to all fields. See the table below
    :param expect_df: whether a DataFrame should always be returned. On pandas 0.25.0 and above this should be True to avoid the exception raised when trying to build a Panel

    =========================   ===================================================
    fields                      meaning
    =========================   ===================================================
    total                       total share capital
    circulation_a               tradable A shares
    management_circulation      tradable shares held by management
    non_circulation_a           total non-tradable A shares
    total_a                     total A share capital
    =========================   ===================================================

    :return: share structure of the stock(s) over the queried period; a pandas.Series when expect_df is False and fields names a single field

    :example:

    Get the total share capital of Ping An Bank:

    ..  code-block:: python3
        :linenos:

        logger.info(get_shares('000001.XSHE', count=5, fields='total'))
        #[Out]
        #2016-08-01    1.717041e+10
        #2016-08-02    1.717041e+10
        #2016-08-03    1.717041e+10
        #2016-08-04    1.717041e+10
        #2016-08-05    1.717041e+10
        #Name: total, dtype: float64
    """
    environment = Environment.get_instance()
    end_dt = environment.trading_dt

    # With count == 1 the window is the current trading datetime only;
    # otherwise it starts count - 1 trading dates earlier.
    start_dt = end_dt
    if count != 1:
        start_dt = environment.data_proxy.get_previous_trading_date(
            end_dt, count - 1)

    # Normalize every identifier through assure_order_book_id, keeping the
    # scalar-vs-list shape the caller passed in.
    if not isinstance(order_book_ids, six.string_types):
        order_book_ids = [assure_order_book_id(item) for item in order_book_ids]
    else:
        order_book_ids = assure_order_book_id(order_book_ids)

    return rqdatac.get_shares(order_book_ids,
                              start_dt,
                              end_dt,
                              fields=fields,
                              expect_df=expect_df)
Exemplo n.º 4
0
def market_cap_imputation(stock_list, market_cap_on_current_day,
                          latest_trading_date):
    """Extend ``market_cap_on_current_day`` with values for stocks it lacks.

    For every stock in ``stock_list`` that is absent from the index of
    ``market_cap_on_current_day``, the market cap is estimated as unadjusted
    close price times ``total_a`` shares on ``latest_trading_date``. Stocks
    for which that product is NaN fall back to the mean of the
    ``a_share_market_val`` factor over the window starting 22 trading dates
    back, and finally to the mean of ``market_cap_on_current_day``.

    :param stock_list: iterable of order book ids that need a market cap
    :param market_cap_on_current_day: pandas Series of known market caps
        indexed by order book id
    :param latest_trading_date: object supporting ``strftime`` and
        subtraction of a ``timedelta``
    :return: Series holding the known values followed by the imputed ones
    """
    missing_market_cap_list = list(
        set(stock_list) - set(market_cap_on_current_day.index.tolist()))

    # Nothing to impute: skip the data queries, which would otherwise be
    # issued with an empty instrument list.
    if not missing_market_cap_list:
        return market_cap_on_current_day.copy()

    date_str = latest_trading_date.strftime('%Y-%m-%d')

    price_on_current_day = rqdatac.get_price(missing_market_cap_list,
                                             start_date=date_str,
                                             end_date=date_str,
                                             frequency='1d',
                                             fields='close',
                                             adjust_type='none').T

    shares_on_current_day = rqdatac.get_shares(missing_market_cap_list,
                                               date_str,
                                               date_str,
                                               fields='total_a').T

    market_cap = pd.Series(
        data=(price_on_current_day * shares_on_current_day)[date_str],
        index=missing_market_cap_list)

    if market_cap.isnull().any():

        missing_list = market_cap[market_cap.isnull()].index.tolist()

        # 50 calendar days are fetched so that at least 22 trading dates are
        # available to index from the end.
        trading_date_22_before = rqdatac.get_trading_dates(
            latest_trading_date - timedelta(days=50),
            latest_trading_date,
            country='cn')[-22]

        missing_market_cap = (rqdatac.get_factor(
            id_or_symbols=missing_list,
            factor='a_share_market_val',
            start_date=trading_date_22_before.strftime('%Y-%m-%d'),
            end_date=date_str).mean()).fillna(
                market_cap_on_current_day.mean())

        # Fill the NaN entries in place instead of concatenating: appending
        # the fallback values left the NaN rows behind and produced duplicate
        # index labels for the same stock.
        # NOTE(review): assumes the averaged factor is indexed by order book
        # id (or is a scalar for a single stock) -- confirm against rqdatac.
        market_cap = market_cap.fillna(missing_market_cap)

    return pd.concat([market_cap_on_current_day, market_cap])
Exemplo n.º 5
0
def get_recent_five_annual_shares(stock_list, date):
    """Collect ``total_a`` shares at five consecutive yearly anchor dates.

    Listed companies must publish their annual report before April 30 each
    year, so the share count on the trading date returned by
    ``rqdatac.get_next_trading_date`` for May 1 is taken as that year's
    share capital.

    :param stock_list: order book ids passed to ``rqdatac.get_shares``
    :param date: reference date as a ``'%Y-%m-%d'`` string
    :return: DataFrame with one column per anchor year, labelled
        ``'first'`` (most recent) through ``'fifth'`` (oldest)
    """
    reference = datetime.strptime(date, '%Y-%m-%d')
    previous_year = reference.year - 1

    # From June onwards the most recent anchor is May 1 of the previous year;
    # otherwise the whole window shifts back by one more year.
    newest_year = previous_year if reference.month > 5 else previous_year - 1

    list_of_dates = [
        str(newest_year - offset) + '-05-01' for offset in range(5)
    ]

    recent_five_annual_shares = pd.DataFrame()

    for report_date in list_of_dates:

        anchor = rqdatac.get_next_trading_date(report_date).strftime(
            '%Y-%m-%d')

        shares_on_anchor = rqdatac.get_shares(stock_list,
                                              start_date=anchor,
                                              end_date=anchor,
                                              fields='total_a')

        # A single-day query: keep its first (only) row.
        recent_five_annual_shares[report_date] = shares_on_anchor.iloc[0]

    # Rename the columns so they can be divided against per-year figures
    # (e.g. to compute sales per share).
    recent_five_annual_shares.columns = [
        'first', 'second', 'third', 'fourth', 'fifth'
    ]

    return recent_five_annual_shares
Exemplo n.º 6
0
def get_liquidity(stock_list, date, market_cap_on_current_day):
    """Compute the standardized liquidity factor from share turnover.

    Daily turnover is volume divided by ``total_a`` shares. Log turnover over
    the last 21, 63 and 252 rows is standardized separately and combined with
    weights 0.35 / 0.35 / 0.30, then standardized again.

    :param stock_list: candidate order book ids; filtered through
        ``drop_suspended_stock`` before any data is fetched
    :param date: end date of the window; must support subtracting a
        ``timedelta``
    :param market_cap_on_current_day: market caps forwarded to
        ``winsorization_and_market_cap_weighed_standardization``
    :return: the standardized combined liquidity exposure
    """
    # 500 calendar days are fetched so that 252 trading dates are available.
    lookback_dates = rqdatac.get_trading_dates(date - timedelta(days=500),
                                               date,
                                               country='cn')
    trading_date_252_before = lookback_dates[-252]

    tradable_stocks = drop_suspended_stock(stock_list, date)

    trading_volume = rqdatac.get_price(tradable_stocks,
                                       trading_date_252_before,
                                       date,
                                       frequency='1d',
                                       fields='volume')

    outstanding_shares = rqdatac.get_shares(tradable_stocks,
                                            trading_date_252_before,
                                            date,
                                            fields='total_a')

    daily_turnover_rate = trading_volume.divide(outstanding_shares)

    def summed_turnover(rows):
        # Zero totals become NaN so that the logarithm below stays finite.
        return daily_turnover_rate.iloc[-rows:].sum().replace(0, np.nan)

    standardize = winsorization_and_market_cap_weighed_standardization

    one_month_share_turnover = standardize(np.log(summed_turnover(21)),
                                           market_cap_on_current_day)

    three_months_share_turnover = standardize(
        np.log(summed_turnover(63) / 3), market_cap_on_current_day)

    twelve_months_share_turnover = standardize(
        np.log(summed_turnover(252) / 12), market_cap_on_current_day)

    # Stocks with zero turnover in a window get an exposure of 0 for that
    # descriptor.
    weighted_one_month = 0.35 * one_month_share_turnover.replace(np.nan, 0)
    weighted_three_months = 0.35 * three_months_share_turnover.replace(
        np.nan, 0)
    weighted_twelve_months = 0.3 * twelve_months_share_turnover.replace(
        np.nan, 0)

    liquidity = (weighted_one_month + weighted_three_months +
                 weighted_twelve_months)

    return standardize(liquidity, market_cap_on_current_day)
Exemplo n.º 7
0
def _benchmark_factor_return(index_name, previous_trading_date,
                             factor_exposure, market_cap, daily_excess_return,
                             normalized_regression_weight, industry_factors):
    """Estimate factor returns restricted to the components of one index."""
    components = rqdatac.index_components(index_name=index_name,
                                          date=previous_trading_date)

    components = list(
        set(market_cap.index.tolist()).intersection(set(components)))

    # Total market cap per industry, used for the industry-return constraint.
    industry_total_market_cap = market_cap[components].dot(
        factor_exposure[industry_factors].loc[components])

    # An industry whose total market cap is below 100 is treated as not held
    # by the benchmark.
    missing_industry = industry_total_market_cap[
        industry_total_market_cap < 100].index

    industry_total_market_cap = industry_total_market_cap.drop(
        missing_industry)

    # Explicit copy: the column assignment below must not write into (or warn
    # about writing into) the caller's exposure frame.
    benchmark_exposure = factor_exposure.loc[components].copy()

    # Recompute non-linear size inside the benchmark universe by
    # orthogonalizing size cubed against size.
    sqrt_market_cap = np.sqrt(market_cap[components])

    benchmark_exposure['non_linear_size'] = orthogonalize(
        target_variable=np.power(benchmark_exposure['size'], 3),
        reference_variable=benchmark_exposure['size'],
        regression_weight=sqrt_market_cap / sqrt_market_cap.sum())

    return constrainted_weighted_least_square(
        Y=daily_excess_return[factor_exposure.index][components].values[0],
        X=benchmark_exposure.drop(missing_industry, axis=1),
        weight=normalized_regression_weight[
            factor_exposure.index][components],
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))


def factor_return_estimation(latest_trading_date, factor_exposure):
    """Estimate one day of factor returns for four stock universes.

    A constrained weighted least-squares regression of daily excess returns
    on ``factor_exposure`` is run for the whole market and for the components
    of CSI 300 (``000300.XSHG``), CSI 500 (``000905.XSHG``) and CSI 800
    (``000906.XSHG``).

    :param latest_trading_date: date whose return is explained; exposures and
        market caps are taken as of the previous trading date
    :param factor_exposure: DataFrame indexed by order book id with style
        factor columns (including ``size`` and ``non_linear_size``) and one
        column per industry factor
    :return: DataFrame with columns ``whole_market``, ``csi_300``,
        ``csi_500`` and ``csi_800``; NaN entries are replaced by 0
    """
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Close-to-close return of the latest trading date, one row per stock.
    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    # Daily risk-free rate derived from the 3M tenor of the yield curve.
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    missing_market_cap_stock = market_cap[market_cap.isnull()].index.tolist()

    if len(missing_market_cap_stock) > 0:

        # Fall back to close price times total_a shares where the factor is
        # unavailable.
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T

        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T

        market_cap[market_cap.isnull()] = (price *
                                           shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # The industry column set depends on which side of 2014-01-01 the
    # exposure date falls (compared as strings).
    if str(previous_trading_date) > '2014-01-01':

        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料',
            '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输', '房地产', '商业贸易',
            '休闲服务', '综合', '建筑材料', '建筑装饰', '电气设备', '国防军工', '计算机',
            '传媒', '通信', '银行', '非银金融', '汽车', '机械设备'
        ]
    else:

        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器',
            '交运设备', '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备',
            '纺织服装', '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    # Total market cap per industry, used for the industry-return constraint.
    industry_total_market_cap = market_cap.dot(
        factor_exposure.loc[market_cap.index][industry_factors])

    factor_return_series = pd.DataFrame()

    # The 10 style factors are unconstrained; the industry factors carry the
    # constraint.
    factor_return_series['whole_market'] = constrainted_weighted_least_square(
        Y=daily_excess_return[market_cap.index].values[0],
        X=factor_exposure.loc[market_cap.index],
        weight=normalized_regression_weight,
        industry_total_market_cap=industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(industry_total_market_cap))

    # Same regression restricted to each benchmark index, in a fixed order so
    # the result columns keep their order.
    benchmarks = (('csi_300', '000300.XSHG'), ('csi_500', '000905.XSHG'),
                  ('csi_800', '000906.XSHG'))

    for column, index_name in benchmarks:
        factor_return_series[column] = _benchmark_factor_return(
            index_name, previous_trading_date, factor_exposure, market_cap,
            daily_excess_return, normalized_regression_weight,
            industry_factors)

    # An index holding no stock in a given industry gets a factor return of 0
    # for it.
    return factor_return_series.replace(np.nan, 0)
Exemplo n.º 8
0
def customized_factor_return_estimation(date, factor_exposure, stock_list):
    """Estimate one day of factor returns inside a custom stock universe.

    Size and non-linear size are recomputed within the universe, the other
    style factors are re-standardized against the universe's cap-weighted
    mean, and a constrained weighted least-squares regression of daily excess
    returns on the rebuilt exposures is run.

    :param date: reference date as a ``'%Y-%m-%d'`` string
    :param factor_exposure: DataFrame indexed by order book id holding the
        style factor columns and one column per industry factor
    :param stock_list: order book ids that define the custom universe; only
        those with a market cap entry are used
    :return: output of ``constrainted_weighted_least_square`` with NaN
        replaced by 0
    """
    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    # Close-to-close return of the latest trading date, one row per stock.
    daily_return = rqdatac.get_price(
        order_book_ids=factor_exposure.index.tolist(),
        start_date=previous_trading_date,
        end_date=latest_trading_date,
        fields='close').pct_change()[-1:].T

    # Daily risk-free rate derived from the 3M tenor of the yield curve.
    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=latest_trading_date,
        end_date=latest_trading_date,
        tenor='3M')['3M']

    daily_risk_free_return = (((1 + compounded_risk_free_return)**(1 / 252)) -
                              1)

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(
        id_or_symbols=factor_exposure.index.tolist(),
        factor='a_share_market_val',
        start_date=previous_trading_date,
        end_date=previous_trading_date)

    missing_market_cap_stock = market_cap[market_cap.isnull()].index.tolist()

    if len(missing_market_cap_stock) > 0:

        # Fall back to close price times total_a shares where the factor is
        # unavailable.
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T

        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T

        market_cap[market_cap.isnull()] = (price *
                                           shares)[previous_trading_date]

    normalized_regression_weight = market_cap.pow(0.5) / market_cap.pow(
        0.5).sum()

    # The industry column set depends on which side of 2014-01-01 the
    # exposure date falls (compared as strings).
    if str(previous_trading_date) > '2014-01-01':

        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料',
            '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输', '房地产', '商业贸易',
            '休闲服务', '综合', '建筑材料', '建筑装饰', '电气设备', '国防军工', '计算机',
            '传媒', '通信', '银行', '非银金融', '汽车', '机械设备'
        ]
    else:

        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器',
            '交运设备', '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备',
            '纺织服装', '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    style_factor = [
        'beta', 'momentum', 'earnings_yield', 'residual_volatility', 'growth',
        'book_to_price', 'leverage', 'liquidity'
    ]

    stock_list = list(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry, used for the industry-return constraint.
    customized_industry_total_market_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # An industry whose total market cap is below 100 is treated as not held
    # by the universe.
    missing_industry = customized_industry_total_market_cap[
        customized_industry_total_market_cap < 100].index

    held_industry_total_market_cap = customized_industry_total_market_cap.drop(
        missing_industry)

    # Select only the industries that are still held. Asking for the full
    # industry list after the missing columns were dropped raises KeyError on
    # current pandas whenever an industry is missing.
    dropped_industries = set(missing_industry)
    held_industries = [
        name for name in industry_factors if name not in dropped_industries
    ]

    # Recompute size and non-linear size exposures inside the universe.
    size_exposure = get_size(market_cap[stock_list])

    non_linear_size_exposure = get_non_linear_size(size_exposure,
                                                   market_cap[stock_list])

    # Re-standardize the remaining style factors against the cap-weighted
    # mean of the universe.
    universe_exposure = factor_exposure.drop(missing_industry,
                                             axis=1).loc[stock_list]

    market_cap_mean = market_cap[stock_list].dot(
        universe_exposure[style_factor]) / market_cap[stock_list].sum()

    style_exposure = (universe_exposure[style_factor] -
                      market_cap_mean) / (universe_exposure[style_factor].std())

    # Join the recomputed size exposures to the other style exposures.
    style_exposure = pd.concat(
        [style_exposure, size_exposure, non_linear_size_exposure], axis=1)

    style_exposure.columns = style_factor + ['size', 'non_linear_size']

    regression_exposure = pd.concat(
        [style_exposure, universe_exposure[held_industries]], axis=1)

    regression_exposure['comovement'] = 1

    factor_return_series = constrainted_weighted_least_square(
        Y=daily_excess_return[regression_exposure.index][stock_list].values[0],
        X=regression_exposure,
        weight=normalized_regression_weight[
            regression_exposure.index][stock_list],
        industry_total_market_cap=held_industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(held_industry_total_market_cap))

    # A universe holding no stock in a given industry gets a factor return of
    # 0 for it.
    return factor_return_series.replace(np.nan, 0)
Exemplo n.º 9
0
def _annual_sales_per_share(stock, query, report_quarter):
    """Return revenue for ``report_quarter`` divided by year-end total shares."""
    sales = rqdatac.get_financials(query, report_quarter, '1q')

    # Shares are read on the trading date returned for the day after Dec 31
    # of the report year, i.e. the last trading date of that year.
    year_end = datetime.strptime(report_quarter[:4] + '-12-31', '%Y-%m-%d')

    last_trading_date = str(
        rqdatac.get_previous_trading_date(year_end + timedelta(days=1)))

    shares = rqdatac.get_shares(stock,
                                start_date=last_trading_date,
                                end_date=last_trading_date,
                                fields='total')

    return sales.values / shares.values


def get_sales_growth(date, year, market_cap_on_current_day):
    """Compute the standardized sales-growth (SGRO) descriptor.

    For every stock whose ``listed_date`` is more than 1825 days before
    ``date``, sales per share of the last five annual reports are regressed
    on ``year``; the slope divided by the mean absolute sales per share is
    the raw descriptor.

    :param date: reference date as a ``'%Y-%m-%d'`` string
    :param year: array supporting ``reshape(-1, 1)``, used as the regressor;
        presumably five values ordered from the most recent report to the
        oldest -- verify against the caller
    :param market_cap_on_current_day: market caps forwarded to
        ``winsorization_and_market_cap_weighed_standardization``
    :return: the standardized SGRO exposure
    """
    (recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")

    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['SGRO'])

    # Most recent annual report first, oldest last.
    annual_reports = (annual_report, annual_report_last_year,
                      annual_report_2_year_ago, annual_report_3_year_ago,
                      annual_report_4_year_ago)

    # Compute each stock's sales per share for the last five annual reports.
    for stock in growth_qualified_stocks:

        query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))

        sales_per_share = pd.Series([
            _annual_sales_per_share(stock, query, reports[stock])
            for reports in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)

        # .loc writes into `factor` itself; the chained form
        # factor['SGRO'][stock] may assign to a temporary copy.
        factor.loc[stock, 'SGRO'] = float(
            regression.coef_) / abs(sales_per_share).mean()

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        factor['SGRO'], market_cap_on_current_day)

    return sale_growth
Exemplo n.º 10
0
def customized_factor_return_estimation(date, factor_exposure, stock_list):
    """Estimate one day of factor returns inside a custom stock universe.

    Runs a constrained weighted least-squares regression of daily excess
    returns on the given exposures, restricted to the stocks of
    ``stock_list`` that have a market cap entry.

    :param date: reference date as a ``'%Y-%m-%d'`` string
    :param factor_exposure: DataFrame indexed by order book id holding the
        style factor columns and one column per industry factor
    :param stock_list: order book ids that define the custom universe
    :return: output of ``constrainted_weighted_least_square`` with NaN
        replaced by 0
    """
    day_after = datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)
    latest_trading_date = rqdatac.get_previous_trading_date(day_after)
    previous_trading_date = rqdatac.get_previous_trading_date(
        latest_trading_date)

    all_stocks = factor_exposure.index.tolist()

    # Close-to-close return of the latest trading date, one row per stock.
    close_prices = rqdatac.get_price(order_book_ids=all_stocks,
                                     start_date=previous_trading_date,
                                     end_date=latest_trading_date,
                                     fields='close')
    daily_return = close_prices.pct_change()[-1:].T

    # Daily risk-free rate derived from the 3M tenor of the yield curve.
    yield_curve = rqdatac.get_yield_curve(start_date=latest_trading_date,
                                          end_date=latest_trading_date,
                                          tenor='3M')
    compounded_risk_free_return = yield_curve['3M']
    daily_risk_free_return = (1 + compounded_risk_free_return)**(1 / 252) - 1

    daily_excess_return = daily_return.subtract(
        daily_risk_free_return.values).T

    # The square root of market cap is the weight of the weighted least
    # squares regression.
    market_cap = rqdatac.get_factor(id_or_symbols=all_stocks,
                                    factor='a_share_market_val',
                                    start_date=previous_trading_date,
                                    end_date=previous_trading_date)

    has_no_market_cap = market_cap.isnull()
    missing_market_cap_stock = market_cap[has_no_market_cap].index.tolist()

    if len(missing_market_cap_stock) > 0:
        # Fall back to close price times total_a shares where the factor is
        # unavailable.
        price = rqdatac.get_price(missing_market_cap_stock,
                                  previous_trading_date,
                                  previous_trading_date,
                                  fields='close',
                                  frequency='1d').T
        shares = rqdatac.get_shares(missing_market_cap_stock,
                                    previous_trading_date,
                                    previous_trading_date,
                                    fields='total_a').T
        market_cap[has_no_market_cap] = (price * shares)[previous_trading_date]

    sqrt_market_cap = market_cap.pow(0.5)
    normalized_regression_weight = sqrt_market_cap / market_cap.pow(0.5).sum()

    # The industry column set depends on which side of 2014-01-01 the
    # exposure date falls (compared as strings).
    if str(previous_trading_date) > '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器', '食品饮料',
            '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输', '房地产', '商业贸易',
            '休闲服务', '综合', '建筑材料', '建筑装饰', '电气设备', '国防军工', '计算机',
            '传媒', '通信', '银行', '非银金融', '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合', '建筑建材', '家用电器',
            '交运设备', '食品饮料', '电子', '信息设备', '交通运输', '轻工制造', '公用事业', '机械设备',
            '纺织服装', '农林牧渔', '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    stock_list = list(
        set(market_cap.index.tolist()).intersection(set(stock_list)))

    # Total market cap per industry, used for the industry-return constraint.
    customized_industry_total_market_cap = market_cap[stock_list].dot(
        factor_exposure[industry_factors].loc[stock_list])

    # An industry whose total market cap is below 100 is treated as not held
    # by the universe.
    is_unheld = customized_industry_total_market_cap < 100
    missing_industry = customized_industry_total_market_cap[is_unheld].index

    held_industry_total_market_cap = customized_industry_total_market_cap.drop(
        missing_industry)

    universe_excess_return = daily_excess_return[
        factor_exposure.index][stock_list].values[0]
    universe_exposure = factor_exposure.drop(missing_industry,
                                             axis=1).loc[stock_list]
    universe_weight = normalized_regression_weight[
        factor_exposure.index][stock_list]

    factor_return_series = constrainted_weighted_least_square(
        Y=universe_excess_return,
        X=universe_exposure,
        weight=universe_weight,
        industry_total_market_cap=held_industry_total_market_cap,
        unconstrained_variables=10,
        constrained_variables=len(held_industry_total_market_cap))

    # A universe holding no stock in a given industry gets a factor return of
    # 0 for it.
    return factor_return_series.replace(np.nan, 0)
Exemplo n.º 11
0
def get_liquidity(stock_list, date, market_cap_on_current_day):
    """Compute the turnover descriptors and the combined liquidity factor.

    Stocks whose volume on ``date`` is 0 are excluded. Daily turnover is
    volume divided by ``total_a`` shares; log turnover over the last 21, 63
    and 252 rows is standardized separately, combined through
    ``atomic_descriptors_imputation_and_combination`` with weights
    0.35 / 0.35 / 0.30, and standardized again.

    :param stock_list: candidate order book ids
    :param date: end date of the window; must support subtracting a
        ``timedelta`` and be a valid row label of the volume frame
    :param market_cap_on_current_day: market caps forwarded to
        ``winsorization_and_market_cap_weighed_standardization``
    :return: tuple ``(one_month_share_turnover, three_months_share_turnover,
        twelve_months_share_turnover, processed_liquidity)``
    """
    # 500 calendar days are fetched so that 252 trading dates are available.
    lookback_dates = rqdatac.get_trading_dates(date - timedelta(days=500),
                                               date,
                                               country='cn')
    trading_date_252_before = lookback_dates[-252]

    trading_volume = rqdatac.get_price(stock_list,
                                       trading_date_252_before,
                                       date,
                                       frequency='1d',
                                       fields='volume')

    # Remove stocks that did not trade on `date`.
    volume_on_date = trading_volume.loc[date]
    untraded = volume_on_date[volume_on_date.values == 0].index.tolist()
    stock_list = list(set(stock_list) - set(untraded))

    outstanding_shares = rqdatac.get_shares(stock_list,
                                            trading_date_252_before,
                                            date,
                                            fields='total_a')

    daily_turnover_rate = trading_volume[stock_list].divide(outstanding_shares)

    def summed_turnover(rows):
        # Zero totals become NaN so that the logarithm below stays finite.
        return daily_turnover_rate.iloc[-rows:].sum().replace(0, np.nan)

    standardize = winsorization_and_market_cap_weighed_standardization

    one_month_share_turnover = standardize(np.log(summed_turnover(21)),
                                           market_cap_on_current_day)

    three_months_share_turnover = standardize(
        np.log(summed_turnover(63) / 3), market_cap_on_current_day)

    twelve_months_share_turnover = standardize(
        np.log(summed_turnover(252) / 12), market_cap_on_current_day)

    descriptor_names = [
        'one_month_share_turnover', 'three_months_share_turnover',
        'twelve_months_share_turnover'
    ]

    atomic_descriptors_df = pd.concat([
        one_month_share_turnover, three_months_share_turnover,
        twelve_months_share_turnover
    ],
                                      axis=1)
    atomic_descriptors_df.columns = list(descriptor_names)

    atom_descriptors_weight = pd.Series(data=[0.35, 0.35, 0.3],
                                        index=list(descriptor_names))

    liquidity = atomic_descriptors_imputation_and_combination(
        atomic_descriptors_df, atom_descriptors_weight)

    processed_liquidity = standardize(liquidity, market_cap_on_current_day)

    return (one_month_share_turnover, three_months_share_turnover,
            twelve_months_share_turnover, processed_liquidity)