Пример #1
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the ETOP factor loading of a single stock on a single date.

        The loading is ``ttm_net_profit * 10000 / market_cap`` where the
        market cap is obtained as ``exp()`` of the stored LNCAP factor value.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            ETOP factor loading of the stock
            0. code
            1. etop
            None is returned when the loading cannot be computed
        """
        symbol = Utils.code_to_symbol(code)

        # Trailing-twelve-month net profit; bail out when it is unavailable.
        ttm_data = Utils.get_ttm_fin_basic_data(symbol, calc_date)
        if ttm_data is None:
            return None
        net_profit = ttm_data['NetProfit']
        if pd.isnull(net_profit):
            return None

        # Look up the stock's LNCAP loading for the calculation date.
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap_loading = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap_loading.empty:
            return None

        # etop = ttm net profit / market cap
        # NOTE(review): the 10000 multiplier presumably aligns the units of the
        # financial data with the market cap -- confirm against the data source.
        market_cap = np.exp(lncap_loading['factorvalue'])
        etop = net_profit * 10000 / market_cap

        return pd.Series([symbol, etop], index=['code', 'etop'])
Пример #2
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the BTOP (book-to-price) factor loading of one stock on one date.

     The loading is ``(TotalAsset - TotalLiability) * 10000 / exp(LNCAP)``.
     The LNCAP loadings of a whole date are cached in ``cls._LNCAP_Cache`` so
     that they are read from disk only once per date.

     Parameters:
     --------
     :param code: str
         Stock code, e.g. SH600000 or 600000
     :param calc_date: datetime-like, str
         Calculation date, format: YYYY-MM-DD
     :return: pd.Series
     --------
         BTOP factor loading of the stock
         0. code
         1. btop
         None is returned when the loading cannot be computed
     """
     # Financial data of the report period that applies to calc_date.
     report_date = Utils.get_fin_report_date(calc_date)
     fin_data = Utils.get_fin_basic_data(code, report_date)
     if fin_data is None:
         return None

     # LNCAP loadings of every stock on calc_date, served from the cache
     # when possible and loaded (then cached) otherwise.
     date_key = Utils.datetimelike_to_str(calc_date, dash=False)
     lncap_all = cls._LNCAP_Cache.get(date_key)
     if lncap_all is None:
         lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.LNCAP_CT.db_file)
         lncap_all = Utils.read_factor_loading(lncap_file, date_key)
         cls._LNCAP_Cache.set(date_key, lncap_all)

     # Pick this stock's row; without it there is no market cap to divide by.
     symbol = Utils.code_to_symbol(code)
     lncap_rows = lncap_all[lncap_all['id'] == symbol]
     if lncap_rows.empty:
         return None
     lncap_value = lncap_rows.iloc[0]['factorvalue']

     # book-to-price = net assets / market cap
     net_assets = fin_data['TotalAsset'] - fin_data['TotalLiability']
     btop = net_assets * 10000 / np.exp(lncap_value)
     return pd.Series([symbol, btop], index=['code', 'btop'])
Пример #3
0
def _get_factorloading(factor_name, date, factor_type):
    """
    Read the factor loadings of all stocks for one alpha factor and date.

    Rows containing NaN are dropped (``drop_na=True``).

    Parameters:
    --------
    :param factor_name: str
        Name of the alpha factor, e.g: SmartMoney
    :param date: datetime-like, str
        Date, e.g: YYYY-MM-DD, YYYYMMDD
    :param factor_type: str
        Kind of factor data:
        'raw': raw factor, 'standardized': winsorized and standardized factor,
        'orthogonalized': orthogonalized factor
    :return: pd.DataFrame
    --------
        Factor loading vector
        0. date: date
        1. id: security code
        2. factorvalue: factor value
    :raises AttributeError: when ``alphafactor_ct`` holds no constants for
        ``factor_name``
    """
    date = Utils.datetimelike_to_str(date, dash=False)

    # Resolve the factor's constant table with plain attribute access instead
    # of eval() on a concatenated string, so a factor name can never be
    # executed as code.
    const_name = factor_name.upper()
    try:
        factor_const = getattr(alphafactor_ct, const_name).CT
    except AttributeError:
        # Other code in this file addresses these constants as
        # ``alphafactor_ct.<NAME>_CT``; fall back to that spelling when the
        # ``<NAME>.CT`` form does not exist.
        factor_const = getattr(alphafactor_ct, const_name + '_CT')

    factorloading_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                      factor_const['db_file'],
                                      factor_type,
                                      factor_name)
    return Utils.read_factor_loading(factorloading_path, date, drop_na=True)
Пример #4
0
 def calc_factor_loading_(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
     """
     Compute the ResVolatility factor loadings of the sample stocks and
     optionally persist them to the factor database.

     For every processed date the component factors listed in
     ``risk_ct.RESVOLATILITY_CT.component`` are computed first; their stored
     loadings are then cleaned of extreme values, normalized, inner-joined on
     ``id`` and combined with the weights in ``risk_ct.RESVOLATILITY_CT.weight``.

     Parameters:
     --------
     :param start_date: datetime-like, str
         Start date, format: YYYY-MM-DD or YYYYMMDD
     :param end_date: datetime-like, str
         End date; when None only start_date is processed,
         format: YYYY-MM-DD or YYYYMMDD
     :param month_end: bool, default True
         When True, only month-end trading days are processed
     :param save: bool, default False
         Whether to persist the combined loadings to the factor database
     :param kwargs:
         'multi_proc': bool, True=use multiple processes, False=single process,
         default False
     :return: None
         The combined loadings are only persisted (when ``save`` is True);
         nothing is returned.
     """
     # Series of trading days to process.
     start_date = Utils.to_date(start_date)
     if end_date is not None:
         end_date = Utils.to_date(end_date)
         trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
     else:
         trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)

     multi_proc = kwargs.get('multi_proc', False)
     components = risk_ct.RESVOLATILITY_CT.component

     for calc_date in trading_days_series:
         if month_end and (not Utils.is_month_end(calc_date)):
             continue
         date_key = Utils.datetimelike_to_str(calc_date, dash=False)

         # Compute (and, if requested, store) the loadings of each component.
         for com_factor in components:
             # The component name is resolved to a class by name, so the class
             # must be resolvable in this module's scope.
             factor = eval(com_factor + '()')
             factor.calc_factor_loading(start_date=calc_date, end_date=None, month_end=month_end, save=save, multi_proc=multi_proc)

         # Read the component loadings back and merge them into one frame
         # with one column per component.
         resvol_factor = pd.DataFrame()
         for com_factor in components:
             # Attribute lookup replaces eval('risk_ct.<NAME>_CT').
             component_const = getattr(risk_ct, com_factor + '_CT')
             factor_path = os.path.join(factor_ct.FACTOR_DB.db_path, component_const['db_file'])
             factor_loading = Utils.read_factor_loading(factor_path, date_key)
             factor_loading.drop(columns='date', inplace=True)
             # Clean extreme values, then normalize the component as a column vector.
             raw_values = np.array(factor_loading['factorvalue']).reshape((len(factor_loading), 1))
             factor_loading[com_factor] = Utils.normalize_data(Utils.clean_extreme_value(raw_values))
             factor_loading.drop(columns='factorvalue', inplace=True)
             if resvol_factor.empty:
                 resvol_factor = factor_loading
             else:
                 resvol_factor = pd.merge(left=resvol_factor, right=factor_loading, how='inner', on='id')

         # Weighted sum of the component columns gives the ResVolatility loading.
         resvol_factor.set_index('id', inplace=True)
         weight = pd.Series(risk_ct.RESVOLATILITY_CT.weight)
         resvol_factor = (resvol_factor * weight).sum(axis=1)
         resvol_factor.name = 'factorvalue'
         resvol_factor.index.name = 'id'
         resvol_factor = pd.DataFrame(resvol_factor)
         resvol_factor.reset_index(inplace=True)
         # The rows are labelled with the second entry of the two trading days
         # starting at calc_date.
         resvol_factor['date'] = Utils.get_trading_days(start=calc_date, ndays=2)[1]

         # Persist the ResVolatility loadings.
         if save:
             Utils.factor_loading_persistent(cls._db_file, date_key, resvol_factor.to_dict('list'), ['date', 'id', 'factorvalue'])
Пример #5
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the MLEV factor loading of a single stock on a single date.

        ``mlev = (me + pe + ld) / me`` where ``me`` is ``exp()`` of the stored
        LNCAP factor value, ``pe`` is fixed at 0 and ``ld`` is taken from the
        financial summary data.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            MLEV factor loading of the stock
            0. code
            1. mlev
            None is returned when the loading cannot be computed
        """
        symbol = Utils.code_to_symbol(code)

        # Latest financial summary applicable to the calculation date.
        fin_report_date = Utils.get_fin_report_date(calc_date)
        fin_summary = Utils.get_fin_summary_data(symbol, fin_report_date)
        if fin_summary is None:
            return None

        # Book value of long-term debt; total liabilities stand in when the
        # non-current figure is missing, and the stock is skipped when both are.
        long_term_debt = fin_summary['TotalNonCurrentLiabilities']
        if np.isnan(long_term_debt):
            long_term_debt = fin_summary['TotalLiabilities']
            if np.isnan(long_term_debt):
                return None
        long_term_debt = long_term_debt * 10000.0

        # Book value of preferred equity, set to 0 for A-shares.
        preferred_equity = 0.0

        # Market value of equity from the stored LNCAP loading.
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap_loading = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap_loading.empty:
            return None
        market_equity = np.exp(lncap_loading['factorvalue'])

        mlev = (market_equity + preferred_equity + long_term_debt) / market_equity

        return pd.Series([symbol, mlev], index=['code', 'mlev'])
Пример #6
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the EPFWD factor loading of a single stock on a single date.

        ``epfwd = predicted_earnings * 10000 / exp(LNCAP)``. When no consensus
        earnings forecast exists for the stock, its trailing-twelve-month net
        profit is used in place of the forecast.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            EPFWD factor loading of the stock
            0. code
            1. epfwd
            None is returned when the loading cannot be computed
        """
        symbol = Utils.code_to_symbol(code)

        # Consensus earnings forecast, with TTM net profit as the fallback.
        consensus = Utils.get_consensus_data(calc_date, symbol,
                                             ConsensusType.PredictedEarings)
        if consensus is not None:
            expected_earnings = consensus['predicted_earnings']
        else:
            ttm_data = Utils.get_ttm_fin_basic_data(symbol, calc_date)
            if ttm_data is None:
                return None
            expected_earnings = ttm_data['NetProfit']
        if np.isnan(expected_earnings):
            return None

        # Market cap from the stored LNCAP loading.
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap_loading = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap_loading.empty:
            return None

        # epfwd = expected earnings / market cap
        market_cap = np.exp(lncap_loading['factorvalue'])
        epfwd = expected_earnings * 10000.0 / market_cap

        return pd.Series([symbol, epfwd], index=['code', 'epfwd'])
Пример #7
0
 def _get_factor_loading(cls,
                         db_file,
                         str_key,
                         factor_name=None,
                         factor_type=None,
                         **kwargs):
     """
     Read factor loading data.

     Thin wrapper around ``Utils.read_factor_loading`` that optionally extends
     ``db_file`` with ``factor_type`` and ``factor_name`` and fills in default
     values for the optional arguments.

     Parameters:
     --------
     :param db_file: str
         Path of the factor loading data file (absolute path)
     :param str_key: str
         Key, usually a date, e.g: YYYY-MM-DD, YYYYMMDD
     :param factor_name: str, default None
         Factor name; must be given whenever factor_type is given, because
         both are joined onto db_file
     :param factor_type: str, default None
         Factor type, e.g: 'raw', 'standardized', 'orthogonalized'
     :param kwargs:
         kwargs['code']: str, default None; stock code, e.g: SH600000, 600000
         kwargs['na_value']: object, default None; when not None, missing
             values are replaced by it. The key 'nan_value' is accepted as an
             alias; 'na_value' wins when both are supplied.
         kwargs['drop_na']: bool, default False; whether rows containing NaN
             are dropped
     :return: pd.DataFrame or pd.Series, factor loadings
     --------
         pd.DataFrame(code==None) or pd.Series(code!=None)
         0. date
         1. id
         2. factorvalue
     """
     if factor_type is not None:
         db_file = os.path.join(db_file, factor_type, factor_name)

     code = kwargs.get('code')
     # The docstring historically advertised 'nan_value' while only 'na_value'
     # was read, so a value passed under the documented key was silently
     # ignored. Both spellings are honoured now.
     na_value = kwargs.get('na_value', kwargs.get('nan_value'))
     drop_na = kwargs.get('drop_na', False)

     return Utils.read_factor_loading(db_file, str_key, code, na_value, drop_na)
Пример #8
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the CETOP factor loading of a single stock on a single date.

        ``cetop = ttm_operating_cash_flow * 10000 / exp(LNCAP)``.

        Parameters:
        --------
        :param code: str
            Stock code, e.g. SH600000 or 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            CETOP factor loading of the stock
            0. code
            1. cetop
            None is returned when the loading cannot be computed
        """
        symbol = Utils.code_to_symbol(code)

        # Trailing-twelve-month net operating cash flow.
        ttm_data = Utils.get_ttm_fin_basic_data(symbol, calc_date)
        if ttm_data is None:
            return None
        operating_cash = ttm_data['NetOperateCashFlow']
        if np.isnan(operating_cash):
            return None

        # Market cap from the stored LNCAP loading.
        lncap_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                  risk_ct.LNCAP_CT.db_file)
        date_key = Utils.datetimelike_to_str(calc_date, dash=False)
        lncap_loading = Utils.read_factor_loading(lncap_file, date_key, symbol)
        if lncap_loading.empty:
            return None
        market_cap = np.exp(lncap_loading['factorvalue'])

        # cetop = ttm operating cash flow / market cap
        cetop = operating_cash * 10000 / market_cap

        return pd.Series([symbol, cetop], index=['code', 'cetop'])
Пример #9
0
def smartq_backtest(start, end):
    """
    Run the historical back-test of the SmartQ factor.

    Every trading day between ``start`` and ``end`` is visited. On a
    month-start day the previous holdings (if any) are valued at the day's
    volume-weighted average price, a new portfolio is selected from the
    factor loadings keyed by the previous trading day, bought at the day's
    volume-weighted average price and valued at the close; the holdings are
    written to ``port_data_<date>.csv``. On any other day the current holdings
    are valued at the close. One NAV row per day is appended and the full NAV
    history is written to ``port_nav.csv`` at the end.

    Parameters:
    --------
    :param start: datetime-like, str
        Back-test start date, format: YYYY-MM-DD; should be a month start
    :param end: datetime-like, str
        Back-test end date, format: YYYY-MM-DD
    :return: None
    """
    # Local import: only ``DataFrame`` is guaranteed by this function's
    # existing usage, and ``pd.concat`` is needed to add NAV rows now that
    # ``DataFrame.append`` no longer exists in pandas >= 2.0.
    import pandas as pd

    # Trading days between the start and end dates.
    trading_days = Utils.get_trading_days(start, end)
    # Latest stored back-test state as of the first trading day.
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    # factor_data holds the stocks selected at the latest rebalance,
    # pd.DataFrame<date,factorvalue,id,buyprice>.
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    if port_nav is None:
        # No history yet: start from a NAV of 1.0 on the previous trading day.
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Walk through the trading days: rebalance on month starts, otherwise
    # only value the portfolio.
    t = 0  # number of rebalances performed
    for trading_day in trading_days:
        # Base NAV: the NAV on the holdings' date when a portfolio exists,
        # otherwise the NAV of the previous trading day.
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance on month starts.
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at the
            # day's average price.
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    # With range_lookup the returned row may belong to another
                    # date; the close is used then instead of the VWAP.
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Load the factor loadings keyed by the previous trading day.
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Compute each stock's return over the past 20 days and drop
            # stocks that are suspended or limit-up on the rebalance day, or
            # whose 20-day return is unavailable.
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day,
                                                 ndays=20)
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return (descending) and drop the top 20%.
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20',
                                                  ascending=False).iloc[k:]
            del factor_data['ret20']  # helper column no longer needed
            # Sort by factor value (ascending) and keep the first 10%.
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record the buy price of every selected stock and value the new
            # portfolio at the day's close.
            # NOTE(review): when fewer than 10 stocks survive the filters the
            # selection is empty and the division below raises
            # ZeroDivisionError -- confirm whether that can happen in practice.
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Save the new holdings.
            port_data_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                alphafactor_ct.SMARTMONEY_CT.backtest_path,
                'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Pause for 300 seconds after every sixth rebalance.
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: value the portfolio at the close.
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Add the day's NAV row (pd.concat instead of the removed
        # DataFrame.append).
        nav_row = DataFrame([{
            'date': Utils.datetimelike_to_str(trading_day, True),
            'nav': nav
        }])
        port_nav = pd.concat([port_nav, nav_row], ignore_index=True)
        # Advance prev_trading_day.
        prev_trading_day = trading_day
    # Save the NAV history.
    port_nav_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path,
                                 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
Пример #10
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the LIQUIDITY factor loadings of the sample stocks and
        optionally persist them to the factor database.

        For every processed date the per-stock values (stom, stoq, stoa and
        the raw liquidity) are computed, the raw liquidity is regressed on the
        Size factor (both cleaned of extreme values and normalized), and the
        regression residual becomes the final loading.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date; when None only start_date is processed,
            format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            When True, only month-end trading days are processed
        :param save: bool, default False
            Whether to persist the loadings to the factor database
        :param kwargs:
            'multi_proc': bool, True=use multiple processes, False=single
            process, default False
        :return: dict
            Raw liquidity loadings (date, id, factorvalue) of the last
            processed date, or None when no date was processed
        """
        # Trading days to process and the table of stock basics.
        start_date = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        else:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()

        kwargs.setdefault('multi_proc', False)
        use_multi_proc = kwargs['multi_proc']

        dict_raw_liquidity = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc LIQUIDITY factor loading.' %
                         Utils.datetimelike_to_str(calc_date))

            # Only stocks listed before the cut-off date take part.
            listed_before = (calc_date - datetime.timedelta(
                days=risk_ct.LIQUID_CT.listed_days)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[
                all_stock_basics.list_date < listed_before]

            ids = []
            stoms = []
            stoqs = []
            stoas = []
            raw_liquidities = []

            def _collect(stock_result):
                # Spread one stock's result over the per-field lists.
                ids.append(stock_result['code'])
                stoms.append(stock_result['stom'])
                stoqs.append(stock_result['stoq'])
                stoas.append(stock_result['stoa'])
                raw_liquidities.append(stock_result['liquidity'])

            if use_multi_proc:
                # Multi-process: workers put their results on a shared queue,
                # which is drained once the pool has finished.
                result_queue = Manager().Queue()
                pool = Pool(4)
                for _, stock_info in stock_basics.iterrows():
                    pool.apply_async(cls._calc_factor_loading_proc,
                                     args=(stock_info.symbol, calc_date,
                                           result_queue))
                pool.close()
                pool.join()
                while not result_queue.empty():
                    _collect(result_queue.get(True))
            else:
                # Single process: compute stock by stock, skipping failures.
                for _, stock_info in stock_basics.iterrows():
                    logging.info("[%s] Calc %s's LIQUIDITY factor loading." %
                                 (Utils.datetimelike_to_str(
                                     calc_date, dash=True), stock_info.symbol))
                    stock_result = cls._calc_factor_loading(
                        stock_info.symbol, calc_date)
                    if stock_result is not None:
                        _collect(stock_result)

            # Rows are labelled with the second entry of the two trading days
            # starting at calc_date.
            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]

            def _as_loading(values):
                # Loading dict in the persisted layout.
                return {
                    'date': [date_label] * len(ids),
                    'id': ids,
                    'factorvalue': values
                }

            dict_stom = _as_loading(stoms)
            dict_stoq = _as_loading(stoqs)
            dict_stoa = _as_loading(stoas)
            dict_raw_liquidity = _as_loading(raw_liquidities)

            # Read the Size factor and orthogonalize liquidity against it.
            size_factor_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                            risk_ct.SIZE_CT.db_file)
            df_size = Utils.read_factor_loading(
                size_factor_path,
                Utils.datetimelike_to_str(calc_date, dash=False))
            df_size.drop(columns='date', inplace=True)
            df_size.rename(columns={'factorvalue': 'size'}, inplace=True)
            df_liquidity = pd.merge(left=pd.DataFrame({
                'id': ids,
                'liquidity': raw_liquidities
            }),
                                    right=df_size,
                                    how='inner',
                                    on='id')

            def _standardized(column):
                # Column vector cleaned of extreme values, then normalized.
                column_vector = np.array(df_liquidity[column]).reshape(
                    (len(df_liquidity), 1))
                return Utils.normalize_data(
                    Utils.clean_extreme_value(column_vector))

            arr_liquidity = _standardized('liquidity')
            arr_size = _standardized('size')
            # The OLS residual of liquidity on size is the final loading.
            ols_result = sm.OLS(arr_liquidity, arr_size).fit()
            df_liquidity['liquidity'] = ols_result.resid
            df_liquidity.drop(columns='size', inplace=True)
            df_liquidity.rename(columns={'liquidity': 'factorvalue'},
                                inplace=True)
            df_liquidity['date'] = date_label

            # Persist the component and final loadings.
            if save:
                str_date = Utils.datetimelike_to_str(calc_date, dash=False)
                factor_header = ['date', 'id', 'factorvalue']
                loadings_to_store = (
                    ('stom_{}'.format(str_date), dict_stom),
                    ('stoq_{}'.format(str_date), dict_stoq),
                    ('stoa_{}'.format(str_date), dict_stoa),
                    ('rawliquidity_{}'.format(str_date), dict_raw_liquidity),
                )
                for store_key, loading in loadings_to_store:
                    Utils.factor_loading_persistent(cls._db_file, store_key,
                                                    loading, factor_header)
                Utils.factor_loading_persistent(cls._db_file, str_date,
                                                df_liquidity.to_dict('list'),
                                                factor_header)

            # Pause for 180 seconds before the next date.
            logging.info('Suspending for 180s.')
            time.sleep(180)
        return dict_raw_liquidity
Пример #11
0
    def calc_factor_loading_(cls,
                             start_date,
                             end_date=None,
                             month_end=True,
                             save=False,
                             **kwargs):
        """
        Compute the Growth factor loadings of the sample stocks and optionally
        persist them to the factor database.

        For every processed date the component factors listed in
        ``risk_ct.GROWTH_CT.component`` are computed first. Each component's
        stored loadings are then normalized (rows with missing values are set
        aside and filled with the normalized mean of their industry), the
        components are inner-joined on ``id`` and combined with the weights in
        ``risk_ct.GROWTH_CT.weight``.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date; when None only start_date is processed,
            format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            When True, only month-end trading days are processed
        :param save: bool, default False
            Whether to persist the combined loadings to the factor database
        :param kwargs:
            'multi_proc': bool, True=use multiple processes, False=single
            process, default False
        :return: None
            The combined loadings are only persisted (when ``save`` is True);
            nothing is returned.
        """
        # Series of trading days to process.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)

        multi_proc = kwargs.get('multi_proc', False)
        components = risk_ct.GROWTH_CT.component

        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            date_key = Utils.datetimelike_to_str(calc_date, dash=False)

            # Compute (and, if requested, store) each component's loadings.
            for com_factor in components:
                # The component name is resolved to a class by name, so the
                # class must be resolvable in this module's scope.
                factor = eval(com_factor + '()')
                factor.calc_factor_loading(start_date=calc_date,
                                           end_date=None,
                                           month_end=month_end,
                                           save=save,
                                           multi_proc=multi_proc)

            # Combine the components into one frame, one column each.
            growth_factor = pd.DataFrame()
            df_industry_classify = Utils.get_industry_classify()  # industry classification per stock
            for com_factor in components:
                # Attribute lookup replaces eval('risk_ct.<NAME>_CT').
                component_const = getattr(risk_ct, com_factor + '_CT')
                factor_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                           component_const['db_file'])
                factor_loading = Utils.read_factor_loading(factor_path,
                                                           date_key)
                factor_loading.drop(columns='date', inplace=True)
                factor_loading.rename(columns={'factorvalue': com_factor},
                                      inplace=True)
                # Attach the industry classification.
                factor_loading = pd.merge(
                    left=factor_loading,
                    right=df_industry_classify[['id', 'ind_code']],
                    how='inner',
                    on='id')
                # Set the rows with a missing loading aside. An explicit copy
                # is taken so that filling them below never writes into a
                # slice of factor_loading.
                missingdata_factor = factor_loading[
                    factor_loading[com_factor].isna()].copy()
                # Drop the missing rows from factor_loading itself.
                factor_loading.dropna(axis='index', how='any', inplace=True)
                # Treat outliers and standardize the remaining loadings.
                factor_loading = Utils.normalize_data(factor_loading,
                                                      id='id',
                                                      columns=com_factor,
                                                      treat_outlier=True,
                                                      weight='cap',
                                                      calc_date=calc_date)
                # Fill the missing loadings with the mean of the normalized
                # loadings of the same industry.
                ind_mean_factor = {
                    ind_code: factor_loading[factor_loading['ind_code'] ==
                                             ind_code][com_factor].mean()
                    for ind_code in set(missingdata_factor['ind_code'])
                }
                if not missingdata_factor.empty:
                    missingdata_factor[com_factor] = [
                        ind_mean_factor[ind_code]
                        for ind_code in missingdata_factor['ind_code']
                    ]
                # Put the filled rows back and drop the industry column.
                factor_loading = pd.concat(
                    [factor_loading, missingdata_factor])
                factor_loading.drop(columns='ind_code', inplace=True)

                if growth_factor.empty:
                    growth_factor = factor_loading
                else:
                    growth_factor = pd.merge(left=growth_factor,
                                             right=factor_loading,
                                             how='inner',
                                             on='id')

            # An earlier variant imputed the missing values once, after all
            # components had been merged; the per-component imputation above
            # replaced it.

            # Weighted sum of the component columns gives the Growth loading.
            growth_factor.set_index('id', inplace=True)
            weight = pd.Series(risk_ct.GROWTH_CT.weight)
            growth_factor = (growth_factor * weight).sum(axis=1)
            growth_factor.name = 'factorvalue'
            growth_factor.index.name = 'id'
            growth_factor = pd.DataFrame(growth_factor)
            growth_factor.reset_index(inplace=True)
            # Rows are labelled with the second entry of the two trading days
            # starting at calc_date.
            growth_factor['date'] = Utils.get_trading_days(start=calc_date,
                                                           ndays=2)[1]
            # Persist the Growth loadings.
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    date_key,
                    growth_factor.to_dict('list'),
                    ['date', 'id', 'factorvalue'])
Пример #12
0
def apm_backtest(start, end, pure_factor=False):
    """
    Run the historical backtest of the APM factor.

    On every month-start trading day the portfolio is rebalanced: the
    previous holdings are first valued at that day's volume-weighted average
    price (or at the returned close when the quote is not dated that day),
    then the factor loadings stored under the previous trading day are read,
    stocks that are suspended or limit-up are removed, and the top 10% by
    factor value are bought at the day's volume-weighted average price.
    On every other day the current holdings are marked to the close.
    Each rebalance writes ``port_data_<YYYYMMDD>.csv`` and the NAV series is
    written to ``port_nav.csv`` in the backtest directory.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format YYYY-MM-DD. It should be the trading day
        before a month start, i.e. a month-end trading day.
    :param end: datetime-like, str
        Backtest end date, format YYYY-MM-DD.
    :param pure_factor: bool, default False
        Whether to backtest the pure (purified) factor; selects the
        ``pure_*`` paths of ``factor_ct.APM_CT``.
    :return: None
    """
    # Function-scope import: the rest of this function only relies on the
    # bare ``DataFrame`` name, so ``pd`` is brought in locally for ``concat``.
    import pandas as pd

    date_fmt = '%Y-%m-%d'

    # Resolve the mode-dependent locations once instead of at every use.
    db_root = factor_ct.FACTOR_DB.db_path
    if pure_factor:
        backtest_dir = os.path.join(db_root,
                                    factor_ct.APM_CT.pure_backtest_path)
        factor_data_path = os.path.join(db_root,
                                        factor_ct.APM_CT.pure_apm_db_file)
    else:
        backtest_dir = os.path.join(db_root, factor_ct.APM_CT.backtest_path)
        factor_data_path = os.path.join(db_root, factor_ct.APM_CT.apm_db_file)

    # Trading days between start and end.
    trading_days = Utils.get_trading_days(start, end)
    # Latest stored backtest state as of the first trading day.
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    # factor_data holds the stocks picked at the latest rebalance; the code
    # below reads its date, id, factorvalue and buyprice fields.
    factor_data, port_nav = Utils.get_backtest_data(backtest_dir,
                                                    trading_days.iloc[0])
    if port_nav is None:
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime(date_fmt)],
            'nav': [1.0]
        })

    # Walk through the trading days: rebalance on month starts, otherwise
    # just value the current holdings.
    for trading_day in trading_days:
        # An empty portfolio is treated like "no portfolio": the NAV of the
        # previous day is carried forward instead of indexing into an empty
        # frame or dividing by zero.
        has_positions = factor_data is not None and len(factor_data) > 0
        if has_positions:
            base_date = factor_data.iloc[0].date
        else:
            base_date = prev_trading_day.strftime(date_fmt)
        nav = port_nav[port_nav.date == base_date].iloc[0].nav

        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.',
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at the
            # day's average price.
            if has_positions:
                interval_ret = 0.0
                for _, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    if daily_mkt.date == trading_day.strftime(date_fmt):
                        sell_price = (daily_mkt.amount / daily_mkt.vol *
                                      daily_mkt.factor)
                    else:
                        # Quote is not from this day: fall back to its close.
                        sell_price = daily_mkt.close
                    interval_ret += sell_price / factor_info.buyprice - 1.0
                nav *= 1.0 + interval_ret / float(len(factor_data))

            # Load the factor loadings stored under the previous trading day.
            factor_data = Utils.read_factor_loading(
                factor_data_path,
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Drop stocks that cannot be bought today (suspended or limit-up).
            untradable = []
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    untradable.append(ind)
            factor_data = factor_data.drop(untradable, axis=0)
            # Keep the top 10% by factor value. ``copy()`` detaches the
            # selection so the buyprice column is written to a real frame
            # rather than to a slice of the sorted one.
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=False)
            factor_data = factor_data.iloc[:int(len(factor_data) * 0.1)].copy()

            # Record buy prices and value the new holdings at today's close.
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                buy_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                factor_data.loc[ind, 'buyprice'] = buy_price
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            if len(factor_data) > 0:
                nav *= 1.0 + interval_ret / float(len(factor_data))
            else:
                logging.warning(
                    '[%s] no stock selected at rebalance; NAV carried forward.',
                    Utils.datetimelike_to_str(trading_day, True))

            # Persist the new holdings.
            port_data_path = os.path.join(
                backtest_dir, 'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
        else:
            # Not a rebalance day: value the holdings at the close.
            logging.info('[%s] 月中估值.',
                         Utils.datetimelike_to_str(trading_day, True))
            if has_positions:
                interval_ret = 0.0
                for _, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                nav *= 1.0 + interval_ret / float(len(factor_data))

        # Append today's NAV. ``DataFrame.append`` no longer exists in
        # pandas >= 2.0; ``concat`` works on old and new versions alike.
        port_nav = pd.concat([
            port_nav,
            DataFrame({
                'date': [trading_day.strftime(date_fmt)],
                'nav': [nav]
            })
        ],
                             ignore_index=True)
        prev_trading_day = trading_day

    # Persist the NAV series.
    port_nav.to_csv(os.path.join(backtest_dir, 'port_nav.csv'), index=False)
Пример #13
0
    def _calc_synthetic_factor_loading(cls,
                                       start_date,
                                       end_date=None,
                                       month_end=True,
                                       save=False,
                                       **kwargs):
        """
        Compute the loading of a synthetic factor from its component factors
        and optionally persist it to the factor database.

        For every processed date each component factor in
        ``kwargs['com_factors']`` is (re)computed, then the stored loading of
        every component listed in ``risk_ct.<CLASSNAME>_CT['component']`` is
        read back, normalized, has its missing values replaced by the industry
        mean of the normalized values, and the components are combined as a
        weighted sum using ``risk_ct.<CLASSNAME>_CT['weight']``.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date.
        :param end_date: datetime-like, str, default None
            End date. If None, only the loading for start_date is computed.
        :param month_end: bool, default True
            If True, every date that is not a month end is skipped (this
            applies to the first date as well).
        :param save: bool, default False
            Whether to persist the result to the factor database.
        :param kwargs:
            'multi_proc': bool, True = multi-process, False = single process,
                default False; forwarded to the component factors.
            'com_factors': list of component factor instances (required).
        :return: None
            Nothing is returned; the synthetic loading (columns date, id,
            factorvalue) is only written out when ``save`` is True.
        """
        # Build the sequence of trading days to process.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        kwargs.setdefault('multi_proc', False)

        # Constants of this synthetic factor, looked up by attribute name
        # instead of eval() on a concatenated string.
        synthetic_ct = getattr(risk_ct, cls.__name__.upper() + '_CT')

        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # Compute the loading of every component factor first.
            for com_factor in kwargs['com_factors']:
                com_factor.calc_factor_loading(start_date=calc_date,
                                               end_date=None,
                                               month_end=month_end,
                                               save=save,
                                               multi_proc=kwargs['multi_proc'])

            str_calc_date = Utils.datetimelike_to_str(calc_date, dash=False)
            synthetic_factor = pd.DataFrame()
            # Industry classification of the individual stocks.
            df_industry_classify = Utils.get_industry_classify()
            for com_factor in synthetic_ct['component']:
                component_ct = getattr(risk_ct, com_factor + '_CT')
                factor_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                           component_ct['db_file'])
                factor_loading = Utils.read_factor_loading(
                    factor_path, str_calc_date)
                factor_loading = factor_loading.drop(columns='date').rename(
                    columns={'factorvalue': com_factor})
                # Attach the industry code of each stock.
                factor_loading = pd.merge(
                    left=factor_loading,
                    right=df_industry_classify[['id', 'ind_code']],
                    how='inner',
                    on='id')
                # Rows with a missing loading are set aside; the copy makes
                # the later assignment hit a real frame, not a filtered view.
                missingdata_factor = factor_loading[
                    factor_loading[com_factor].isna()].copy()
                factor_loading = factor_loading.dropna(axis='index',
                                                       how='any')
                # Treat outliers and standardize the complete rows.
                factor_loading = Utils.normalize_data(factor_loading,
                                                      id='id',
                                                      columns=com_factor,
                                                      treat_outlier=True,
                                                      weight='cap',
                                                      calc_date=calc_date)
                # Replace the missing loadings by the industry mean of the
                # normalized values.
                if not missingdata_factor.empty:
                    ind_mean_factor = {
                        ind_code: factor_loading.loc[
                            factor_loading['ind_code'] == ind_code,
                            com_factor].mean()
                        for ind_code in set(missingdata_factor['ind_code'])
                    }
                    missingdata_factor[com_factor] = missingdata_factor[
                        'ind_code'].map(ind_mean_factor)
                # Put filled and complete rows back together.
                factor_loading = pd.concat(
                    [factor_loading, missingdata_factor])
                factor_loading = factor_loading.drop(columns='ind_code')
                # Merge this component into the running table.
                if synthetic_factor.empty:
                    synthetic_factor = factor_loading
                else:
                    synthetic_factor = pd.merge(left=synthetic_factor,
                                                right=factor_loading,
                                                how='inner',
                                                on='id')

            # Weighted sum of the components gives the synthetic loading.
            synthetic_factor = synthetic_factor.set_index('id')
            weight = pd.Series(synthetic_ct['weight'])
            synthetic_factor = (synthetic_factor * weight).sum(axis=1)
            synthetic_factor.name = 'factorvalue'
            synthetic_factor.index.name = 'id'
            synthetic_factor = pd.DataFrame(synthetic_factor)
            synthetic_factor.reset_index(inplace=True)
            # The row is labelled with the second entry of the two trading
            # days starting at calc_date.
            synthetic_factor['date'] = Utils.get_trading_days(start=calc_date,
                                                              ndays=2)[1]
            # Persist the synthetic factor loading.
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    str_calc_date,
                    synthetic_factor.to_dict('list'),
                    ['date', 'id', 'factorvalue'])
Пример #14
0
    def get_dependent_factors(cls, date):
        """
        Build the matrix of factors used for factor purification: industry
        dummies plus scale, value, growth, short-term and long-term momentum.

        Parameters:
        --------
        :param date: datetime-like or str
            Date whose stored factor loadings are read.
        :return: pd.DataFrame
            Indexed by stock id. Columns are one dummy column per industry
            code followed by 'scale', 'value', 'growth', 'short_mom' and
            'long_mom'. Only ids present in every input are kept.
        """
        str_date = Utils.to_date(date).strftime('%Y%m%d')

        def load_raw(db_file):
            # Read one raw factor table for str_date, with NaN replaced by 0.
            return Utils.read_factor_loading(
                os.path.join(factor_ct.FACTOR_DB.db_path, db_file),
                str_date,
                nan_value=0)

        def composite(df_raw, columns):
            # Treat extreme values, normalize, then average the given columns
            # row-wise into a single series indexed by stock id.
            cleaned = Utils.clean_extreme_value(np.array(df_raw[columns]))
            normalized = Utils.normalize_data(cleaned)
            return Series(np.mean(normalized, axis=1), index=df_raw['id'])

        # 1. Industry factor: dummy variables of the industry classification.
        industry_classify = Utils.get_industry_classify().set_index('id')
        df_industry_dummies = pd.get_dummies(industry_classify['ind_code'])

        # 2. Scale factor.
        scale_factor = composite(load_raw(factor_ct.SCALE_CT.db_file),
                                 ['LnLiquidMktCap', 'LnTotalMktCap'])

        # 3. Value factor.
        value_factor = composite(load_raw(factor_ct.VALUE_CT.db_file),
                                 ['ep_ttm', 'bp_lr', 'ocf_ttm'])

        # 4. Growth factor.
        growth_factor = composite(load_raw(factor_ct.GROWTH_CT.db_file),
                                  ['npg_ttm', 'opg_ttm'])

        # 5. Momentum factors: both horizons come from the same raw table;
        #    the column names are derived from the '|'-separated day lists.
        df_mom_raw = load_raw(factor_ct.MOMENTUM_CT.db_file)
        short_columns = [
            'short_term_' + days
            for days in factor_ct.MOMENTUM_CT.short_term_days.split('|')
        ]
        short_mom = composite(df_mom_raw, short_columns)
        long_columns = [
            'long_term_' + days
            for days in factor_ct.MOMENTUM_CT.long_term_days.split('|')
        ]
        long_mom = composite(df_mom_raw, long_columns)

        # Join the style factors first, then attach the industry dummies.
        style_series = [
            scale_factor, value_factor, growth_factor, short_mom, long_mom
        ]
        style_names = ['scale', 'value', 'growth', 'short_mom', 'long_mom']
        df_style_factor = pd.concat(style_series,
                                    axis=1,
                                    keys=style_names,
                                    join='inner')
        return pd.concat([df_industry_dummies, df_style_factor],
                         axis=1,
                         join='inner')
Пример #15
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the intraday momentum factor loadings of the sample stocks
        and optionally persist them to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date, format YYYY-MM-DD or YYYYMMDD.
        :param end_date: datetime-like, str
            End date, format YYYY-MM-DD or YYYYMMDD. If None, only the
            loading for start_date is computed.
        :param month_end: bool, default True
            If True, only month-end dates are processed.
        :param save: bool, default False
            Whether to persist the result to the factor database.
        :param kwargs['synthetic_factor']: bool, default False
            If truthy, combine the stored per-period loadings into the
            synthetic factor instead of computing the per-period loadings.
        :return: dict or None
        --------
            Loadings of the last processed date as a dict of lists.
            Per-period mode:
            0. date: date label
            1. id: security symbol
            2. m0: overnight momentum
            3. m1: first-hour momentum
            4. m2: second-hour momentum
            5. m3: third-hour momentum
            6. m4: fourth-hour momentum
            7. m_normal: conventional momentum
            Synthetic mode: keys date, id, factorvalue.
            None is returned when no date was processed, or (synthetic mode)
            as soon as the stored loadings or the factor weights of a date
            are missing; the remaining dates are then not processed.
        """
        # Trading day sequence and the table of stock basics.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        calc_synthetic = bool(kwargs.get('synthetic_factor', False))
        momentum_keys = ('m0', 'm1', 'm2', 'm3', 'm4', 'm_normal')

        dict_intraday_momentum = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info(
                '[%s] calc synthetic intraday momentum factor loading.' %
                Utils.datetimelike_to_str(calc_date))
            if calc_synthetic:
                # Read the stored per-period momentum loadings.
                df_factor_loading = Utils.read_factor_loading(
                    cls._db_file, Utils.datetimelike_to_str(calc_date, False))
                if df_factor_loading.shape[0] <= 0:
                    logging.info(
                        "[%s] It doesn't exist intraday momentum factor loading."
                        % Utils.datetimelike_to_str(calc_date))
                    return
                df_factor_loading.fillna(0, inplace=True)
                # Read the optimal factor weights.
                factor_weight = cls.get_factor_weight(calc_date)
                if factor_weight is None:
                    # The date was previously never substituted into this
                    # message, so a literal "%s" was logged.
                    logging.info("[%s] It doesn't exist factor weight." %
                                 Utils.datetimelike_to_str(calc_date))
                    return
                # Synthetic factor = per-period loadings (n x 5) times the
                # weight vector (5 x 1).
                arr_factor_loading = np.array(
                    df_factor_loading[['m0', 'm1', 'm2', 'm3', 'm4']])
                arr_factor_weight = np.array(
                    factor_weight.drop('date')).reshape((5, 1))
                arr_synthetic_factor = np.dot(arr_factor_loading,
                                              arr_factor_weight)
                dict_intraday_momentum = {
                    'date': list(df_factor_loading['date']),
                    'id': list(df_factor_loading['id']),
                    'factorvalue': list(
                        arr_synthetic_factor.astype(float).round(6).reshape(
                            (arr_synthetic_factor.shape[0], )))
                }
                # Persist the synthetic factor.
                if save:
                    synthetic_db_file = os.path.join(
                        factor_ct.FACTOR_DB.db_path,
                        factor_ct.INTRADAYMOMENTUM_CT.synthetic_db_file)
                    Utils.factor_loading_persistent(
                        synthetic_db_file,
                        Utils.datetimelike_to_str(calc_date, False),
                        dict_intraday_momentum)
            else:
                dict_intraday_momentum = {
                    'date': [],
                    'id': [],
                    'm0': [],
                    'm1': [],
                    'm2': [],
                    'm3': [],
                    'm4': [],
                    'm_normal': []
                }
                # Only stocks whose list_date is more than 90 calendar days
                # before calc_date are considered.
                listed_before = (
                    calc_date -
                    datetime.timedelta(days=90)).strftime('%Y%m%d')
                stock_basics = all_stock_basics[
                    all_stock_basics.list_date < listed_before]

                # Compute the loadings in parallel; each worker puts its
                # result on the shared queue.
                q = Manager().Queue()
                p = Pool(4)  # at most 4 worker processes
                try:
                    for _, stock_info in stock_basics.iterrows():
                        p.apply_async(cls._calc_factor_loading_proc,
                                      args=(
                                          stock_info.symbol,
                                          calc_date,
                                          q,
                                      ))
                finally:
                    # Always shut the pool down, even if submission failed.
                    p.close()
                    p.join()
                # Queue items are read positionally: [0] is the id,
                # [1]..[6] are m0..m4 and m_normal.
                while not q.empty():
                    momentum_data = q.get(True)
                    dict_intraday_momentum['id'].append(momentum_data[0])
                    for pos, key in enumerate(momentum_keys, start=1):
                        dict_intraday_momentum[key].append(
                            round(momentum_data[pos], 6))

                date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
                dict_intraday_momentum['date'] = [date_label] * len(
                    dict_intraday_momentum['id'])
                # Persist the loadings to the factor database.
                if save:
                    Utils.factor_loading_persistent(
                        cls._db_file, calc_date.strftime('%Y%m%d'),
                        dict_intraday_momentum)
                # Pause for 360 seconds before the next date.
                logging.info('Suspending for 360s.')
                time.sleep(360)
        return dict_intraday_momentum
Пример #16
0
def _calc_Orthogonalized_factorloading(factor_name,
                                       start_date,
                                       end_date=None,
                                       month_end=True,
                                       save=False):
    """
    Compute the orthogonalized loading of an alpha factor.

    For each processed date the standardized loading of the target factor is
    regressed (cross-sectional OLS with a constant) on the style factor
    loadings of the risk model and on the already orthogonalized loadings of
    the alpha factors that precede it in ``alphafactor_ct.ALPHA_FACTORS``.
    The regression residual is the orthogonalized loading.

    Parameters:
    --------
    :param factor_name: str
        Name of the alpha factor, e.g. SmartMoney.
    :param start_date: datetime-like, str
        Start date, e.g. YYYY-MM-DD, YYYYMMDD.
    :param end_date: datetime-like, str, default None
        End date, e.g. YYYY-MM-DD, YYYYMMDD. If None, only start_date is
        processed.
    :param month_end: bool, default True
        Whether to process month-end dates only.
    :param save: bool, default False
        Whether to persist the result.
    :return: dict
    --------
        Orthogonalized loading of the last processed date:
        0. date, the trading day following the calculation date
        1. id, security code
        2. factorvalue, factor loading (the OLS residuals)
        An empty dict is returned when no date was processed.
    """
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date,
                                                     end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)

    CRiskModel = Barra()

    # Constants are looked up by attribute name instead of eval() on a
    # concatenated string; the paths do not depend on the date.
    target_db_file = getattr(alphafactor_ct,
                             factor_name.upper() + '_CT')['db_file']
    target_factor_path = os.path.join(SETTINGS.FACTOR_DB_PATH, target_db_file,
                                      'standardized', factor_name)
    orthog_factorloading_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                             target_db_file, 'orthogonalized',
                                             factor_name)

    orthog_factorloading = {}
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        str_calc_date = Utils.datetimelike_to_str(calc_date, dash=False)

        # Standardized loading of the target factor.
        df_targetfactor_loading = Utils.read_factor_loading(
            target_factor_path, str_calc_date, drop_na=True)
        df_targetfactor_loading = df_targetfactor_loading.drop(
            columns='date').rename(columns={'factorvalue': factor_name})

        # Style factor loading matrix of the risk model.
        # Fixed: the original called the misspelled ``.renmae`` here, which
        # raised AttributeError on every run.
        df_stylefactor_loading = CRiskModel.get_StyleFactorloading_matrix(
            calc_date)
        df_stylefactor_loading = df_stylefactor_loading.rename(
            columns={'code': 'id'})

        # Orthogonalized loadings of the alpha factors listed before the
        # target factor; the loop stops at the target factor itself.
        df_alphafactor_loading = pd.DataFrame()
        for alphafactor_name in alphafactor_ct.ALPHA_FACTORS:
            if alphafactor_name == factor_name:
                break
            factorloading_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                getattr(alphafactor_ct,
                        alphafactor_name.upper() + '_CT')['db_file'],
                'orthogonalized', alphafactor_name)
            factor_loading = Utils.read_factor_loading(factorloading_path,
                                                       str_calc_date,
                                                       drop_na=True)
            factor_loading = factor_loading.drop(columns='date').rename(
                columns={'factorvalue': alphafactor_name})

            if df_alphafactor_loading.empty:
                df_alphafactor_loading = factor_loading
            else:
                df_alphafactor_loading = pd.merge(left=df_alphafactor_loading,
                                                  right=factor_loading,
                                                  how='inner',
                                                  on='id')

        # Combine target, style and alpha factor loadings.
        df_factorloading = pd.merge(left=df_targetfactor_loading,
                                    right=df_stylefactor_loading,
                                    how='inner',
                                    on='id')
        if not df_alphafactor_loading.empty:
            # Fixed: merge onto the running table. The original merged from
            # df_stylefactor_loading again, which dropped the target factor
            # column and made the lookup below fail with KeyError.
            df_factorloading = pd.merge(left=df_factorloading,
                                        right=df_alphafactor_loading,
                                        how='inner',
                                        on='id')

        # Target vector and style+alpha regressor matrix.
        df_factorloading = df_factorloading.set_index('id')
        arr_targetfactor_loading = np.array(df_factorloading[factor_name])
        stylealphafactor_names = [
            col for col in df_factorloading.columns if col != factor_name
        ]
        arr_stylealphafactor_loading = np.array(
            df_factorloading[stylealphafactor_names])

        # Cross-sectional regression; the residual is the orthogonalized
        # factor loading.
        Y = arr_targetfactor_loading
        X = sm.add_constant(arr_stylealphafactor_loading)
        results = sm.OLS(Y, X).fit()

        datelabel = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        orthog_factorloading = {
            'date': [datelabel] * len(results.resid),
            'id': df_factorloading.index.tolist(),
            'factorvalue': results.resid
        }

        # Persist the orthogonalized loading.
        if save:
            Utils.factor_loading_persistent(
                orthog_factorloading_path, str_calc_date,
                orthog_factorloading, ['date', 'id', 'factorvalue'])

    return orthog_factorloading