示例#1
0
 def calc_factor_loading_(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
     """
     Build the ResVolatility composite factor loading and optionally persist it.

     For every processed trading day, each component factor listed in
     ``risk_ct.RESVOLATILITY_CT.component`` is calculated first. The component
     loadings are then read back, passed through ``Utils.clean_extreme_value``
     and ``Utils.normalize_data``, inner-joined on ``id`` and combined into a
     weighted sum using ``risk_ct.RESVOLATILITY_CT.weight``.

     Parameters:
     --------
     :param start_date: datetime-like, str
         Start date, format: YYYY-MM-DD or YYYYMMDD
     :param end_date: datetime-like, str
         End date, format: YYYY-MM-DD or YYYYMMDD; when None only start_date
         is processed
     :param month_end: bool, default True
         When True, only month-end trading days are processed
     :param save: bool, default False
         Whether to persist the composite loading to the factor database
     :param kwargs:
         'multi_proc': bool, forwarded to every component factor's
         calc_factor_loading; defaults to False when absent
     :return: None
         This method has no return statement; the composite loading is only
         handed to Utils.factor_loading_persistent when save is True
     """
     # Resolve the sequence of trading days to process
     start_date = Utils.to_date(start_date)
     if end_date is None:
         trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
     else:
         end_date = Utils.to_date(end_date)
         trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)

     kwargs.setdefault('multi_proc', False)
     use_multi_proc = kwargs['multi_proc']

     for calc_date in trading_days_series:
         if month_end and not Utils.is_month_end(calc_date):
             continue

         # Step 1: compute every component factor for this day.
         # The component name is the class name, instantiated through eval.
         for com_factor in risk_ct.RESVOLATILITY_CT.component:
             component_factor = eval(com_factor + '()')
             component_factor.calc_factor_loading(start_date=calc_date, end_date=None, month_end=month_end, save=save, multi_proc=use_multi_proc)

         # Step 2: read the component loadings back and join them on 'id'
         composite = pd.DataFrame()
         for com_factor in risk_ct.RESVOLATILITY_CT.component:
             db_root = factor_ct.FACTOR_DB.db_path
             component_ct = eval('risk_ct.' + com_factor + '_CT')
             loading_path = os.path.join(db_root, component_ct['db_file'])
             loading = Utils.read_factor_loading(loading_path, Utils.datetimelike_to_str(calc_date, dash=False))
             # Raw values are reshaped to a single column before cleaning/normalizing
             raw_values = np.array(loading['factorvalue']).reshape((len(loading), 1))
             loading[com_factor] = Utils.normalize_data(Utils.clean_extreme_value(raw_values))
             # Only 'id' and the processed component column are kept
             loading.drop(columns=['date', 'factorvalue'], inplace=True)
             composite = loading if composite.empty else pd.merge(left=composite, right=loading, how='inner', on='id')

         # Step 3: weighted sum of the component columns
         composite.set_index('id', inplace=True)
         weights = pd.Series(risk_ct.RESVOLATILITY_CT.weight)
         weighted_sum = (composite * weights).sum(axis=1)
         weighted_sum.name = 'factorvalue'
         weighted_sum.index.name = 'id'
         result = weighted_sum.to_frame().reset_index()
         # The loading is labelled with the second entry of the 2-day window starting at calc_date
         result['date'] = Utils.get_trading_days(start=calc_date, ndays=2)[1]

         # Step 4: persist the composite loading
         if save:
             Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), result.to_dict('list'), ['date', 'id', 'factorvalue'])
示例#2
0
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Compute the NLSIZE factor loading for the requested dates and optionally persist it.

        For each processed trading day the Size factor loading CSV is read,
        the cubed Size values are regressed on the Size values with ``sm.OLS``
        and the regression residual is used, unmodified, as the NLSIZE loading
        (no winsorizing or standardizing is applied here).

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD; when None only
            start_date is processed
        :param month_end: bool, default True
            When True, only month-end trading days are processed
        :param save: bool, default False
            Whether to persist the loading to the factor database
        :param kwargs:
            Accepted for interface compatibility; not used by this method
        :return: dict or None
            Loading of the last processed day with keys 'date', 'id' and
            'factorvalue'; None when no day was processed
        """
        # Resolve the sequence of trading days to process
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)

        dict_nlsize = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc NLSIZE factor loading.', Utils.datetimelike_to_str(calc_date))
            # Locate the Size factor loading file for this day
            lncap_data_path = os.path.join(factor_ct.FACTOR_DB.db_path, '{}_{}.csv'.format(risk_ct.SIZE_CT.db_file, Utils.datetimelike_to_str(calc_date, dash=False)))
            if not os.path.exists(lncap_data_path):
                # Without Size data the day is skipped; the previous result (if any) is kept
                logging.info('[%s] 的Size因子载荷数据不存在.', Utils.datetimelike_to_str(calc_date))
                continue
            df_lncap = pd.read_csv(lncap_data_path, header=0)
            arr_size = np.array(df_lncap['factorvalue'])
            arr_size_cube = arr_size ** 3
            # Orthogonalize the cubed Size against Size: the residual is the loading
            arr_resid = sm.OLS(arr_size_cube, arr_size).fit().resid
            # 'date' and 'id' are carried over from the Size loading file
            dict_nlsize = {'date': df_lncap['date'].values, 'id': df_lncap['id'].values, 'factorvalue': arr_resid}
            if save:
                Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_nlsize, ['date', 'id', 'factorvalue'])

        return dict_nlsize
示例#3
0
def _calc_alphafactor_loading(start_date,
                              end_date=None,
                              factor_name=None,
                              multi_proc=False,
                              test=False):
    """
    Compute and save alpha factor loadings for a range of trading days.

    Each selected factor class is instantiated by name and its
    ``calc_factor_loading`` is invoked with ``month_end=True`` and
    ``save=True`` for every trading day in the range.

    Parameters:
    --------
    :param start_date: datetime-like, str
        Start date, e.g: YYYY-MM-DD, YYYYMMDD
    :param end_date: datetime-like, str, default None
        End date, e.g: YYYY-MM-DD, YYYYMMDD; when None only start_date is used
    :param factor_name: str, default None
        Alpha factor class name, e.g: SmartMoney.
        When None, every factor in alphafactor_ct.ALPHA_FACTORS is computed;
        otherwise only the named factor is computed
    :param multi_proc: bool, default False
        Forwarded to each factor's calc_factor_loading
    :param test: bool, default False
        When True, factor_name is not required to be listed in
        alphafactor_ct.ALPHA_FACTORS
    :return: None
    :raises ValueError: factor_name is not a registered alpha factor and
        test is False (raised while processing the first trading day)
    """
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date,
                                                     end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)

    for calc_date in trading_days_series:
        # Decide which factor classes to run for this day
        if factor_name is None:
            factor_names = alphafactor_ct.ALPHA_FACTORS
        elif test or factor_name in alphafactor_ct.ALPHA_FACTORS:
            factor_names = [factor_name]
        else:
            raise ValueError("alpha因子类: %s, 不存在." % factor_name)

        for class_name in factor_names:
            # The factor name doubles as the class name; instantiate it via eval
            alpha_factor = eval(class_name + '()')
            alpha_factor.calc_factor_loading(calc_date,
                                             month_end=True,
                                             save=True,
                                             multi_proc=multi_proc)
示例#4
0
def calc_future_ret(date, ndays):
    """
    Compute per-stock interval returns over the ndays+1 trading days ending at `date` and save them as CSV.

    The trading-day window is ``Utils.get_trading_days(end=date, ndays=ndays+1)``.
    For every stock returned by ``Utils.get_stock_basics`` for the first day
    of the window, column ``day<k>`` (k = 1..ndays) holds the interval return
    from the window's second day to its (k+1)-th day, rounded to 6 decimals.
    Stocks with any missing value are dropped. The CSV is named after the
    first day of the window.

    Parameters:
    --------
    :param date: datetime-like, str
        Date, e.g: YYYY-MM-DD, YYYYMMDD
    :param ndays: int
        Number of days
    :return: None
    """
    # Window of ndays+1 trading days ending at `date`
    trading_days_series = Utils.get_trading_days(end=date, ndays=ndays+1)
    stock_basics = Utils.get_stock_basics(trading_days_series[0])

    day_labels = ['day' + str(k) for k in range(1, ndays + 1)]
    headers = ['code'] + day_labels

    # Collect plain dict rows and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copies the whole frame on every call.
    rows = []
    for _, stock_info in stock_basics.iterrows():
        row = {'code': stock_info.symbol}
        for k, label in enumerate(day_labels, start=1):
            interval_ret = Utils.calc_interval_ret(stock_info.symbol, start=trading_days_series[1], end=trading_days_series[k])
            # Test the helper's result directly rather than after storing it in a
            # pandas container, which may turn None into NaN.
            row[label] = np.nan if interval_ret is None else round(interval_ret, 6)
        rows.append(row)
    df_future_ret = pd.DataFrame(rows, columns=headers)
    # Discard stocks lacking any of the interval returns (done once, not per stock)
    df_future_ret.dropna(axis=0, how='any', inplace=True)

    # Save the data; the sub-directory comes from config.ini in the working directory
    cfg = ConfigParser()
    cfg.read('config.ini')
    future_ret_path = os.path.join(SETTINGS.FACTOR_DB_PATH, cfg.get('future_ret', 'ret_path'), '{}.csv'.format(Utils.datetimelike_to_str(trading_days_series[0], dash=False)))
    df_future_ret.to_csv(future_ret_path, index=False, encoding='utf-8')
示例#5
0
 def _calc_factor_loading(cls, code, calc_date):
     """
     Compute the SmartMoney (smart Q) factor loading of one stock for one date.

     Minute bars are ranked by S_t = abs(R_t) / sqrt(V_t) in descending
     order (R_t = minute return, V_t = minute volume). The bars up to the
     first one whose cumulative volume exceeds 20% of the total are treated as
     "smart money" trades, and the loading is Q = VWAP_smart / VWAP_all.

     :param code: stock code, e.g. SH600000 or 600000
     :param calc_date: calculation date; the 30 trading days ending at this
         date are used to read minute market data
     :return: float
         The smart Q loading rounded to 6 decimals, or None when it cannot
         be computed (not enough data, no minute bars, or no traded volume)
     """
     # Last 30 trading days, most recent first
     trading_days = Utils.get_trading_days(end=calc_date,
                                           ndays=30,
                                           ascending=False)
     # Minute bars of the most recent cls.__days trading days
     be_enough, df_min_mkt = Utils.get_min_mkts_fq(code, trading_days,
                                                   cls.__days)
     if not be_enough or df_min_mkt.empty:
         return None

     # 1. Indicator S_t per minute bar; bars without volume get 0
     df_min_mkt['ind_s'] = [
         abs(ret) * 10000 / math.sqrt(volume * 100.0) if volume > 0 else 0
         for ret, volume in zip(df_min_mkt['ret'], df_min_mkt['volume'])
     ]
     # 2. Sort bars by S in descending order
     df_min_mkt = df_min_mkt.sort_values(by='ind_s', ascending=False)
     # 3. Cumulative volume and cumulative amount along that ordering
     df_min_mkt['accum_volume'] = df_min_mkt['volume'].cumsum()
     df_min_mkt['accum_amount'] = df_min_mkt['amount'].cumsum()
     # 4. Locate the smart-money bars and compare the two VWAPs
     last_bar = df_min_mkt.iloc[-1]
     # volume is scaled by 100 throughout — presumably lots to shares; TODO confirm
     total_volume = last_bar.accum_volume * 100
     if not total_volume > 0:
         # No traded volume (or NaN): both VWAPs are undefined and the
         # threshold lookup below would find no bar at all.
         return None
     total_amount = last_bar.accum_amount
     smart_volume = int(last_bar.accum_volume * 0.2)
     vwap_all = total_amount / total_volume
     # First bar whose cumulative volume exceeds the 20% threshold
     smart_mkt = df_min_mkt[df_min_mkt.accum_volume > smart_volume].iloc[0]
     vwap_smart = smart_mkt.accum_amount / (smart_mkt.accum_volume * 100.0)
     return round(vwap_smart / vwap_all, 6)
示例#6
0
    def _calc_periodmomentum_ic(cls, calc_date, date_interval_type='month'):
        """
        Compute the Rank IC vector of the intraday period momentum factors and append it to the IC file.

        The raw period momentum loadings for calc_date are paired with each
        stock's subsequent return, and the Spearman correlation between the
        return and every period factor is taken as the Rank IC.

        Parameters:
        --------
        :param calc_date: datetime-like, str
            Calculation date, e.g: YYYY-MM-DD, YYYYMMDD
        :param date_interval_type: str
            Horizon of the stock return: 'month' = the interval returned by
            Utils.next_month(calc_date), 'day' = the second entry of the
            2-day trading window starting at calc_date
        :return: pd.Series or None
        --------
            Rank IC vector indexed by:
            0. m0..m4, Spearman correlation of the return with each of the
               five period momentum columns
            1. date, the calc_date passed in
            None is returned when no factor loading data is available
        :raises ValueError: date_interval_type is neither 'month' nor 'day'
        """
        # Read the raw period momentum loadings for the calculation date
        df_period_mom = cls._get_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False),
                                                factor_name='periodmomentum', factor_type='raw', drop_na=True)
        if df_period_mom.empty:
            return None

        if date_interval_type == 'month':
            ret_start, ret_end = Utils.next_month(calc_date)
        elif date_interval_type == 'day':
            ret_start = ret_end = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        else:
            # Fail explicitly instead of hitting an unbound ret_start below
            raise ValueError("Unsupported date_interval_type: %r, expected 'month' or 'day'." % (date_interval_type,))

        # Attach the forward return of every stock; stocks without one are dropped
        df_period_mom['ret'] = np.nan
        for idx, factorloading_data in df_period_mom.iterrows():
            fret = Utils.calc_interval_ret(factorloading_data['id'], start=ret_start, end=ret_end)
            if fret is not None:
                df_period_mom.loc[idx, 'ret'] = fret
        df_period_mom.dropna(inplace=True)
        # Rank IC = Spearman correlation between the return and each period factor
        df_period_mom.drop(columns=['date', 'id', 'm_normal'], inplace=True)
        df_spearman_corr = df_period_mom.corr(method='spearman')
        rank_IC = df_spearman_corr.loc['ret', ['m0', 'm1', 'm2', 'm3', 'm4']]
        rank_IC['date'] = calc_date
        # Save the Rank IC values (save_type='a' — presumably append mode; confirm in Utils)
        ic_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH, alphafactor_ct.INTRADAYMOMENTUM_CT['factor_ic_file'])
        Utils.save_timeseries_data(rank_IC, ic_filepath, save_type='a', columns=['date', 'm0', 'm1', 'm2', 'm3', 'm4'])

        return rank_IC
示例#7
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the SmartQ factor loading of the sample stocks and optionally persist it.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date
        :param end_date: datetime-like, str, default None
            End date; when None only start_date is processed
        :param month_end: bool, default True
            When True, only month-end trading days are processed
        :param save: bool, default False
            Whether to persist the loading to the factor database
        :param kwargs:
            Accepted for interface compatibility; not used by this method
        :return: dict or None
        --------
            Loading of the last processed trading day:
            0: id, list of security ids
            1: factorvalue, list of factor loadings
            2: date, list holding the date label, one entry per id
            None is returned when no trading day was processed
        """
        # 0. Resolve the sequence of trading days to process
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        # Basic information of all securities
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Iterate over the trading days and compute the SmartQ loading
        dict_factor = None
        for calc_date in trading_days_series:
            # A skipped day must not touch dict_factor, so the caller still
            # receives the last computed result (or None if there is none).
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 1. Sample = stocks whose list_date is more than 90 calendar days before calc_date
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            # 2. Compute the loading of every sample stock
            dict_factor = {'id': [], 'factorvalue': []}

            # Worker processes push their results into a managed queue
            q = Manager().Queue()
            p = Pool(4)  # at most 4 worker processes at a time
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            # Each queue item is indexed as (id, factorvalue)
            while not q.empty():
                smart_q = q.get(True)
                dict_factor['id'].append(smart_q[0])
                dict_factor['factorvalue'].append(smart_q[1])

            # The loading is labelled with the second entry of the 2-day window for calc_date
            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_factor['date'] = [date_label] * len(dict_factor['id'])
            # 3. Persist the loading to the factor database
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_factor)
            # Pause for 360 seconds before the next trading day
            logging.info('Suspending for 360s.')
            time.sleep(360)
        return dict_factor
示例#8
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the momentum factor loadings of the sample stocks and optionally persist them.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD.
            NOTE(review): start_date/end_date are handed to
            Utils.get_trading_days unchanged, so the behaviour for
            end_date=None is whatever that helper does — confirm
        :param month_end: bool, default True
            When True, only month-end trading days are processed
        :param save: bool, default False
            Whether to persist the loadings to the factor database
        :param kwargs:
            Accepted for interface compatibility; not used by this method
        :return: dict or None
        --------
            Loadings of the last processed trading day:
            0. date: list holding the date label, one entry per id
            1. id: list of security ids
            2. one list per label returned by cls.momentum_label()
            None is returned when no trading day was processed
        """
        # Trading days to process and the basic information of all securities
        trading_days_series = Utils.get_trading_days(start=start_date,
                                                     end=end_date)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()

        dict_momentum = None
        momentum_label = cls.momentum_label()
        for calc_date in trading_days_series:
            if month_end and not Utils.is_month_end(calc_date):
                continue

            # Fresh result container: 'date', 'id', then one list per momentum label
            dict_momentum = {'date': [], 'id': []}
            dict_momentum.update((label, []) for label in momentum_label)

            # Sample = stocks whose list_date is more than 90 calendar days before calc_date
            listed_before = (calc_date -
                             datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[
                all_stock_basics.list_date < listed_before]

            # Fan the per-stock calculation out to a pool of 4 worker processes;
            # workers report their results through a managed queue.
            result_queue = Manager().Queue()
            pool = Pool(4)
            for _, stock_info in stock_basics.iterrows():
                pool.apply_async(cls._calc_factor_loading_proc,
                                 args=(stock_info.symbol, calc_date,
                                       result_queue))
            pool.close()
            pool.join()

            # Drain the queue; every item is keyed by 'id' and the momentum labels
            while not result_queue.empty():
                momentum_data = result_queue.get(True)
                dict_momentum['id'].append(momentum_data['id'])
                for label in momentum_label:
                    dict_momentum[label].append(momentum_data[label])

            # The loadings are labelled with the second entry of the 2-day window starting at calc_date
            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_momentum['date'] = [date_label] * len(dict_momentum['id'])

            # Persist the loadings to the factor database
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_momentum)
            # Pause for 60 seconds before the next trading day
            logging.info('Suspending for 60s.')
            time.sleep(60)
        return dict_momentum
示例#9
0
    def calc_factor_loading_(cls,
                             start_date,
                             end_date=None,
                             month_end=True,
                             save=False,
                             **kwargs):
        """
        Build the Growth composite factor loading and optionally persist it.

        For every processed trading day, each component factor listed in
        ``risk_ct.GROWTH_CT.component`` is calculated first. Each component
        loading is then read back, normalized through ``Utils.normalize_data``
        (rows with missing values excluded), and the missing component values
        are filled with the mean of the normalized values of the same
        industry. The components are inner-joined on ``id`` and combined into
        a weighted sum using ``risk_ct.GROWTH_CT.weight``.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD; when None only
            start_date is processed
        :param month_end: bool, default True
            When True, only month-end trading days are processed
        :param save: bool, default False
            Whether to persist the composite loading to the factor database
        :param kwargs:
            'multi_proc': bool, forwarded to every component factor's
            calc_factor_loading; defaults to False when absent
        :return: None
            This method has no return statement; the composite loading is
            only handed to Utils.factor_loading_persistent when save is True
        """
        # Resolve the sequence of trading days to process
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        multi_proc = kwargs.get('multi_proc', False)
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # Compute every component factor for this day.
            # The component name is the class name, instantiated through eval.
            for com_factor in risk_ct.GROWTH_CT.component:
                factor = eval(com_factor + '()')
                factor.calc_factor_loading(start_date=calc_date,
                                           end_date=None,
                                           month_end=month_end,
                                           save=save,
                                           multi_proc=multi_proc)
            # Assemble the component loadings
            growth_factor = pd.DataFrame()
            df_industry_classify = Utils.get_industry_classify()
            for com_factor in risk_ct.GROWTH_CT.component:
                factor_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    eval('risk_ct.' + com_factor + '_CT')['db_file'])
                factor_loading = Utils.read_factor_loading(
                    factor_path,
                    Utils.datetimelike_to_str(calc_date, dash=False))
                factor_loading.drop(columns='date', inplace=True)
                factor_loading.rename(columns={'factorvalue': com_factor},
                                      inplace=True)
                # Attach the industry code of every stock
                factor_loading = pd.merge(
                    left=factor_loading,
                    right=df_industry_classify[['id', 'ind_code']],
                    how='inner',
                    on='id')
                # Rows lacking the component value; copied so that filling
                # them below never writes into a slice of factor_loading
                missingdata_factor = factor_loading[
                    factor_loading[com_factor].isna()].copy()
                # Normalize only the complete rows
                factor_loading.dropna(axis='index', how='any', inplace=True)
                factor_loading = Utils.normalize_data(factor_loading,
                                                      id='id',
                                                      columns=com_factor,
                                                      treat_outlier=True,
                                                      weight='cap',
                                                      calc_date=calc_date)
                # Fill the missing values with the industry mean of the
                # normalized loadings; industries without any data stay NaN
                ind_mean_factor = factor_loading.groupby(
                    'ind_code')[com_factor].mean()
                missingdata_factor[com_factor] = missingdata_factor[
                    'ind_code'].map(ind_mean_factor)
                # Recombine complete and filled rows, then drop the helper column
                factor_loading = pd.concat(
                    [factor_loading, missingdata_factor])
                factor_loading.drop(columns='ind_code', inplace=True)

                if growth_factor.empty:
                    growth_factor = factor_loading
                else:
                    growth_factor = pd.merge(left=growth_factor,
                                             right=factor_loading,
                                             how='inner',
                                             on='id')

            # Weighted sum of the component columns
            growth_factor.set_index('id', inplace=True)
            weight = pd.Series(risk_ct.GROWTH_CT.weight)
            growth_factor = (growth_factor * weight).sum(axis=1)
            growth_factor.name = 'factorvalue'
            growth_factor.index.name = 'id'
            growth_factor = pd.DataFrame(growth_factor)
            growth_factor.reset_index(inplace=True)
            # The loading is labelled with the second entry of the 2-day window starting at calc_date
            growth_factor['date'] = Utils.get_trading_days(start=calc_date,
                                                           ndays=2)[1]
            # Persist the composite loading
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    Utils.datetimelike_to_str(calc_date, dash=False),
                    growth_factor.to_dict('list'),
                    ['date', 'id', 'factorvalue'])
示例#10
0
def smartq_backtest(start, end):
    """
    Historical back-test of the SmartQ factor.

    Walks every trading day between start and end. On a month-start trading
    day the current holdings (if any) are valued at that day's average price,
    a new portfolio is selected from the SmartQ factor loadings read for the
    previous trading day, and the holdings are written to a
    ``port_data_<date>.csv`` file. On every other day the holdings are marked
    to the close. One NAV row is added per trading day and the whole NAV table
    is written to ``port_nav.csv`` at the end.

    Parameters:
    --------
    :param start: datetime-like, str
        Back-test start date, format: YYYY-MM-DD; should be a month start
    :param end: datetime-like, str
        Back-test end date, format: YYYY-MM-DD
    :return: None
    """
    # Imported locally: pd.concat is used to add NAV rows because
    # DataFrame.append no longer exists in pandas >= 2.0.
    import pandas as pd

    # Trading days between the start and end dates
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest back-test state saved before the start date
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data holds the current holdings; the code below reads its
    # date, id, factorvalue and buyprice columns. It is None until a
    # portfolio exists.
    if port_nav is None:
        # No NAV history: start from 1.0 on the day before the first trading day
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Rebalance on month starts, otherwise just value the portfolio
    t = 0  # number of rebalances performed in this run
    for trading_day in trading_days:
        # Base NAV: the previous day's NAV when there are no holdings,
        # otherwise the NAV recorded on the holdings' own date.
        # NOTE(review): returns below are measured from buyprice while this
        # base is the NAV stored at factor_data's date -- confirm that date is
        # the intended reference point and the entry-day return is not counted twice.
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        if Utils.is_month_start(trading_day):
            # Month start: rebalance
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at
            # today's average price
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    # The returned row may belong to an earlier date (the
                    # lookup is done with range_lookup=True); use the average
                    # price only when the row is for today, else the close.
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings stored for the previous trading day
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Compute each stock's trailing 20-day return and drop stocks that
            # are suspended or limit-up today, or have no 20-day return
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day,
                                                 ndays=20)
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return descending and drop the top 20%
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20',
                                                  ascending=False).iloc[k:]
            del factor_data['ret20']  # helper column no longer needed
            # Sort by factor value ascending and keep the first 10%
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record each stock's buy price and compute the portfolio's
            # return from buy price to today's close
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Save the new holdings
            port_data_path = os.path.join(
                backtest_path,
                'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Pause for 300 seconds after every sixth rebalance
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: value the portfolio at the close
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Add today's NAV row; it must be added inside the loop because the
        # next iteration looks the NAV up again from port_nav
        nav_row = DataFrame([{
            'date': Utils.datetimelike_to_str(trading_day, True),
            'nav': nav
        }])
        port_nav = pd.concat([port_nav, nav_row], ignore_index=True)
        # Advance prev_trading_day
        prev_trading_day = trading_day
    # Save the NAV table
    port_nav_path = os.path.join(backtest_path, 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
示例#11
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute intraday momentum factor loadings for the given date(s) and
        optionally save them to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date; if None, only start_date is computed,
            format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            If True, only month-end trading days are computed
        :param save: bool, default False
            Whether to save the result to the factor database
        :param kwargs['synthetic_factor']: bool, default False
            If True, build the synthetic factor from the stored per-period
            loadings and the factor weights; otherwise compute the per-period
            loadings themselves
        :return: dict or None
        --------
            Result for the last processed date (None if no date was processed).
            Per-period mode keys:
            0. date: date label
            1. id: security symbol
            2. m0: overnight momentum
            3. m1: first-hour momentum
            4. m2: second-hour momentum
            5. m3: third-hour momentum
            6. m4: fourth-hour momentum
            7. m_normal: conventional momentum
            Synthetic mode keys: date, id, factorvalue.
            In synthetic mode the function returns None immediately, skipping
            any remaining dates, when the stored loadings or the factor
            weights for a date are missing.
        """
        # Trading-day series and the stock basics table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        period_keys = ('m0', 'm1', 'm2', 'm3', 'm4', 'm_normal')
        # Walk the trading days and compute the factor values
        dict_intraday_momentum = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info(
                '[%s] calc synthetic intraday momentum factor loading.' %
                Utils.datetimelike_to_str(calc_date))
            if kwargs.get('synthetic_factor'):  # synthetic momentum factor
                dict_intraday_momentum = {
                    'date': [],
                    'id': [],
                    'factorvalue': []
                }
                # Read the stored per-period momentum loadings
                df_factor_loading = Utils.read_factor_loading(
                    cls._db_file, Utils.datetimelike_to_str(calc_date, False))
                if df_factor_loading.shape[0] <= 0:
                    logging.info(
                        "[%s] It doesn't exist intraday momentum factor loading."
                        % Utils.datetimelike_to_str(calc_date))
                    return
                df_factor_loading.fillna(0, inplace=True)
                # Read the factor weights for this date
                factor_weight = cls.get_factor_weight(calc_date)
                if factor_weight is None:
                    # The date argument was missing here, so the message was
                    # logged with a literal "%s".
                    logging.info("[%s] It doesn't exist factor weight." %
                                 Utils.datetimelike_to_str(calc_date))
                    return
                # Synthetic factor = loadings (n x 5) dot weights (5 x 1)
                arr_factor_loading = np.array(
                    df_factor_loading[['m0', 'm1', 'm2', 'm3', 'm4']])
                arr_factor_weight = np.array(
                    factor_weight.drop('date')).reshape((5, 1))
                arr_synthetic_factor = np.dot(arr_factor_loading,
                                              arr_factor_weight)
                dict_intraday_momentum['date'] = list(
                    df_factor_loading['date'])
                dict_intraday_momentum['id'] = list(df_factor_loading['id'])
                dict_intraday_momentum['factorvalue'] = list(
                    arr_synthetic_factor.astype(float).round(6).reshape(
                        (arr_synthetic_factor.shape[0], )))
                # Save the synthetic factor
                if save:
                    synthetic_db_file = os.path.join(
                        factor_ct.FACTOR_DB.db_path,
                        factor_ct.INTRADAYMOMENTUM_CT.synthetic_db_file)
                    Utils.factor_loading_persistent(
                        synthetic_db_file,
                        Utils.datetimelike_to_str(calc_date, False),
                        dict_intraday_momentum)
            else:  # per-period intraday momentum factors
                dict_intraday_momentum = {
                    'date': [],
                    'id': [],
                    'm0': [],
                    'm1': [],
                    'm2': [],
                    'm3': [],
                    'm4': [],
                    'm_normal': []
                }
                # Only stocks whose list_date is earlier than calc_date - 90 days
                s = (calc_date -
                     datetime.timedelta(days=90)).strftime('%Y%m%d')
                stock_basics = all_stock_basics[all_stock_basics.list_date < s]

                # Compute the loadings in parallel. Workers put their results
                # on the shared queue.
                # NOTE: the AsyncResult objects are discarded, so an exception
                # raised inside a worker is not re-raised here.
                q = Manager().Queue()  # inter-process queue for the results
                p = Pool(4)  # pool of at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                # Each queue item is indexed as [id, m0, m1, m2, m3, m4, m_normal]
                while not q.empty():
                    momentum_data = q.get(True)
                    dict_intraday_momentum['id'].append(momentum_data[0])
                    for pos, key in enumerate(period_keys, start=1):
                        dict_intraday_momentum[key].append(
                            round(momentum_data[pos], 6))

                # Rows are labelled with the second entry of the two-day
                # series starting at calc_date
                date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
                dict_intraday_momentum['date'] = [date_label] * len(
                    dict_intraday_momentum['id'])
                # Save the loadings to the factor database
                if save:
                    Utils.factor_loading_persistent(
                        cls._db_file, calc_date.strftime('%Y%m%d'),
                        dict_intraday_momentum)
                # Pause for 360 seconds before the next date
                logging.info('Suspending for 360s.')
                time.sleep(360)
        return dict_intraday_momentum
示例#12
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Compute the CMRA factor loading of a single stock.

        The stock's close is sampled every ``risk_ct.CMRA_CT.days_scale``
        trading days over a window of ``trailing * days_scale`` trading days
        ending at calc_date. Each sample is divided by the first close in the
        window, and CMRA is log(max ratio) - log(min ratio).

        Parameter:
        --------
        :param code: str
            Stock code, e.g. SH600000, 600000
        :param calc_date: datetime-like, str
            Calculation date, format: YYYY-MM-DD
        :return: pd.Series
        --------
            CMRA factor loading of the stock
            0. code
            1. cmra
            None is returned if the loading cannot be computed (no quote
            data, fewer quotes than half the window, or no usable sample).
        """
        trailing = risk_ct.CMRA_CT.trailing
        days_scale = risk_ct.CMRA_CT.days_scale
        window = trailing * days_scale
        # Trading days of the look-back window, as YYYY-MM-DD strings
        trading_days = Utils.get_trading_days(end=calc_date, ndays=window+1)
        trading_days = [day.strftime('%Y-%m-%d') for day in trading_days]
        # Adjusted daily quotes of the stock
        df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
        if df_secu_quote is None:
            # No quote data at all: report failure as documented
            return None
        # Keep only quotes that fall on the window's trading days
        df_secu_quote = df_secu_quote[df_secu_quote['date'].isin(trading_days)].reset_index(drop=True)
        if df_secu_quote.empty or len(df_secu_quote) < int(window/2):
            # Fewer quotes than half of the required window length
            return None

        first_date = df_secu_quote.iloc[0]['date']
        first_close = df_secu_quote.iloc[0]['close']
        # Price ratio (close / first close) at each sampling point
        z = []
        prev_trading_day = first_date
        for t in range(1, trailing+1):
            trading_day = trading_days[t * days_scale]
            # Sampling point lies before the stock's first quote in the window
            if trading_day < first_date:
                continue
            # Latest quoted day on or before the sampling point
            secu_trading_day = df_secu_quote[df_secu_quote['date'] <= trading_day].iloc[-1]['date']
            # Skip if this quote was already used (no new quote since the last sample)
            if secu_trading_day <= prev_trading_day:
                continue
            close = df_secu_quote[df_secu_quote['date'] == secu_trading_day].iloc[0]['close']
            z.append(close / first_close)
            prev_trading_day = secu_trading_day
        if not z:
            # No sampling point produced a ratio; max()/min() would raise
            return None
        cmra = math.log(max(z)) - math.log(min(z))
        return pd.Series([Utils.code_to_symbol(code), cmra], index=['code', 'cmra'])
示例#13
0
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Compute the DASTD factor loading of the sample stocks for the given
        date(s) and optionally save it to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date; if None, only start_date is computed,
            format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            If True, only month-end trading days are computed
        :param save: bool, default False
            Whether to save the result to the factor database
        :param kwargs:
            'multi_proc': bool, True = multi-process, False = single process,
            default False
        :return: dict
            Factor loading of the last processed date with keys
            date, id, factorvalue; None if no date was processed
        """
        # Resolve the trading days to process
        start_date = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
        else:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        use_multi_proc = kwargs.get('multi_proc', False)

        dict_dastd = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc DASTD factor loading.' % Utils.datetimelike_to_str(calc_date))
            # Restrict to stocks whose list_date is earlier than the cutoff
            cutoff = (calc_date - datetime.timedelta(days=risk_ct.DASTD_CT.listed_days)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < cutoff]
            ids = []        # stock ids
            dastds = []     # DASTD values, aligned with ids

            if use_multi_proc:
                # Workers put their results on a shared queue
                result_queue = Manager().Queue()
                pool = Pool(4)      # at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    pool.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, result_queue,))
                pool.close()
                pool.join()
                while not result_queue.empty():
                    loading = result_queue.get(True)
                    ids.append(loading['code'])
                    dastds.append(loading['dastd'])
            else:
                # Single process: a stock whose calculation fails is kept with NaN
                for _, stock_info in stock_basics.iterrows():
                    symbol = stock_info.symbol
                    logging.info("[%s] Calc %s's DASTD factor loading." % (calc_date.strftime('%Y-%m-%d'), symbol))
                    loading = cls._calc_factor_loading(symbol, calc_date)
                    if loading is not None:
                        ids.append(loading['code'])
                        dastds.append(loading['dastd'])
                        continue
                    ids.append(Utils.code_to_symbol(symbol))
                    dastds.append(np.nan)

            # Rows are labelled with the second entry of the two-day series
            # starting at calc_date
            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_dastd = {'date': [date_label]*len(ids), 'id': ids, 'factorvalue': dastds}
            if save:
                Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_dastd, ['date', 'id', 'factorvalue'])
            # Only the message is emitted; the matching 180s sleep is disabled
            logging.info('Suspending for 180s.')
        return dict_dastd
示例#14
0
def apm_backtest(start, end, pure_factor=False):
    """
    Historical back-test of the APM factor.

    Walks every trading day between start and end. On a month-start trading
    day the current holdings (if any) are valued at that day's average price,
    a new portfolio is selected from the APM factor loadings read for the
    previous trading day, and the holdings are written to a
    ``port_data_<date>.csv`` file. On every other day the holdings are marked
    to the close. One NAV row is added per trading day and the whole NAV table
    is written to ``port_nav.csv`` at the end.

    Parameters:
    --------
    :param start: datetime-like, str
        Back-test start date, format: YYYY-MM-DD; should be the trading day
        before a month start, i.e. a month-end trading day
    :param end: datetime-like, str
        Back-test end date, format: YYYY-MM-DD
    :param pure_factor: bool, default False
        Whether to back-test the pure factor (selects the pure loading file
        and the pure back-test directory)
    :return: None
    """
    # Imported locally: pd.concat is used to add NAV rows because
    # DataFrame.append no longer exists in pandas >= 2.0.
    import pandas as pd

    # Trading days between the start and end dates
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest back-test state saved before the start date
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    if pure_factor:
        backtest_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.pure_backtest_path)
    else:
        backtest_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data holds the current holdings; the code below reads its
    # date, id, factorvalue and buyprice columns. It is None until a
    # portfolio exists.
    if port_nav is None:
        # No NAV history: start from 1.0 on the day before the first trading day
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Rebalance on month starts, otherwise just value the portfolio
    for trading_day in trading_days:
        # Base NAV: the previous day's NAV when there are no holdings,
        # otherwise the NAV recorded on the holdings' own date.
        # NOTE(review): returns below are measured from buyprice while this
        # base is the NAV stored at factor_data's date -- confirm that date is
        # the intended reference point and the entry-day return is not counted twice.
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        if Utils.is_month_start(trading_day):
            # Month start: rebalance
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as if sold at
            # today's average price
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    # The returned row may belong to an earlier date (the
                    # lookup is done with range_lookup=True); use the average
                    # price only when the row is for today, else the close.
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings stored for the previous trading day
            if pure_factor:
                factor_data_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.APM_CT.pure_apm_db_file)
            else:
                factor_data_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                                factor_ct.APM_CT.apm_db_file)
            factor_data = Utils.read_factor_loading(
                factor_data_path,
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Drop stocks that are suspended or limit-up on the rebalance day
            ind_to_be_delted = []
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_delted.append(ind)
            factor_data = factor_data.drop(ind_to_be_delted, axis=0)
            # Sort by factor value descending and keep the first 10%
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=False)
            factor_data = factor_data.iloc[:int(len(factor_data) * 0.1)]
            # Record each stock's buy price and compute the portfolio's
            # return from buy price to today's close
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            interval_ret /= float(len(factor_data))
            nav *= (1.0 + interval_ret)
            # Save the new holdings into the back-test directory chosen above
            port_data_path = os.path.join(
                backtest_path, 'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
        else:
            # Not a rebalance day: value the portfolio at the close
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
        # Add today's NAV row; it must be added inside the loop because the
        # next iteration looks the NAV up again from port_nav
        nav_row = DataFrame([{
            'date': trading_day.strftime('%Y-%m-%d'),
            'nav': nav
        }])
        port_nav = pd.concat([port_nav, nav_row], ignore_index=True)
        # Advance prev_trading_day
        prev_trading_day = trading_day
    # Save the NAV table
    port_nav_path = os.path.join(backtest_path, 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
示例#15
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        计算指定日期、指定个股APM因子的stat统计量
        --------
        :param code: string
            个股代码,如600000
        :param calc_date: datetime-like, str
            因子载荷计算日期,格式YYYY-MM-DD
        :return: float
        --------
            stat统计量,计算APM因子载荷的中间变量
        """
        # 1.取得过去40个交易日序列,交易日按降序排列
        calc_date = Utils.to_date(calc_date)
        trading_days = Utils.get_trading_days(end=calc_date,
                                              ndays=40,
                                              ascending=False)

        # 2.取得个股及指数过去__days+1个交易日每个交易日的开盘价、中午收盘价和当天收盘价
        #   开盘价为09:31分钟线的开盘价,中午收盘价为11:30分钟线的收盘价,当天收盘价为15:00分钟线的收盘价
        #   返回的数据格式为DataFrame,columns=['date','open','mid_close','close'],按日期升序排列
        # secu_mkt_data = DataFrame()
        # index_mkt_data = DataFrame()
        # mkt_data_header = ['date', 'open', 'mid_close', 'close']
        # k = 0
        # for trading_day in trading_days:
        #     df_1min_data = Utils.get_min_mkt(Utils.code_to_symbol(code), trading_day, fq=True)
        #     if df_1min_data is not None:
        #         str_date = Utils.datetimelike_to_str(trading_day)
        #         fopen = df_1min_data[df_1min_data.datetime == '%s 09:31:00' % str_date].iloc[0].open
        #         fmid_close = df_1min_data[df_1min_data.datetime == '%s 11:30:00' % str_date].iloc[0].close
        #         fclose = df_1min_data[df_1min_data.datetime == '%s 15:00:00' % str_date].iloc[0].close
        #         secu_mkt_data = secu_mkt_data.append(
        #             Series([str_date, fopen, fmid_close, fclose], index=mkt_data_header), ignore_index=True)
        #
        #         df_1min_data = Utils.get_min_mkt(factor_ct.APM_CT.index_code, trading_day, index=True, fq=True)
        #         fopen = df_1min_data[df_1min_data.datetime == '%s 09:31:00' % str_date].iloc[0].open
        #         fmid_close = df_1min_data[df_1min_data.datetime == '%s 11:30:00' % str_date].iloc[0].close
        #         fclose = df_1min_data[df_1min_data.datetime == '%s 15:00:00' % str_date].iloc[0].close
        #         index_mkt_data = index_mkt_data.append(
        #             Series([str_date, fopen, fmid_close, fclose], index=mkt_data_header), ignore_index=True)
        #         k += 1
        #         if k > cls.__days:
        #             break
        # if k <= cls.__days:
        #     return None
        # secu_mkt_data = secu_mkt_data.sort_values(by='date')
        # secu_mkt_data = secu_mkt_data.reset_index(drop=True)
        # index_mkt_data = index_mkt_data.sort_values(by='date')
        # index_mkt_data = index_mkt_data.reset_index(drop=True)
        # #  3.计算个股及指数的上午收益率数组r_t^{am},R_t^{am}和下午收益率数组r_t^{pm},R_t^{pm},并拼接为一个数组
        # #    拼接后的收益率数组,上半部分为r_t^{am} or R_t^{am},下半部分为r_t^{pm} or R_t^{pm}
        # r_am_array = np.zeros((cls.__days, 1))
        # r_pm_array = np.zeros((cls.__days, 1))
        # for ind in secu_mkt_data.index[1:]:
        #     r_am_array[ind-1, 0] = secu_mkt_data.loc[ind, 'mid_close'] / secu_mkt_data.loc[ind-1, 'close'] - 1.0
        #     r_pm_array[ind-1, 0] = secu_mkt_data.loc[ind, 'close'] / secu_mkt_data.loc[ind, 'mid_close'] - 1.0
        # r_apm_array = np.concatenate((r_am_array, r_pm_array), axis=0)
        #
        # R_am_array = np.zeros((cls.__days, 1))
        # R_pm_array = np.zeros((cls.__days, 1))
        # for ind in index_mkt_data.index[1:]:
        #     R_am_array[ind-1, 0] = index_mkt_data.loc[ind, 'mid_close'] / index_mkt_data.loc[ind-1, 'close'] - 1.0
        #     R_pm_array[ind-1, 0] = index_mkt_data.loc[ind, 'close'] / index_mkt_data.loc[ind, 'mid_close'] - 1.0
        # R_apm_array = np.concatenate((R_am_array, R_pm_array), axis=0)

        # 遍历交易日序列,计算个股及指数的上午收益率(r_am_array,R_am_array)和下午收益率序列(r_pm_array,R_pm_array)
        r_am_array = np.zeros((cls.__days, 1))
        r_pm_array = np.zeros((cls.__days, 1))
        R_am_array = np.zeros((cls.__days, 1))
        R_pm_array = np.zeros((cls.__days, 1))
        k = 0
        for trading_day in trading_days:
            df_1min_data = Utils.get_min_mkt(Utils.code_to_symbol(code),
                                             trading_day,
                                             fq=True)
            if df_1min_data is not None:
                str_date = Utils.datetimelike_to_str(trading_day)
                fopen = df_1min_data[df_1min_data.datetime == '%s 09:31:00' %
                                     str_date].iloc[0].open
                fmid_close = df_1min_data[df_1min_data.datetime ==
                                          '%s 11:30:00' %
                                          str_date].iloc[0].close
                fclose = df_1min_data[df_1min_data.datetime == '%s 15:00:00' %
                                      str_date].iloc[0].close
                r_am_array[k, 0] = fmid_close / fopen - 1.0
                r_pm_array[k, 0] = fclose / fmid_close - 1.0

                df_1min_data = Utils.get_min_mkt(factor_ct.APM_CT.index_code,
                                                 trading_day,
                                                 index=True,
                                                 fq=True)
                fopen = df_1min_data[df_1min_data.datetime == '%s 09:31:00' %
                                     str_date].iloc[0].open
                fmid_close = df_1min_data[df_1min_data.datetime ==
                                          '%s 11:30:00' %
                                          str_date].iloc[0].close
                fclose = df_1min_data[df_1min_data.datetime == '%s 15:00:00' %
                                      str_date].iloc[0].close
                R_am_array[k, 0] = fmid_close / fopen - 1.0
                R_pm_array[k, 0] = fclose / fmid_close - 1.0

                k += 1
                if k == cls.__days:
                    break
        if k < cls.__days:
            return None
        r_apm_array = np.concatenate((r_am_array, r_pm_array), axis=0)
        R_apm_array = np.concatenate((R_am_array, R_pm_array), axis=0)

        # 4.个股收益率数组相对于指数收益率进行线性回归
        #   将指数收益率数组添加常数项
        R_apm_array = sm.add_constant(R_apm_array)
        #   线性回归:r_i = \alpha + \beta * R_i + \epsilon_i
        stat_model = sm.OLS(r_apm_array, R_apm_array)
        stat_result = stat_model.fit()
        resid_array = stat_result.resid.reshape((cls.__days * 2, 1))  # 回归残差数组
        # 5.计算stat统计量
        #   以上得到的__days*2个残差\epsilon_i中,属于上午的记为\epsilon_i^{am},属于下午的记为\epsilong_i^{pm},计算每日上午与
        #   下午残差的差值:$\sigma_t = \spsilon_i^{am} - \epsilon_i^{pm}$,为了衡量上午与下午残差的差异程度,设计统计量:
        #   $stat = \frac{\mu(\sigma_t)}{\delta(\sigma_t)\sqrt(N)}$,其中\mu为均值,\sigma为标准差,N=__days,总的来说
        #   统计量stat反映了剔除市场影响后股价行为上午与下午的差异程度。stat数值大(小)于0越多,则股票在上午的表现越好(差)于下午。
        delta_array = resid_array[:cls.__days] - resid_array[
            cls.__days:]  # 上午与 下午的残差差值
        delta_avg = np.mean(delta_array)  # 残差差值的均值
        delta_std = np.std(delta_array)  # 残差差值的标准差
        # 如果残差差值的标准差接近于0,返回None
        if np.fabs(delta_std) < 0.0001:
            return None
        stat = delta_avg / delta_std / np.sqrt(cls.__days)
        # logging.info('%s, stat = %.6f' % (code, stat))
        return stat
示例#16
0
 def calc_factorloading(self, start_date, end_date=None):
     """
     Compute and persist the loadings of every risk factor, one trading day at a time.

     Parameters:
     --------
     :param start_date: datetime-like, str
         First calculation date, format: YYYY-MM-DD
     :param end_date: datetime-like, str
         Last calculation date, format: YYYY-MM-DD. If None, a single trading
         day is requested via Utils.get_trading_days(start=start_date, ndays=1).
     :return: None
     """
     # Build the sequence of trading days to process
     start_date = Utils.to_date(start_date)
     if end_date is not None:
         end_date = Utils.to_date(end_date)
         trading_days_series = Utils.get_trading_days(start=start_date,
                                                      end=end_date)
     else:
         trading_days_series = Utils.get_trading_days(start=start_date,
                                                      ndays=1)
     # Risk factors, in the order in which their loadings are calculated
     risk_factors = (Size, Beta, Momentum, ResVolatility, NonlinearSize,
                     Value, Liquidity, EarningsYield, Growth, Leverage)
     # Walk the trading days and calculate every factor for each day.
     # Each call must receive the day being iterated (calc_date); passing
     # start_date here would recompute the first day on every iteration.
     for calc_date in trading_days_series:
         for risk_factor in risk_factors:
             risk_factor.calc_factor_loading(start_date=calc_date,
                                             end_date=None,
                                             month_end=False,
                                             save=True,
                                             multi_proc=True)
示例#17
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Calculate the APM factor loadings (and the purified APM loadings) of the
        sample stocks for the requested dates, optionally saving them to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date
        :param end_date: datetime-like, str, default None
            End date. If None, only the single trading day returned by
            Utils.get_trading_days(end=start_date, ndays=1) is processed.
        :param month_end: bool, default True
            If True, trading days for which Utils.is_month_end() is False are skipped.
        :param save: bool, default False
            Whether to persist the loadings to the factor database.
        :param kwargs:
            Accepted for interface compatibility; not read by this method.
        :return: dict or None
        --------
            APM loadings of the last trading day that was actually calculated:
            'date': date label of the loadings
            'id': security ids
            'factorvalue': factor loadings
            None if no trading day was calculated.
        """
        # 1. Trading-day sequence and the securities master table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # 2. Walk the trading days and calculate the APM loadings
        dict_apm = None
        for calc_date in trading_days_series:
            # dict_apm is deliberately NOT reset here: a skipped (non month-end)
            # day must not wipe out the result of the last calculated day.
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 2.1. For every stock collect the APM stat statistic and the
            #      20-day return into stat_lst / ret20_lst.
            #      Only stocks whose list_date is earlier than 90 calendar
            #      days before calc_date are considered.
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            stat_lst = []
            ret20_lst = []
            symbol_lst = []

            # Calculate in parallel; workers push their results onto the queue
            q = Manager().Queue()
            p = Pool(4)  # at most 4 worker processes at a time
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            # Each queue item is indexed as (symbol, stat, ret20)
            while not q.empty():
                apm_value = q.get(True)
                symbol_lst.append(apm_value[0])
                stat_lst.append(apm_value[1])
                ret20_lst.append(apm_value[2])

            assert len(stat_lst) == len(ret20_lst)
            assert len(stat_lst) == len(symbol_lst)

            # 2.2. Build the APM factor
            # 2.2.1. Cross-sectional regression of stat on the momentum factor:
            #        stat_j = \beta * Ret20_j + \epsilon_j
            #        The residual vector is the APM factor of the stocks.
            # Both variables are winsorized and standardized before regressing.
            stat_arr = np.array(stat_lst).reshape((len(stat_lst), 1))
            ret20_arr = np.array(ret20_lst).reshape((len(ret20_lst), 1))
            stat_arr = Utils.clean_extreme_value(stat_arr)
            stat_arr = Utils.normalize_data(stat_arr)
            ret20_arr = Utils.clean_extreme_value(ret20_arr)
            ret20_arr = Utils.normalize_data(ret20_arr)
            # No constant term is added, so the regression has no intercept
            apm_model = sm.OLS(stat_arr, ret20_arr)
            apm_result = apm_model.fit()
            apm_lst = list(np.around(apm_result.resid, 6))  # loadings rounded to 6 decimals
            assert len(apm_lst) == len(symbol_lst)
            # 2.2.2. Build the APM dict and persist it.
            # The loadings are labelled with the second element of the two-day
            # sequence from calc_date (presumably the next trading day).
            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_apm = {
                'date': [date_label] * len(symbol_lst),
                'id': symbol_lst,
                'factorvalue': apm_lst
            }
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_apm)

            # 2.3. Build the PureAPM factor
            # stat_arr has already been winsorized and standardized above
            df_stat = DataFrame(stat_arr, index=symbol_lst, columns=['stat'])
            # Factors that stat is purified against
            df_dependent_factor = cls.get_dependent_factors(calc_date)
            # Keep only the stocks present in both frames
            df_data = pd.concat([df_stat, df_dependent_factor],
                                axis=1,
                                join='inner')
            # OLS of stat (column 0) on the remaining columns; the residuals
            # are the purified APM factor
            arr_data = np.array(df_data)
            pure_apm_model = sm.OLS(arr_data[:, 0], arr_data[:, 1:])
            pure_apm_result = pure_apm_model.fit()
            pure_apm_lst = list(np.around(pure_apm_result.resid, 6))
            pure_symbol_lst = list(df_data.index)
            assert len(pure_apm_lst) == len(pure_symbol_lst)
            # Build the pure_apm dict and persist it
            dict_pure_apm = {
                'date': [date_label] * len(pure_symbol_lst),
                'id': pure_symbol_lst,
                'factorvalue': pure_apm_lst
            }
            pure_apm_db_file = os.path.join(factor_ct.FACTOR_DB.db_path,
                                            factor_ct.APM_CT.pure_apm_db_file)
            if save:
                Utils.factor_loading_persistent(pure_apm_db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_pure_apm)
            # Pause for 360 seconds before the next trading day
            logging.info('Suspended for 360s.')
            time.sleep(360)
        return dict_apm
示例#18
0
def _calc_mvpfp_performance(factor_name, start_date, end_date):
    """
    Calculate the daily and monthly performance of a factor's
    minimum-volatility pure-factor portfolio (mvpfp) and save both tables.

    Parameters:
    --------
    :param factor_name: str
        Factor name, e.g: SmartMoney
    :param start_date: datetime-like, str
        Start date, e.g: YYYY-MM-DD, YYYYMMDD
    :param end_date: datetime-like, str
        End date, e.g: YYYY-MM-DD, YYYYMMDD
    :return: None
        The results are handed to _save_mvpfp_performance().
    :raises NotADirectoryError: if the factor's 'mvpfp' folder does not exist
    """
    start_date = Utils.to_date(start_date)
    end_date = Utils.to_date(end_date)
    # Locate the mvpfp holdings folder of the factor.
    # Factor constants are looked up by name as '<FACTOR_NAME>_CT'.
    factor_ct_cfg = getattr(alphafactor_ct, factor_name.upper() + '_CT')
    mvpfp_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                              factor_ct_cfg['db_file'],
                              'mvpfp')
    if not os.path.isdir(mvpfp_path):
        raise NotADirectoryError("%s因子的mvpfp组合文件夹不存在." % factor_name)
    # Load the holdings files whose date lies in [start_date, end_date].
    # os.listdir() returns entries in arbitrary order, so the names are
    # sorted to load the files in a deterministic order.
    mvpfp_port = CPortfolio('weight_holding')
    for mvpfp_filename in sorted(os.listdir(mvpfp_path)):
        if os.path.splitext(mvpfp_filename)[1] != '.csv':
            continue
        mvpfp_date = Utils.to_date(mvpfp_filename.split('.')[0])
        if mvpfp_date < start_date or mvpfp_date > end_date:
            continue
        mvpfp_filepath = os.path.join(mvpfp_path, mvpfp_filename)
        mvpfp_port.load_holdings_fromfile(mvpfp_filepath)
    # Walk the holdings and calculate the portfolio performance
    df_daily_performance = pd.DataFrame(
        columns=alphamodel_ct.FACTOR_PERFORMANCE_HEADER['daily_performance']
    )  # daily performance
    df_monthly_performance = pd.DataFrame(
        columns=alphamodel_ct.FACTOR_PERFORMANCE_HEADER['monthly_performance']
    )  # monthly performance

    # Seed row: NAV starts at 1.0 on the first holding date
    df_daily_performance.loc[0, 'daily_ret'] = 0.0
    df_daily_performance.loc[0, 'nav'] = 1.0
    df_daily_performance.loc[0, 'accu_ret'] = 0.0

    mvpfp_holdings = mvpfp_port.holdings
    prev_holdingdate = curr_holding_date = None
    prevmonth_idx = 0
    holding_dates = list(mvpfp_holdings.keys())
    df_daily_performance.loc[0, 'date'] = holding_dates[0]
    # Extend the last holding period up to end_date
    if end_date > holding_dates[-1]:
        holding_dates += [end_date]
    mvpfp_daily_performance = pd.Series(
        index=alphamodel_ct.FACTOR_PERFORMANCE_HEADER['daily_performance'])
    mvpfp_monthly_performance = pd.Series(
        index=alphamodel_ct.FACTOR_PERFORMANCE_HEADER['monthly_performance'])
    for holding_date in holding_dates:
        prev_holdingdate = curr_holding_date
        curr_holding_date = holding_date
        # The first holding date only opens the first period
        if prev_holdingdate is None:
            continue
        # Index of the last daily row before this period; used as the base
        # of the period return below
        prevmonth_idx = df_daily_performance.index[-1]
        holding_data = mvpfp_holdings[prev_holdingdate]
        trading_days_series = Utils.get_trading_days(
            start=prev_holdingdate + datetime.timedelta(days=1),
            end=curr_holding_date)
        for calc_date in trading_days_series:
            mvpfp_daily_performance['date'] = calc_date
            daily_ret = 0
            # TODO: calculate the per-stock returns in parallel
            # NOTE(review): the interval always starts at the first trading
            # day of the holding period, so `daily_ret` looks like a
            # cumulative return that is then compounded onto the previous
            # NAV -- confirm against Utils.calc_interval_ret.
            for _, holding in holding_data.holding.iterrows():
                ret = Utils.calc_interval_ret(holding['code'],
                                              start=trading_days_series[0],
                                              end=calc_date)
                if ret is not None:
                    daily_ret += ret * holding['weight']
            mvpfp_daily_performance['daily_ret'] = daily_ret
            mvpfp_daily_performance[
                'nav'] = df_daily_performance.iloc[-1]['nav'] * (1 + daily_ret)
            mvpfp_daily_performance[
                'accu_ret'] = mvpfp_daily_performance['nav'] - 1

            df_daily_performance = df_daily_performance.append(
                mvpfp_daily_performance, ignore_index=True)

        # Period return: NAV at the end of the period over NAV at its start
        mvpfp_monthly_performance['date'] = curr_holding_date
        mvpfp_monthly_performance['monthly_ret'] = df_daily_performance.iloc[
            -1]['nav'] / df_daily_performance.loc[prevmonth_idx, 'nav'] - 1.0
        df_monthly_performance = df_monthly_performance.append(
            mvpfp_monthly_performance, ignore_index=True)

    # Save the results
    _save_mvpfp_performance(df_daily_performance, factor_name, 'daily', 'a')
    _save_mvpfp_performance(df_monthly_performance, factor_name, 'monthly',
                            'a')
示例#19
0
    def _calc_synthetic_factor_loading(cls,
                                       start_date,
                                       end_date=None,
                                       month_end=True,
                                       save=False,
                                       **kwargs):
        """
        Calculate the loadings of a synthetic (composite) factor for the
        requested dates and optionally save them to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date
        :param end_date: datetime-like, str, default None
            End date. If None, only the single trading day returned by
            Utils.get_trading_days(end=start_date, ndays=1) is processed.
        :param month_end: bool, default True
            If True, trading days for which Utils.is_month_end() is False are skipped.
        :param save: bool, default False
            Whether to persist the loadings to the factor database.
        :param kwargs:
            'multi_proc': bool, forwarded to the component factors, default False
            'com_factors': list of component factor objects (required);
                calc_factor_loading() is called on each of them
        :return: None
            The method has no return statement; the result is only persisted
            (when save is True).
        """
        # Trading-day sequence
        start_date = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        else:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        kwargs.setdefault('multi_proc', False)
        # Expression naming this synthetic factor's constant in risk_ct
        synthetic_ct_expr = 'risk_ct.' + cls.__name__.upper() + '_CT'

        for calc_date in trading_days_series:
            if month_end and not Utils.is_month_end(calc_date):
                continue
            # Step 1: (re)calculate the loadings of every component factor
            for component in kwargs['com_factors']:
                component.calc_factor_loading(start_date=calc_date,
                                              end_date=None,
                                              month_end=month_end,
                                              save=save,
                                              multi_proc=kwargs['multi_proc'])
            # Step 2: read the component loadings back and join them on 'id'
            combined = pd.DataFrame()
            industry_map = Utils.get_industry_classify()  # industry classification of the stocks
            for com_name in eval(synthetic_ct_expr)['component']:
                loading_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    eval('risk_ct.' + com_name + '_CT')['db_file'])
                loading = Utils.read_factor_loading(
                    loading_path,
                    Utils.datetimelike_to_str(calc_date, dash=False))
                loading.drop(columns='date', inplace=True)
                loading.rename(columns={'factorvalue': com_name},
                               inplace=True)
                # Attach the industry code of each stock
                loading = pd.merge(left=loading,
                                   right=industry_map[['id', 'ind_code']],
                                   how='inner',
                                   on='id')
                # Set the rows with a missing loading aside, then drop them
                missing_rows = loading[loading[com_name].isna()]
                loading.dropna(axis='index', how='any', inplace=True)
                # Outlier treatment and standardization of the complete rows
                loading = Utils.normalize_data(loading,
                                               id='id',
                                               columns=com_name,
                                               treat_outlier=True,
                                               weight='cap',
                                               calc_date=calc_date)
                # Fill each missing loading with the mean of its industry
                industry_means = {
                    ind: loading[loading['ind_code'] == ind][com_name].mean()
                    for ind in set(missing_rows['ind_code'])
                }
                for row_idx, row in missing_rows.iterrows():
                    missing_rows.loc[row_idx, com_name] = industry_means[
                        row['ind_code']]
                # Put the filled rows back and discard the industry column
                loading = pd.concat([loading, missing_rows])
                loading.drop(columns='ind_code', inplace=True)
                # Join this component onto the ones gathered so far
                combined = loading if combined.empty else pd.merge(
                    left=combined, right=loading, how='inner', on='id')

            # Step 3: weighted sum of the components
            combined.set_index('id', inplace=True)
            weight = pd.Series(eval(synthetic_ct_expr)['weight'])
            weighted = (combined * weight).sum(axis=1)
            weighted.name = 'factorvalue'
            weighted.index.name = 'id'
            result = weighted.to_frame().reset_index()
            result['date'] = Utils.get_trading_days(start=calc_date,
                                                    ndays=2)[1]
            # Step 4: persist the synthetic factor loadings
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    Utils.datetimelike_to_str(calc_date, dash=False),
                    result.to_dict('list'),
                    ['date', 'id', 'factorvalue'])
0
    def _calc_factor_loading(cls, code, calc_date):
        """
        Calculate the intraday momentum values of one stock as of one date.

        Parameters
        --------
        :param code: str
            Stock code, e.g. 600000 or SH600000
        :param calc_date: datetime-like, str
            Calculation date, format YYYY-MM-DD
        :return: pd.Series
        --------
            Momentum per intraday segment, indexed as follows:
            0. m0: overnight momentum
            1. m1: first-hour momentum
            2. m2: second-hour momentum
            3. m3: third-hour momentum
            4. m4: fourth-hour momentum
            5. m_normal: conventional momentum
            None is returned when fewer than cls.__days + 1 days with minute
            data are found within the 90 trading days up to calc_date.
        """
        price_columns = ['date', 'p0930', 'p1030', 'p1130', 'p1400', 'p1500']
        return_columns = ['date', 'r0', 'r1', 'r2', 'r3', 'r4']
        # (bar timestamp, bar field) sampled for each price column, in order
        sample_points = (
            ('09:31:00', 'open'),
            ('10:30:00', 'close'),
            ('11:30:00', 'close'),
            ('14:00:00', 'close'),
            ('15:00:00', 'close'),
        )
        # Consecutive price columns delimiting the four intraday segments
        segments = list(zip(price_columns[1:-1], price_columns[2:]))

        # Last 90 trading days, most recent first
        trading_days = Utils.get_trading_days(end=calc_date,
                                              ndays=90,
                                              ascending=False)
        mkt_data = DataFrame()
        intra_day_ret = DataFrame()
        n_days = 0  # number of days with minute data collected so far
        for trading_day in trading_days:
            minute_bars = Utils.get_min_mkt(code, trading_day, fq=True)
            if minute_bars is None:
                continue
            # Sample the five intraday prices of this day
            day_prices = []
            for clock, field in sample_points:
                stamp = '%s %s' % (trading_day.strftime('%Y-%m-%d'), clock)
                bar = minute_bars[minute_bars.datetime == stamp].iloc[0]
                day_prices.append(getattr(bar, field))
            mkt_data = mkt_data.append(
                Series([trading_day] + day_prices, index=price_columns),
                ignore_index=True)
            # Days arrive newest first, so the row just appended is the day
            # *before* the previous row; the previous row's returns can now
            # be completed.
            if n_days > 0:
                later = mkt_data.iloc[n_days - 1]
                earlier = mkt_data.iloc[n_days]
                # Overnight log return: previous close -> next open
                overnight = math.log(later.p0930 / earlier.p1500)
                # Log returns of the four intraday segments
                intraday = [
                    math.log(getattr(later, seg_end) / getattr(later, seg_start))
                    for seg_start, seg_end in segments
                ]
                intra_day_ret = intra_day_ret.append(
                    Series([later.date, overnight] + intraday,
                           index=return_columns),
                    ignore_index=True)
            n_days += 1
            if n_days > cls.__days:
                break
        # Not enough days with data
        if n_days <= cls.__days:
            return None
        intra_day_ret = intra_day_ret.sort_values(by='date')
        # Conventional momentum: log return from the oldest to the newest close
        m_normal = math.log(mkt_data.iloc[0].p1500 / mkt_data.iloc[-1].p1500)
        # Momentum of each segment = sum of that segment's daily log returns
        intra_day_momentum = intra_day_ret.set_index('date').sum()
        intra_day_momentum.index = ['m0', 'm1', 'm2', 'm3', 'm4']
        intra_day_momentum['m_normal'] = m_normal
        return intra_day_momentum
示例#21
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Calculate the size factor loadings of the sample stocks for the
        requested dates and optionally save them to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like or str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like or str
            End date, format: YYYY-MM-DD or YYYYMMDD. If None, only the single
            trading day returned by Utils.get_trading_days(end=start_date, ndays=1)
            is processed.
        :param month_end: bool, default True
            If True, only month-end trading days are calculated; otherwise
            every trading day is.
        :param save: bool, default False
            Whether to persist the loadings to the factor database.
        :return: dict or None
        --------
            Loadings of the last calculated trading day:
            0. date: date label
            1. id: security symbol
            2. LnTotalMktCap: values stored under this key, rounded to 4 decimals
            3. LnLiquidMktCap: values stored under this key, rounded to 4 decimals
            None if no trading day was calculated.
        """
        # Trading-day sequence and the securities master table
        start_date = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        else:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()

        dict_scale = None
        for calc_date in trading_days_series:
            if month_end and not Utils.is_month_end(calc_date):
                continue
            # Only stocks whose list_date is earlier than 90 calendar days
            # before calc_date are considered
            cutoff = (calc_date -
                      datetime.timedelta(days=90)).strftime('%Y%m%d')
            eligible_stocks = all_stock_basics[
                all_stock_basics.list_date < cutoff]

            # Fan the per-stock calculation out to a pool of 4 processes;
            # the workers report their results through the shared queue
            result_queue = Manager().Queue()
            worker_pool = Pool(4)
            for _, stock_info in eligible_stocks.iterrows():
                worker_pool.apply_async(cls._calc_factor_loading_proc,
                                        args=(stock_info.symbol, calc_date,
                                              result_queue))
            worker_pool.close()
            worker_pool.join()

            # Drain the queue; each item is indexed as
            # (id, LnTotalMktCap, LnLiquidMktCap)
            ids, total_caps, liquid_caps = [], [], []
            while not result_queue.empty():
                item = result_queue.get(True)
                ids.append(item[0])
                total_caps.append(round(item[1], 4))
                liquid_caps.append(round(item[2], 4))

            # Second element of the two-day sequence starting at calc_date
            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_scale = {
                'date': [date_label] * len(ids),
                'id': ids,
                'LnTotalMktCap': total_caps,
                'LnLiquidMktCap': liquid_caps
            }
            # Persist the size factor loadings
            if save:
                Utils.factor_loading_persistent(cls._db_file,
                                                calc_date.strftime('%Y%m%d'),
                                                dict_scale)
            # Pause for 60 seconds before the next trading day
            logging.info('Suspending for 60s.')
            time.sleep(60)
        return dict_scale
示例#22
0
def _calc_Orthogonalized_factorloading(factor_name,
                                       start_date,
                                       end_date=None,
                                       month_end=True,
                                       save=False):
    """
    Compute the orthogonalized loading of an alpha factor.

    For each calculation date, the standardized loading of ``factor_name`` is
    regressed cross-sectionally (OLS with an intercept) on the style-factor
    loadings of the risk model plus the orthogonalized loadings of every
    alpha factor that precedes ``factor_name`` in
    ``alphafactor_ct.ALPHA_FACTORS``. The regression residual is the
    orthogonalized loading.

    Parameters:
    --------
    :param factor_name: str
        Alpha factor name, e.g: SmartMoney
    :param start_date: datetime-like, str
        Start date, e.g: YYYY-MM-DD, YYYYMMDD
    :param end_date: datetime-like, str, default None
        End date, e.g: YYYY-MM-DD, YYYYMMDD
    :param month_end: bool, default True
        If True, only dates for which Utils.is_month_end() holds are processed
    :param save: bool, default False
        Whether to persist the result of every processed date
    :return: dict
    --------
        Orthogonalized factor loading of the last processed date
        0. date, second entry of Utils.get_trading_days(start=calc_date, ndays=2)
           (documented by the original author as the next trading day)
        1. id, security code
        2. factorvalue, factor loading (OLS residual)
        An empty dict is returned when no date was processed.
    """
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date,
                                                     end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)

    def _db_file(name):
        # Plain attribute lookup of alphafactor_ct.<NAME>_CT; replaces the
        # former eval() of a string built from the factor name.
        return getattr(alphafactor_ct, name.upper() + '_CT')['db_file']

    CRiskModel = Barra()
    orthog_factorloading = {}
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        str_date = Utils.datetimelike_to_str(calc_date, dash=False)

        # Standardized loading of the target factor
        target_factor_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                          _db_file(factor_name),
                                          'standardized', factor_name)
        df_targetfactor_loading = Utils.read_factor_loading(
            target_factor_path, str_date, drop_na=True)
        df_targetfactor_loading.drop(columns='date', inplace=True)
        df_targetfactor_loading.rename(columns={'factorvalue': factor_name},
                                       inplace=True)

        # Style-factor loading matrix of the risk model, keyed by 'id' so it
        # can be merged with the alpha-factor tables.
        # (fix: the method name was misspelled as 'renmae')
        df_stylefactor_loading = CRiskModel.get_StyleFactorloading_matrix(
            calc_date)
        df_stylefactor_loading.rename(columns={'code': 'id'}, inplace=True)

        # Orthogonalized loadings of the alpha factors listed before the
        # target factor in ALPHA_FACTORS
        df_alphafactor_loading = pd.DataFrame()
        for alphafactor_name in alphafactor_ct.ALPHA_FACTORS:
            if alphafactor_name == factor_name:
                break
            factorloading_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                              _db_file(alphafactor_name),
                                              'orthogonalized',
                                              alphafactor_name)
            factor_loading = Utils.read_factor_loading(factorloading_path,
                                                       str_date,
                                                       drop_na=True)
            factor_loading.drop(columns='date', inplace=True)
            factor_loading.rename(columns={'factorvalue': alphafactor_name},
                                  inplace=True)

            if df_alphafactor_loading.empty:
                df_alphafactor_loading = factor_loading
            else:
                df_alphafactor_loading = pd.merge(left=df_alphafactor_loading,
                                                  right=factor_loading,
                                                  how='inner',
                                                  on='id')

        # Merge target, style and alpha loadings on the security id
        df_factorloading = pd.merge(left=df_targetfactor_loading,
                                    right=df_stylefactor_loading,
                                    how='inner',
                                    on='id')
        if not df_alphafactor_loading.empty:
            # fix: extend the already merged table; merging the style table
            # alone would drop the target-factor column used below.
            df_factorloading = pd.merge(left=df_factorloading,
                                        right=df_alphafactor_loading,
                                        how='inner',
                                        on='id')

        # Target vector (Y) and style+alpha regressor matrix (X)
        df_factorloading.set_index('id', inplace=True)
        arr_targetfactor_loading = np.array(df_factorloading[factor_name])
        stylealphafactor_names = df_factorloading.columns.tolist()
        stylealphafactor_names.remove(factor_name)
        arr_stylealphafactor_loading = np.array(
            df_factorloading[stylealphafactor_names])

        # Cross-sectional regression; the residual is the orthogonalized
        # factor loading
        Y = arr_targetfactor_loading
        X = sm.add_constant(arr_stylealphafactor_loading)
        results = sm.OLS(Y, X).fit()

        datelabel = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        orthog_factorloading = {
            'date': [datelabel] * len(results.resid),
            'id': df_factorloading.index.tolist(),
            'factorvalue': results.resid
        }

        # Persist the orthogonalized loading
        if save:
            orthog_factorloading_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                                     _db_file(factor_name),
                                                     'orthogonalized',
                                                     factor_name)
            Utils.factor_loading_persistent(
                orthog_factorloading_path, str_date, orthog_factorloading,
                ['date', 'id', 'factorvalue'])

    return orthog_factorloading
示例#23
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the LIQUIDITY factor loading of the sample stocks and
        optionally persist it to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date; if None only start_date is processed,
            format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            If True, only dates for which Utils.is_month_end() holds are processed
        :param save: bool, default False
            Whether to persist the results to the factor database
        :param kwargs:
            'multi_proc': bool, True=multi-process, False=single process,
            default False
        :return: dict
            Raw (not size-orthogonalized) liquidity values of the last
            processed date with keys 'date', 'id', 'factorvalue';
            None when no date was processed.
        """
        # Trading-day sequence and stock master table
        start_date = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        else:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        kwargs.setdefault('multi_proc', False)

        dict_raw_liquidity = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc LIQUIDITY factor loading.' %
                         Utils.datetimelike_to_str(calc_date))

            # Keep only stocks whose list_date is earlier than
            # calc_date - LIQUID_CT.listed_days
            listing_cutoff = (calc_date - datetime.timedelta(
                days=risk_ct.LIQUID_CT.listed_days)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[
                all_stock_basics.list_date < listing_cutoff]

            # Gather one result record per stock
            records = []
            if kwargs['multi_proc']:
                # Multi-process: workers push their results onto a shared queue
                q = Manager().Queue()
                pool = Pool(4)
                for _, stock_info in stock_basics.iterrows():
                    pool.apply_async(cls._calc_factor_loading_proc,
                                     args=(stock_info.symbol, calc_date, q))
                pool.close()
                pool.join()
                while not q.empty():
                    records.append(q.get(True))
            else:
                # Single process
                for _, stock_info in stock_basics.iterrows():
                    logging.info("[%s] Calc %s's LIQUIDITY factor loading." %
                                 (Utils.datetimelike_to_str(
                                     calc_date, dash=True), stock_info.symbol))
                    liquidity_data = cls._calc_factor_loading(
                        stock_info.symbol, calc_date)
                    if liquidity_data is not None:
                        records.append(liquidity_data)

            ids = [rec['code'] for rec in records]
            stoms = [rec['stom'] for rec in records]
            stoqs = [rec['stoq'] for rec in records]
            stoas = [rec['stoa'] for rec in records]
            raw_liquidities = [rec['liquidity'] for rec in records]

            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]

            def _as_loading(values):
                # Wrap one value column into the date/id/factorvalue layout
                return {
                    'date': [date_label] * len(ids),
                    'id': ids,
                    'factorvalue': values
                }

            dict_stom = _as_loading(stoms)
            dict_stoq = _as_loading(stoqs)
            dict_stoa = _as_loading(stoas)
            dict_raw_liquidity = _as_loading(raw_liquidities)

            # Read the Size factor and orthogonalize liquidity against it
            size_factor_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                            risk_ct.SIZE_CT.db_file)
            df_size = Utils.read_factor_loading(
                size_factor_path,
                Utils.datetimelike_to_str(calc_date, dash=False))
            df_size.drop(columns='date', inplace=True)
            df_size.rename(columns={'factorvalue': 'size'}, inplace=True)
            df_liquidity = pd.merge(
                left=pd.DataFrame({'id': ids, 'liquidity': raw_liquidities}),
                right=df_size,
                how='inner',
                on='id')

            def _cleaned_column(name):
                # Column vector -> extreme values cleaned -> normalized
                column = np.array(df_liquidity[name]).reshape(
                    (len(df_liquidity), 1))
                return Utils.normalize_data(Utils.clean_extreme_value(column))

            arr_liquidity = _cleaned_column('liquidity')
            arr_size = _cleaned_column('size')
            # Regression residual = liquidity with the size effect removed
            results = sm.OLS(arr_liquidity, arr_size).fit()
            df_liquidity['liquidity'] = results.resid
            df_liquidity.drop(columns='size', inplace=True)
            df_liquidity.rename(columns={'liquidity': 'factorvalue'},
                                inplace=True)
            df_liquidity['date'] = date_label

            # Persist component, raw and orthogonalized loadings
            if save:
                str_date = Utils.datetimelike_to_str(calc_date, dash=False)
                factor_header = ['date', 'id', 'factorvalue']
                component_outputs = (
                    ('stom_{}', dict_stom),
                    ('stoq_{}', dict_stoq),
                    ('stoa_{}', dict_stoa),
                    ('rawliquidity_{}', dict_raw_liquidity),
                )
                for name_template, payload in component_outputs:
                    Utils.factor_loading_persistent(
                        cls._db_file, name_template.format(str_date), payload,
                        factor_header)
                Utils.factor_loading_persistent(cls._db_file, str_date,
                                                df_liquidity.to_dict('list'),
                                                factor_header)

            # Pause for 180 seconds
            logging.info('Suspending for 180s.')
            time.sleep(180)
        return dict_raw_liquidity
示例#24
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the CYQ (chip distribution) factor loading of the sample
        stocks and optionally persist it to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like or str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date; if None only start_date is processed,
            format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            If True, only dates for which Utils.is_month_end() holds are processed
        :param save: bool, default False
            Whether to persist the factor loading to the factor database
        :param kwargs:
            Not used by this implementation
        :return: dict
            Factor loading of the last processed date with keys
            'date', 'id', 'factorvalue'; an empty dict when no date was
            processed.
        --------
        """
        # Trading-day sequence and stock master table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Walk the trading days and compute the chip-distribution loading
        dict_cyq = {}
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc CYQ factor loading.' %
                         Utils.datetimelike_to_str(calc_date))
            # Keep only stocks whose list_date is more than 180 calendar days
            # before calc_date
            s = (calc_date - datetime.timedelta(days=180)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]

            secu_cyq_path = Path(
                factor_ct.FACTOR_DB.db_path, factor_ct.CYQ_CT.db_file,
                'secu_cyq/%s' % calc_date.strftime('%Y-%m-%d'))
            # fix: create missing parent directories as well and tolerate an
            # already existing directory (the former exists()+mkdir() failed
            # when 'secu_cyq' was absent and was racy).
            secu_cyq_path.mkdir(parents=True, exist_ok=True)
            ids = []
            rps = []

            # Multi-process computation of the per-stock chip distribution.
            # Workers push (secu_code, secu_close, cyq_data) tuples onto a
            # shared queue. A single-process variant would call
            # cls._calc_factor_loading(symbol, calc_date) per stock instead.
            q = Manager().Queue()  # inter-process result queue
            p = Pool(4)  # at most 4 worker processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            while not q.empty():
                secu_code, secu_close, cyq_data = q.get(True)
                # Save the chip distribution of the stock
                cyq_data.to_csv(Path(secu_cyq_path, '%s.csv' % secu_code),
                                header=True)
                # Relative position of the current price:
                # (close - average cost) / average cost, where the average
                # cost is sum(index * value) of the distribution
                avg_cyq = np.sum(
                    np.array(cyq_data.index) * np.array(cyq_data.values))
                relative_position = round((secu_close - avg_cyq) / avg_cyq, 4)
                ids.append(secu_code)
                rps.append(relative_position)

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_cyq = {
                'date': [date_label] * len(ids),
                'id': ids,
                'factorvalue': rps
            }
            if save:
                cyq_data_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                             factor_ct.CYQ_CT.db_file,
                                             factor_ct.CYQ_CT.CYQ_rp_file)
                Utils.factor_loading_persistent(
                    cyq_data_path,
                    Utils.datetimelike_to_str(calc_date, dash=False), dict_cyq,
                    ['date', 'id', 'factorvalue'])
            # Pause for 100 seconds
            logging.info('Suspending for 100s.')
            time.sleep(100)
        return dict_cyq
示例#25
0
def _calc_MVPFP(factor_name,
                start_date,
                end_date=None,
                month_end=True,
                save=False):
    """
    Build the Minimum Volatility Pure Factor Portfolio (MVPFP) of a target
    alpha factor.

    Parameters:
    --------
    :param factor_name: str
        Alpha factor name, e.g: SmartMoney
    :param start_date: datetime-like, str
        Start date, e.g: YYYY-MM-DD, YYYYMMDD
    :param end_date: datetime-like, str, default None
        End date, e.g: YYYY-MM-DD, YYYYMMDD
    :param month_end: bool, default True
        If True, only dates for which Utils.is_month_end() holds are processed
    :param save: bool, default False
        Whether to save the resulting holdings
    :return: CWeightHolding
        Weights of the minimum volatility pure factor portfolio
    :raises cvx.SolverError: if the optimisation of a date does not reach
        the OPTIMAL status
    --------
    Optimisation: unit exposure to the target factor, zero exposure to all
    style risk factors, minimum expected variance.
    Min: W'VW
    s.t. W'X_beta = 0
         W'x_target = 1
    where: W: portfolio weights
           V: stock covariance matrix
           X_beta: style-factor loading matrix of the stocks
           x_target: target-factor loading vector of the stocks
    """
    start_date = Utils.to_date(start_date)
    if end_date is None:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    else:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date,
                                                     end=end_date)

    CRiskModel = Barra()
    mvpfp_holding = CWeightHolding()
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        # Stock covariance matrix of calc_date
        stock_codes, arr_stocks_covmat = CRiskModel.calc_stocks_covmat(
            calc_date)
        # Style-factor loading matrix
        df_stylefactor_loading = CRiskModel.get_StyleFactorloading_matrix(
            calc_date)
        # Target-factor loading vector (orthogonalized loading)
        df_targetfactor_loading = _get_factorloading(
            factor_name, calc_date,
            alphafactor_ct.FACTORLOADING_TYPE['ORTHOGONALIZED'])
        df_targetfactor_loading.drop(columns='date', inplace=True)
        df_targetfactor_loading.rename(columns={
            'id': 'code',
            'factorvalue': factor_name
        },
                                       inplace=True)

        df_factorloading = pd.merge(left=df_stylefactor_loading,
                                    right=df_targetfactor_loading,
                                    how='inner',
                                    on='code')
        df_factorloading.set_index('code', inplace=True)

        # Re-order rows by stock_codes so they line up with the covariance
        # matrix
        df_stylefactor_loading = df_factorloading.loc[
            stock_codes, riskfactor_ct.STYLE_RISK_FACTORS]
        arr_stylefactor_loading = np.array(df_stylefactor_loading)

        df_targetfactor_loading = df_factorloading.loc[stock_codes,
                                                       factor_name]
        arr_targetfactor_loading = np.array(df_targetfactor_loading)

        # Solve for the minimum-volatility pure-factor weights
        V = arr_stocks_covmat
        X_beta = arr_stylefactor_loading
        x_target = arr_targetfactor_loading
        N = len(stock_codes)
        w = cvx.Variable((N, 1))
        risk = cvx.quad_form(w, V)
        constraints = [
            cvx.matmul(w.T, X_beta) == 0,
            cvx.matmul(w.T, x_target) == 1
        ]
        prob = cvx.Problem(cvx.Minimize(risk), constraints)
        prob.solve()
        if prob.status != cvx.OPTIMAL:
            raise cvx.SolverError(
                "%s优化计算%s最小纯因子组合失败。" %
                (Utils.datetimelike_to_str(calc_date), factor_name))

        datelabel = Utils.datetimelike_to_str(calc_date, dash=False)
        df_holding = pd.DataFrame({
            'date': [datelabel] * len(stock_codes),
            'code': stock_codes,
            # fix: w.value has shape (N, 1); a DataFrame column must be 1-D
            'weight': np.asarray(w.value).ravel()
        })
        mvpfp_holding.from_dataframe(df_holding)
        if save:
            # fix: the constant is alphafactor_ct.<NAME>_CT (the former code
            # evaluated '<NAME>.CT'); looked up with getattr instead of eval.
            factor_ct_entry = getattr(alphafactor_ct,
                                      factor_name.upper() + '_CT')
            holding_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH, factor_ct_entry['db_file'], 'mvpfp',
                '{}_{}.csv'.format(factor_name, datelabel))
            mvpfp_holding.save_data(holding_path)

    return mvpfp_holding
示例#26
0
 def _calc_factor_loading1(cls, code, calc_date):
     """
     Compute the four proxy variables of one stock's chip distribution on a
     given date, together with the return of the following period.

     Parameters
     -------
     :param code: str
         Stock code, e.g. 600000 or SH600000
     :param calc_date: datetime-like, str
         Calculation date, format YYYY-MM-DD
     :return: pd.Series
     --------
         Proxy variables of the chip distribution
         0. arc: turnover-weighted mean of the relative capital gain
         1. vrc: weighted variance
         2. src: weighted skewness
         3. krc: weighted kurtosis
         4. next_ret: return from the next trading day to the end of that
            day's calendar month
         None is returned when market data is missing, fewer than 20 rows
         are available, any statistic is NaN or the return is unavailable.
     """
     # Adjusted daily bars of the last cls.__days days
     df_mkt = Utils.get_secu_daily_mkt(code,
                                       end=calc_date,
                                       ndays=cls.__days,
                                       fq=True,
                                       range_lookup=True)
     if df_mkt is None or len(df_mkt) < 20:
         return None
     # Most recent bar first
     df_mkt.sort_values(by='date', ascending=False, inplace=True)

     n = len(df_mkt)
     arr_rc = np.zeros(n)   # RC: relative capital gain per day
     arr_atr = np.zeros(n)  # ATR: adjusted turnover rate per day
     p_c = df_mkt.iloc[0]['close']  # close price on the cut-off date
     pre_tr = None
     for j in range(n):
         bar = df_mkt.iloc[j]
         p_avg = bar['amount'] / bar['vol'] * bar['factor']
         arr_rc[j] = (p_c - p_avg) / p_c
         tr_j = bar['turnover1']
         if j > 0:
             # Recursive update from the previous day's adjusted turnover
             arr_atr[j] = arr_atr[j - 1] / pre_tr * tr_j * (1. - pre_tr)
         else:
             arr_atr[j] = tr_j
         pre_tr = tr_j

     arc = np.average(arr_rc, weights=arr_atr)
     if np.isnan(arc):
         return None

     rc_dev = arr_rc - arc
     scale = n / (n - 1.)
     atr_total = np.sum(arr_atr)

     vrc = scale * np.sum(arr_atr * rc_dev * rc_dev) / atr_total
     if np.isnan(vrc):
         return None

     src = scale * np.sum(arr_atr * np.float_power(
         rc_dev, 3)) / atr_total / np.float_power(vrc, 1.5)
     if np.isnan(src):
         return None

     krc = scale * np.sum(arr_atr * np.float_power(
         rc_dev, 4)) / atr_total / np.float_power(vrc, 2)
     if np.isnan(krc):
         return None

     # Return of the following period: next trading day .. end of its month
     next_date = Utils.get_trading_days(start=calc_date, ndays=2)[1]
     _, last_day = calendar.monthrange(next_date.year, next_date.month)
     date_end = datetime.datetime(next_date.year, next_date.month, last_day)
     next_ret = Utils.calc_interval_ret(code, start=next_date, end=date_end)
     if next_ret is None:
         return None
     return pd.Series([arc, vrc, src, krc, next_ret],
                      index=['arc', 'vrc', 'src', 'krc', 'next_ret'])
示例#27
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Compute the SmartQ (SmartMoney) factor loading of the sample stocks
        and optionally persist it to the factor database.

        Parameters
        --------
        :param start_date: datetime-like, str
            Start date
        :param end_date: datetime-like, str, default None
            End date; if None only start_date is processed
        :param month_end: bool, default True
            If True, only dates for which Utils.is_month_end() holds are processed
        :param save: bool, default False
            Whether to persist the raw and standardized loadings
        :param kwargs:
            'multi_proc': bool, True=multi-process, False=single process,
            default False
        :return: dict
        --------
            Raw factor loading of the last processed date
            0. date, second entry of Utils.get_trading_days(calc_date, ndays=2)
               (documented by the original author as the next trading day)
            1: id, security code
            2: factorvalue, factor loading
            None is returned when no date was processed.
        """
        # 0. Trading-day sequence
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        if 'multi_proc' not in kwargs:
            kwargs['multi_proc'] = False

        # Walk the trading days and compute the SmartQ loading
        dict_factor = None
        for calc_date in trading_days_series:
            # fix: skip BEFORE touching dict_factor, so a trailing date that
            # is not a month end no longer replaces the last computed result
            # with an empty dict.
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 1. Sample stocks, selected with a cut-off 90 calendar days
            #    before calc_date
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = Utils.get_stock_basics(s)
            # 2. Compute the loading of every sample stock
            dict_factor = {'date': None, 'id': [], 'factorvalue': []}

            if not kwargs['multi_proc']:
                # Single process
                for _, stock_info in stock_basics.iterrows():
                    factor_loading = cls._calc_factor_loading(
                        stock_info.symbol, calc_date)
                    print(
                        "[%s]Calculating %s's SmartMoney factor loading = %.4f."
                        % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol,
                           -1.0 if factor_loading is None else factor_loading))
                    if factor_loading is not None:
                        dict_factor['id'].append(
                            Utils.code_to_symbol(stock_info.symbol))
                        dict_factor['factorvalue'].append(factor_loading)
            else:
                # Multi-process: workers push (id, value) pairs onto a queue
                q = Manager().Queue()  # inter-process result queue
                p = Pool(4)  # at most 4 worker processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                while not q.empty():
                    smart_q = q.get(True)
                    dict_factor['id'].append(smart_q[0])
                    dict_factor['factorvalue'].append(smart_q[1])

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_factor['date'] = [date_label] * len(dict_factor['id'])
            # 3. Outlier-treated, standardized loading
            df_std_factor = Utils.normalize_data(pd.DataFrame(dict_factor),
                                                 columns='factorvalue',
                                                 treat_outlier=True,
                                                 weight='eq')
            # 4. Persist raw and standardized loadings
            if save:
                str_date = Utils.datetimelike_to_str(calc_date, dash=False)
                cls._save_factor_loading(cls._db_file,
                                         str_date,
                                         dict_factor,
                                         'SmartMoney',
                                         factor_type='raw',
                                         columns=['date', 'id', 'factorvalue'])
                cls._save_factor_loading(cls._db_file,
                                         str_date,
                                         df_std_factor,
                                         'SmartMoney',
                                         factor_type='standardized',
                                         columns=['date', 'id', 'factorvalue'])
            # Pause for 360 seconds
            logging.info('Suspending for 360s.')
            time.sleep(360)
        return dict_factor
示例#28
0
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Compute the growth factor loading of the sample stocks and optionally
        persist it to the factor database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str; default None
            End date, format: YYYY-MM-DD or YYYYMMDD
            If None, only start_date is processed
        :param month_end: bool, default True
            If True, only dates for which Utils.is_month_end() holds are processed
        :param save: bool, default False
            Whether to persist the factor loading to the factor database
        :param kwargs:
            Not used by this implementation
        :return: dict
        --------
            Factor loading of the last processed date
            0. date: second entry of Utils.get_trading_days(start=calc_date, ndays=2)
            1. id: security code
            2. npg_ttm: net profit growth rate (TTM)
            3. opg_ttm: operating revenue growth rate (TTM)
            None is returned when no date was processed.
        """
        # Trading-day sequence and stock master table.
        # fix: normalise the dates and honour end_date=None ("only
        # start_date") the same way the sibling calc_factor_loading
        # implementations do, instead of passing end=None straight through.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Walk the trading days and compute the growth factor loading
        dict_growth = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            dict_growth = {'date': [], 'id': [], 'npg_ttm': [], 'opg_ttm': []}
            # Keep only stocks whose list_date is more than 90 calendar days
            # before calc_date
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]

            # Multi-process computation; workers push dicts with the keys
            # 'id', 'npg_ttm' and 'opg_ttm' onto a shared queue. A
            # single-process variant would call
            # cls._calc_factor_loading(symbol, calc_date) per stock instead.
            q = Manager().Queue()   # inter-process result queue
            p = Pool(4)             # at most 4 worker processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,))
            p.close()
            p.join()
            while not q.empty():
                growth_data = q.get(True)
                dict_growth['id'].append(growth_data['id'])
                dict_growth['npg_ttm'].append(growth_data['npg_ttm'])
                dict_growth['opg_ttm'].append(growth_data['opg_ttm'])

            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_growth['date'] = [date_label] * len(dict_growth['id'])
            # Persist the factor loading
            if save:
                columns = ['date', 'id', 'npg_ttm', 'opg_ttm']
                Utils.factor_loading_persistent(cls._db_file, calc_date.strftime('%Y%m%d'), dict_growth, columns)
            # Pause for 120 seconds
            logging.info('Suspending for 120s.')
            time.sleep(120)
        return dict_growth
示例#29
0
    def calc_factor_loading1(cls,
                             start_date,
                             end_date=None,
                             month_end=True,
                             save=False,
                             **kwargs):
        """
        Calculate the CYQ (chip distribution) factor loadings of the sample
        stocks for the given date(s) and optionally save them to the factor
        database.

        Parameters:
        --------
        :param start_date: datetime-like or str
            Start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            End date, format: YYYY-MM-DD or YYYYMMDD.
            If None, only the loading for start_date is calculated.
        :param month_end: bool, default True
            If True, only month-end dates are processed.
        :param save: bool, default False
            Whether to persist the loadings to the factor database.
        :param kwargs:
            Accepted for interface compatibility; not read by this method.
        :return: dict, or None
            Loadings ('date', 'id', 'factorvalue' lists) of the last date for
            which proxy weights were fitted; None when no date reached that
            stage (e.g. fewer than 24 earlier weight records exist).
        --------
        """
        # Build the trading-day series and load the stock basics table.
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Walk through the trading days and compute the CYQ loadings.
        dict_cyq = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc CYQ factor loading.' %
                         Utils.datetimelike_to_str(calc_date))
            # Only keep stocks whose list_date is more than 365 calendar days
            # before the calculation date (compared as YYYYMMDD strings).
            s = (calc_date - datetime.timedelta(days=365)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]

            trading_day = Utils.get_trading_days(calc_date, ndays=2)[1]

            # Compute the per-stock CYQ proxy variables in parallel; every
            # worker pushes its result onto the shared queue.
            q = Manager().Queue()  # inter-process queue collecting worker results
            p = Pool(4)  # process pool, at most 4 concurrent workers
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            # Collect the records first and build the frame once:
            # DataFrame.append was removed in pandas 2.0 and copies the whole
            # frame on every call.
            proxy_rows = []
            while not q.empty():
                proxy_rows.append(q.get(True))
            df_proxies = DataFrame(proxy_rows)

            # Save the proxy variables of this date.
            df_proxies['date'] = trading_day
            proxies_file_path = cls._db_proxies_path + '_%s.csv' % Utils.datetimelike_to_str(
                calc_date, dash=False)
            df_proxies.to_csv(
                proxies_file_path,
                index=False,
                columns=['date', 'id', 'arc', 'vrc', 'src', 'krc', 'next_ret'])

            # Compute marc, the proxy weights and the CYQ factor loadings.
            marc = df_proxies['arc'].median()
            proxies_weight_file = Path(factor_ct.FACTOR_DB.db_path,
                                       factor_ct.CYQ_CT.proxies_weight_file)
            if proxies_weight_file.exists():
                df_proxies_weight = pd.read_csv(proxies_weight_file,
                                                header=0,
                                                parse_dates=[0])
                # Use the 24 most recent weight records strictly before
                # calc_date.
                df_proxies_weight = df_proxies_weight[
                    df_proxies_weight.date < calc_date].tail(24)
                if len(df_proxies_weight) < 24:
                    # Not enough history to fit weights: record marc with
                    # zero weights only.
                    with open(proxies_weight_file, 'a', newline='') as f:
                        csv_writer = csv.writer(f)
                        csv_writer.writerow([
                            calc_date.strftime('%Y-%m-%d'), marc, 0, 0, 0, 0, 0
                        ])
                else:
                    # Restrict the history to records whose marc has the same
                    # sign as the current one (no filtering when marc == 0).
                    if marc > 0:
                        df_proxies_weight = df_proxies_weight[
                            df_proxies_weight.marc > 0]
                    elif marc < 0:
                        df_proxies_weight = df_proxies_weight[
                            df_proxies_weight.marc < 0]
                    # Load the saved proxy files of the selected dates and
                    # stack them with a single concat.
                    history_frames = []
                    for _, weight_info in df_proxies_weight.iterrows():
                        history_file_path = cls._db_proxies_path + '_%s.csv' % Utils.datetimelike_to_str(
                            weight_info['date'], False)
                        history_frames.append(
                            pd.read_csv(history_file_path, header=0))
                    if history_frames:
                        df_proxies_data = pd.concat(history_frames,
                                                    ignore_index=True)
                    else:
                        df_proxies_data = DataFrame()
                    # Regress next-period returns on the four proxies (with
                    # an intercept) to obtain the proxy weights.
                    next_ret = np.array(df_proxies_data['next_ret'])
                    cyq_data = np.array(
                        df_proxies_data[['arc', 'vrc', 'src', 'krc']])
                    cyq_data = sm.add_constant(cyq_data)
                    cyq_model = sm.OLS(next_ret, cyq_data)
                    cyq_result = cyq_model.fit()
                    cyq_weights = np.around(cyq_result.params, 6)
                    with open(proxies_weight_file, 'a', newline='') as f:
                        csv_writer = csv.writer(f)
                        csv_writer.writerow([
                            calc_date.strftime('%Y-%m-%d'), marc,
                            cyq_weights[0], cyq_weights[1], cyq_weights[2],
                            cyq_weights[3], cyq_weights[4]
                        ])
                    # CYQ loading = intercept + proxies . weights.
                    arr_proxies = np.array(
                        df_proxies[['arc', 'vrc', 'src', 'krc']])
                    arr_weight = np.array([
                        cyq_weights[1], cyq_weights[2], cyq_weights[3],
                        cyq_weights[4]
                    ]).reshape((4, 1))
                    intercept = cyq_weights[0]
                    arr_cyq = np.around(
                        np.dot(arr_proxies, arr_weight) + intercept, 6)
                    dict_cyq = {
                        'date': list(df_proxies['date']),
                        'id': list(df_proxies['id']),
                        'factorvalue': list(arr_cyq.reshape((len(arr_cyq), )))
                    }
                    # Persist the loadings to the factor database.
                    if save:
                        Utils.factor_loading_persistent(
                            cls._db_file,
                            calc_date.strftime('%Y%m%d'),
                            dict_cyq,
                            columns=['date', 'id', 'factorvalue'])
            else:
                # First run: create the weight file with its header and a
                # zero-weight record for this date.
                with open(proxies_weight_file, 'w', newline='') as f:
                    csv_writer = csv.writer(f)
                    csv_writer.writerow([
                        'date', 'marc', 'intcpt', 'arc_w', 'vrc_w', 'src_w',
                        'krc_w'
                    ])
                    csv_writer.writerow(
                        [calc_date.strftime('%Y-%m-%d'), marc, 0, 0, 0, 0, 0])
            # Pause for 200 seconds before moving on to the next date.
            logging.info('Suspending for 200s.')
            time.sleep(200)
        # The docstring documents a dict return; hand the result back to the
        # caller instead of implicitly returning None.
        return dict_cyq
示例#30
0
文件: Beta.py 项目: rlcjj/MultiFactor
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Compute the BETA and HSIGMA factor loadings of the sample stocks for
        the requested date(s), optionally persisting both to the factor
        database.

        Parameters:
        --------
        :param start_date: datetime-like, str
            First date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            Last date, format: YYYY-MM-DD or YYYYMMDD; when None only
            start_date is handled
        :param month_end: bool, default True
            When True, dates that are not month ends are skipped
        :param save: bool, default False
            When True, the BETA loadings are written to cls._db_file and the
            HSIGMA loadings to the HSIGMA database file
        :param kwargs:
            'multi_proc': bool, True = use a process pool, False = single
            process (default)
        :return: dict
            BETA loadings of the last processed date with keys 'date', 'id'
            and 'factorvalue'; an empty dict when no date was processed
        """
        # Resolve the trading days to work on.
        first_day = Utils.to_date(start_date)
        if end_date is None:
            trading_days_series = Utils.get_trading_days(end=first_day, ndays=1)
        else:
            last_day = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=first_day, end=last_day)

        # Single-process calculation unless the caller asks for a pool.
        use_pool = kwargs.get('multi_proc', False)

        dict_beta = {}
        for calc_date in trading_days_series:
            if month_end and not Utils.is_month_end(calc_date):
                continue
            logging.info('[%s] Calc BETA factor loading.' % Utils.datetimelike_to_str(calc_date))

            # Stock universe as of `listed_days` calendar days before the
            # calculation date.
            universe_date = calc_date - datetime.timedelta(days=risk_ct.DBETA_CT.listed_days)
            stock_basics = Utils.get_stock_basics(universe_date, False)

            # One (id, beta, hsigma) tuple per stock.
            rows = []
            if use_pool:
                # Parallel path: workers push their results onto a shared
                # queue; the pool size comes from SETTINGS.
                result_queue = Manager().Queue()
                workers = Pool(SETTINGS.CONCURRENCY_KERNEL_NUM)
                for _, stock_info in stock_basics.iterrows():
                    workers.apply_async(cls._calc_factor_loading_proc,
                                        args=(stock_info.symbol, calc_date, result_queue,))
                workers.close()
                workers.join()
                while not result_queue.empty():
                    beta_data = result_queue.get(True)
                    rows.append((beta_data['code'], beta_data['beta'], beta_data['hsigma']))
            else:
                # Sequential path: stocks without a result are kept with NaN
                # values.
                for _, stock_info in stock_basics.iterrows():
                    logging.debug("[%s] Calc %s's BETA and HSIGMA factor data." % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol))
                    beta_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
                    if beta_data is not None:
                        row = (beta_data['code'], beta_data['beta'], beta_data['hsigma'])
                    else:
                        row = (Utils.code_to_symbol(stock_info.symbol), np.nan, np.nan)
                    rows.append(row)

            ids = [row[0] for row in rows]          # stock codes
            betas = [row[1] for row in rows]        # BETA factor values
            hsigmas = [row[2] for row in rows]      # HSIGMA factor values

            # Label the loadings with the second of the two trading days
            # starting at calc_date.
            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            stock_count = len(ids)
            dict_beta = {'date': [date_label] * stock_count, 'id': ids, 'factorvalue': betas}
            dict_hsigma = {'date': [date_label] * stock_count, 'id': ids, 'factorvalue': hsigmas}
            if save:
                out_columns = ['date', 'id', 'factorvalue']
                date_key = Utils.datetimelike_to_str(calc_date, dash=False)
                Utils.factor_loading_persistent(cls._db_file, date_key, dict_beta, out_columns)
                hsigma_path = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.HSIGMA_CT.db_file)
                Utils.factor_loading_persistent(hsigma_path, date_key, dict_hsigma, out_columns)
        return dict_beta