def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the BETA and HSIGMA factor loadings of one stock on one date.

    The stock's simple daily returns are regressed on the benchmark's simple
    daily returns with weighted least squares; the observation weights decay
    exponentially with the half-life configured in risk_ct.BETA_CT.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code: stock symbol
        1. beta: slope coefficient of the weighted regression
        2. hsigma: square root of the residual mean squared error
        None is returned when the stock or benchmark quotes are unavailable.
    :raises ValueError: when the benchmark quotes do not line up one-to-one
        with the stock quotes (previously this surfaced as an opaque shape
        error from the regression itself)
    """
    # Adjusted quotes of the stock; trailing + 1 closes yield `trailing` returns
    df_secu_quote = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=risk_ct.BETA_CT.trailing + 1, fq=True)
    if df_secu_quote is None:
        return None
    df_secu_quote.reset_index(drop=True, inplace=True)

    # Adjusted quotes of the benchmark, restricted to the stock's quote dates
    benchmark_code = risk_ct.BETA_CT.benchmark
    df_benchmark_quote = Utils.get_secu_daily_mkt(benchmark_code, end=calc_date, fq=True)
    if df_benchmark_quote is None:
        return None
    df_benchmark_quote = df_benchmark_quote[
        df_benchmark_quote['date'].isin(list(df_secu_quote['date']))]
    # Fail early and explicitly if the two return series cannot be paired
    if len(df_benchmark_quote) != len(df_secu_quote):
        raise ValueError("[beta计算]基准和个股的历史行情长度不一致.")
    df_benchmark_quote.reset_index(drop=True, inplace=True)

    # Simple daily returns: close[t] / close[t-1] - 1
    arr_secu_close = np.array(df_secu_quote['close'])
    arr_secu_daily_ret = arr_secu_close[1:] / arr_secu_close[:-1] - 1.
    arr_benchmark_close = np.array(df_benchmark_quote['close'])
    arr_benchmark_daily_ret = arr_benchmark_close[1:] / arr_benchmark_close[:-1] - 1.

    # Exponentially decaying weights: the oldest observation gets
    # (1 - alpha)^(T-1), every later one alpha * (1 - alpha)^age
    T = len(arr_benchmark_daily_ret)
    alpha = 1 - np.exp(np.log(0.5) / risk_ct.BETA_CT.half_life)
    ages = list(range(T - 1, -1, -1))
    scale = [1] + [alpha] * (T - 1)
    weights = np.float_power([1 - alpha] * T, ages) * scale

    # Weighted least squares of stock returns on [const, benchmark returns]
    arr_benchmark_daily_ret = sm.add_constant(arr_benchmark_daily_ret)
    cap_model = sm.WLS(arr_secu_daily_ret, arr_benchmark_daily_ret, weights=weights)
    result = cap_model.fit()
    beta = result.params[1]
    hsigma = np.sqrt(result.mse_resid)
    return pd.Series([Utils.code_to_symbol(code), beta, hsigma],
                     index=['code', 'beta', 'hsigma'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the STOA (annual share turnover) factor loading of one stock.

    STOA is the natural log of the summed daily turnover over the most recent
    `month_days * months` rows, divided by `months` (both from
    risk_ct.STOA_CT). When fewer rows are available, all of them are used.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like or str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code
        1. stoa
        None is returned when no quotes are available or when the summed
        turnover is not positive (its logarithm is undefined).
    """
    # Unadjusted daily quotes of the last 252 trading days
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, ndays=252, fq=False)
    if df_mkt_data is None or df_mkt_data.empty:
        return None

    days = risk_ct.STOA_CT.month_days * risk_ct.STOA_CT.months
    # A negative-start slice already returns the whole frame when it is
    # shorter than `days`, so no separate length branch is needed
    total_turnover = df_mkt_data.iloc[-days:]['turnover1'].sum()
    if total_turnover <= 0:
        # math.log would raise ValueError; the contract is to return None on failure
        return None
    stoa = math.log(total_turnover / risk_ct.STOA_CT.months)
    return pd.Series([Utils.code_to_symbol(code), stoa], index=['code', 'stoa'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the LNCAP factor loading of one stock on one date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code
        1. lncap: natural log of close * (total - liquid_b - liquid_h) shares
        2. liquid_cap: close * liquid_a shares
        None is returned when the quote or the capital structure is missing.
    """
    # Unadjusted quote looked up from calc_date
    quote = Utils.get_secu_daily_mkt(code, start=calc_date, fq=False, range_lookup=True)
    if quote is None:
        return None
    close_price = quote['close']

    # Latest capital structure as of calc_date
    shares = Utils.get_cap_struct(code, calc_date)
    if shares is None:
        return None

    # Share count used for market cap: total minus the B- and H-share parts
    a_share_total = shares.total - shares.liquid_b - shares.liquid_h
    lncap = np.log(close_price * a_share_total)
    liquid_cap = close_price * shares.liquid_a

    labels = ['code', 'lncap', 'liquid_cap']
    return pd.Series([Utils.code_to_symbol(code), lncap, liquid_cap], index=labels)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the LIQUIDITY factor loading of one stock on one date.

    Three log-turnover measures are combined with the weights configured in
    risk_ct.LIQUID_CT:
        liquidity = stom_weight * stom + stoq_weight * stoq + stoa_weight * stoa

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code
        1. stom: log of summed turnover over the last `stom_days` rows
        2. stoq: log of (summed turnover over the last
           `stom_days * stoq_months` rows / stoq_months)
        3. stoa: log of (summed turnover over all fetched rows / stoa_months)
        4. liquidity: weighted combination of the three
        None is returned when no quotes are available or when any of the
        turnover sums is not positive (its logarithm is undefined).
    """
    stom_days = risk_ct.LIQUID_CT.stom_days
    stoq_months = risk_ct.LIQUID_CT.stoq_months
    stoa_months = risk_ct.LIQUID_CT.stoa_months

    # Unadjusted daily quotes covering the longest (stoa) window
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date,
                                           ndays=stoa_months * stom_days, fq=False)
    if df_mkt_data is None or df_mkt_data.empty:
        return None

    turnover = df_mkt_data['turnover1']
    # Negative-start slices fall back to the whole series when it is shorter
    # than the window, so no explicit length checks are required
    stom_sum = turnover.iloc[-stom_days:].sum()
    stoq_sum = turnover.iloc[-stom_days * stoq_months:].sum()
    stoa_sum = turnover.sum()
    if stom_sum <= 0 or stoq_sum <= 0 or stoa_sum <= 0:
        # math.log would raise ValueError; the contract is to return None on failure
        return None

    stom = math.log(stom_sum)
    stoq = math.log(stoq_sum / stoq_months)
    stoa = math.log(stoa_sum / stoa_months)

    stom_weight = risk_ct.LIQUID_CT.stom_weight
    stoq_weight = risk_ct.LIQUID_CT.stoq_weight
    stoa_weight = risk_ct.LIQUID_CT.stoa_weight
    liquidity = stom_weight * stom + stoq_weight * stoq + stoa_weight * stoa
    return pd.Series(
        [Utils.code_to_symbol(code), stom, stoq, stoa, liquidity],
        index=['code', 'stom', 'stoq', 'stoa', 'liquidity'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the chip (holding-cost) distribution of one stock on one date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: tuple(code, close, cyq_data)
    --------
        1. code: stock symbol
        2. close: close of the last quote row up to calc_date
        3. cyq_data: pd.Series indexed by chip price (adjusted daily VWAP
           rounded to 2 decimals) whose values are the holding proportions;
           entries not above 0.00001 are dropped
        None is returned when IPO info or quotes are missing, the IPO price
        is '--', or fewer than 30 distinct chip prices exist.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, pd.concat replaces it
    import pandas as pd

    # IPO information; the price field carries one trailing character that is stripped
    ipo_data = Utils.get_ipo_info(code)
    if ipo_data is None:
        return None
    if ipo_data['发行价格'][:-1] == '--':
        return None
    ipo_price = float(ipo_data['发行价格'][:-1])

    # Adjusted daily quotes up to calc_date
    mkt_data = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
    if mkt_data is None or mkt_data.empty:
        return None
    secu_close = mkt_data.iloc[-1]['close']

    # Adjusted daily average price; rows with any NaN (e.g. zero volume) are dropped
    mkt_data['vwap'] = np.around(
        mkt_data['amount'] / mkt_data['vol'] * mkt_data['factor'], 2)
    mkt_data.dropna(axis=0, how='any', inplace=True)
    # Newest day first, so the cumulative product below runs backwards in time
    mkt_data.sort_values(by='date', ascending=False, inplace=True)
    mkt_data.reset_index(drop=True, inplace=True)

    # Append the IPO price as the final (oldest) row
    # NOTE(review): the IPO row has turnover1 = 0, so its own cyq weight is
    # always 0 and the residual never lands on the IPO price — confirm intended.
    ipo_row = pd.DataFrame([[ipo_price, 0.0]], columns=['vwap', 'turnover1'])
    cyq_data = pd.concat([mkt_data[['vwap', 'turnover1']], ipo_row],
                         ignore_index=True)

    # Weight of day i = turnover_i * prod over more recent days of (1 - turnover)
    cyq_data['minusTR'] = 1 - cyq_data['turnover1']
    cyq_data['cumprod_TR'] = cyq_data['minusTR'].cumprod().shift(1)
    cyq_data.loc[0, 'cumprod_TR'] = 1.
    cyq_data['cyq'] = cyq_data['turnover1'] * cyq_data['cumprod_TR']
    secu_cyq = cyq_data['cyq'].groupby(cyq_data['vwap']).sum()

    # Too few distinct chip prices to be meaningful
    if len(secu_cyq) < 30:
        return None
    secu_cyq = secu_cyq[secu_cyq.values > 0.00001]
    return (Utils.code_to_symbol(code), secu_close, secu_cyq)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the value factors ep_ttm, bp_lr and ocf_ttm of one stock.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like or str
        Calculation date, format YYYY-MM-DD or YYYYMMDD
    :return: pd.Series
    --------
        Each value is rounded to 6 decimals:
        0. ep_ttm: TTM NetProfit / total market cap
        1. bp_lr: ShareHolderEquity of the latest report / total market cap
        2. ocf_ttm: TTM NetOperateCashFlow / total market cap
        None is returned when financial data, quotes or the capital
        structure are unavailable.
    """
    code = Utils.code_to_symbol(code)
    calc_date = Utils.to_date(calc_date)

    # TTM financial data
    ttm_fin_data = Utils.get_ttm_fin_basic_data(code, calc_date)
    if ttm_fin_data is None:
        return None

    # Latest financial report as of calc_date
    report_date = Utils.get_fin_report_date(calc_date)
    fin_basic_data = Utils.get_fin_basic_data(code, report_date)
    if fin_basic_data is None:
        return None

    # Unadjusted quote; guard against None before touching .shape
    mkt_daily = Utils.get_secu_daily_mkt(code, calc_date, fq=False, range_lookup=True)
    if mkt_daily is None or mkt_daily.shape[0] == 0:
        return None
    cap_struct = Utils.get_cap_struct(code, calc_date)
    if cap_struct is None:
        return None

    # Market cap uses total shares minus the B- and H-share parts
    total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
    total_mkt_cap = total_cap * mkt_daily.close

    # Financial amounts are scaled by FIN_DATA_AMOUNT_UNIT before dividing
    unit = util_ct.FIN_DATA_AMOUNT_UNIT
    ep_ttm = ttm_fin_data['NetProfit'] * unit / total_mkt_cap
    ocf_ttm = ttm_fin_data['NetOperateCashFlow'] * unit / total_mkt_cap
    bp_lr = fin_basic_data['ShareHolderEquity'] * unit / total_mkt_cap
    return Series([round(ep_ttm, 6), round(bp_lr, 6), round(ocf_ttm, 6)],
                  index=['ep_ttm', 'bp_lr', 'ocf_ttm'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the RSTR (relative strength) factor loading of one stock.

    RSTR is the exponentially weighted sum of daily log returns over the
    window configured in risk_ct.RSTR_CT, with the most recent
    `trailing_end` rows left out.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code
        1. rstr
        None is returned when quotes are missing or shorter than twice the
        half-life.
    """
    quotes = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=risk_ct.RSTR_CT.trailing_start + 1, fq=True)
    if quotes is None or len(quotes) < risk_ct.RSTR_CT.half_life * 2:
        return None

    # Drop the most recent trailing_end rows
    quotes = quotes.head(len(quotes) - risk_ct.RSTR_CT.trailing_end)
    quotes.reset_index(drop=True, inplace=True)

    # Daily log returns: log(close[t] / close[t-1])
    close = quotes['close']
    today = np.array(close.iloc[1:])
    yesterday = np.array(close.shift(1).iloc[1:])
    log_returns = np.log(today / yesterday)

    # Exponential weights: oldest observation gets (1 - alpha)^(n-1),
    # every later one alpha * (1 - alpha)^age
    n_obs = len(log_returns)
    alpha = 1 - np.exp(np.log(0.5) / risk_ct.RSTR_CT.half_life)
    ages = list(range(n_obs - 1, -1, -1))
    scale = [1] + [alpha] * (n_obs - 1)
    weights = np.float_power([1 - alpha] * n_obs, ages) * scale

    rstr = np.sum(log_returns * weights)
    return pd.Series([Utils.code_to_symbol(code), rstr], index=['code', 'rstr'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the DASTD (daily standard deviation) factor loading of one stock.

    DASTD is the square root of the exponentially weighted sum of squared
    deviations of daily log returns from their plain mean.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code
        1. dastd
        None is returned when quotes are missing or cover fewer than half of
        the trailing window.
    """
    quotes = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=risk_ct.DASTD_CT.trailing + 1, fq=True)
    # Require at least half of the trailing window
    if quotes is None or len(quotes) < int(risk_ct.DASTD_CT.trailing / 2):
        return None
    quotes.reset_index(drop=True, inplace=True)

    # Daily log returns and their unweighted mean
    close = quotes['close']
    today = np.array(close.iloc[1:])
    yesterday = np.array(close.shift(1).iloc[1:])
    log_returns = np.log(today / yesterday)
    mean_return = np.mean(log_returns)

    # Exponential weights: oldest observation gets (1 - alpha)^(n-1),
    # every later one alpha * (1 - alpha)^age
    n_obs = len(log_returns)
    alpha = 1 - np.exp(np.log(0.5) / risk_ct.DASTD_CT.half_life)
    ages = list(range(n_obs - 1, -1, -1))
    scale = [1] + [alpha] * (n_obs - 1)
    weights = np.float_power([1 - alpha] * n_obs, ages) * scale

    deviations = log_returns - mean_return
    dastd = np.sqrt(np.sum(deviations**2 * weights))
    return pd.Series([Utils.code_to_symbol(code), dastd], index=['code', 'dastd'])
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the size factors of one stock on one date.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD or YYYYMMDD
    :return: pd.Series
    --------
        0. LnTotalMktCap: log of (total - liquid_b - liquid_h) shares * close
        1. LnLiquidMktCap: log of liquid_a shares * close
        None is returned when the quote or the capital structure is missing.
    """
    code = Utils.code_to_symbol(code)
    calc_date = Utils.to_date(calc_date)

    # Latest unadjusted quote as of calc_date; guard against None before .shape
    mkt_daily = Utils.get_secu_daily_mkt(code, calc_date, fq=False, range_lookup=True)
    if mkt_daily is None or mkt_daily.shape[0] == 0:
        return None

    # Latest capital structure as of calc_date
    cap_struct = Utils.get_cap_struct(code, calc_date)
    if cap_struct is None:
        return None

    total_cap = cap_struct.total - cap_struct.liquid_b - cap_struct.liquid_h
    ln_total = math.log(total_cap * mkt_daily.close)
    ln_liquid = math.log(cap_struct.liquid_a * mkt_daily.close)
    # Build the Series in one step: growing a dtype-less empty Series emits a
    # deprecation warning and leaves the dtype dependent on the pandas version
    return Series([ln_total, ln_liquid],
                  index=['LnTotalMktCap', 'LnLiquidMktCap'])
def smartq_backtest(start, end):
    """
    Run the historical backtest of the SmartQ factor.

    For every trading day between start and end the portfolio is either
    rebalanced (month start) or revalued (other days); one NAV row is appended
    per day. Each rebalance writes a port_data_<date>.csv file and the NAV
    history is written to port_nav.csv, both under the SmartMoney backtest
    path.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format YYYY-MM-DD; expected to be a month start
    :param end: datetime-like, str
        Backtest end date, format YYYY-MM-DD
    :return: None
    """
    # Trading days between start and end
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest stored backtest state as of the first trading day
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    backtest_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data holds the stocks selected at the latest rebalance,
    # columns <date, factorvalue, id, buyprice>
    if port_nav is None:
        # No stored NAV history: start at 1.0 on the day before the first trading day
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Month start: read SmartQ loadings and rebalance; otherwise: revalue the portfolio
    t = 0  # number of rebalances done so far
    for trading_day in trading_days:
        # Base NAV: the previous day's row when no portfolio is held, otherwise
        # the row whose date equals factor_data.iloc[0].date
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance at month start
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as sold at today's
            # average price; the close is used when the returned quote is not
            # dated today
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                # NOTE(review): raises ZeroDivisionError if factor_data is empty
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings stored for the previous trading day
            factor_data = Utils.read_factor_loading(
                SmartMoney.get_db_file(),
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Compute each stock's past-20-day return and mark for removal the
            # stocks that are suspended or limit-up today, or have no return
            ind_to_be_deleted = []
            factor_data['ret20'] = np.zeros(len(factor_data))
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_deleted.append(ind)
                fret20 = Utils.calc_interval_ret(factor_info.id,
                                                 end=prev_trading_day,
                                                 ndays=20)
                if fret20 is None:
                    if ind not in ind_to_be_deleted:
                        ind_to_be_deleted.append(ind)
                else:
                    factor_data.loc[ind, 'ret20'] = fret20
            factor_data = factor_data.drop(ind_to_be_deleted, axis=0)
            # Sort by 20-day return descending and drop the top 20%
            k = int(factor_data.shape[0] * 0.2)
            factor_data = factor_data.sort_values(by='ret20',
                                                  ascending=False).iloc[k:]
            del factor_data['ret20']  # helper column no longer needed
            # Sort by factor value ascending and keep the first 10%
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=True)
            k = int(factor_data.shape[0] * 0.1)
            factor_data = factor_data.iloc[:k]
            # Record today's average price as the buy price and value the new
            # portfolio from buy price to today's close
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            # NOTE(review): raises ZeroDivisionError when the 10% cut selects no stock
            interval_ret /= float(factor_data.shape[0])
            nav *= (1.0 + interval_ret)
            # Persist the new holdings
            port_data_path = os.path.join(
                SETTINGS.FACTOR_DB_PATH,
                alphafactor_ct.SMARTMONEY_CT.backtest_path, 'port_data_%s.csv' %
                Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
            t += 1
            # Pause for 300 seconds after every 6th rebalance
            if t % 6 == 0:
                logging.info('Suspended for 300s.')
                time.sleep(300)
        else:
            # Not a rebalance day: value the holdings at today's close
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(factor_data.shape[0])
                nav *= (1.0 + interval_ret)
        # Append today's NAV
        # NOTE(review): DataFrame.append was removed in pandas 2.0
        port_nav = port_nav.append(Series({
            'date':
            Utils.datetimelike_to_str(trading_day, True),
            'nav':
            nav
        }),
                                   ignore_index=True)
        # Advance prev_trading_day
        prev_trading_day = trading_day
    # Persist the NAV history
    port_nav_path = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                 alphafactor_ct.SMARTMONEY_CT.backtest_path,
                                 'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the CMRA (cumulative range) factor loading of one stock.

    The adjusted close is sampled every `days_scale` trading days over a
    window of `trailing * days_scale` trading days (risk_ct.CMRA_CT). Each
    sample is divided by the first close in the window, and CMRA is
    log(max ratio) - log(min ratio).

    Parameter:
    --------
    :param code: str
        Stock code, e.g. SH600000 or 600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code
        1. cmra
        None is returned when quotes are missing, cover fewer than half of
        the window, or yield no sample point.
    """
    trailing = risk_ct.CMRA_CT.trailing
    days_scale = risk_ct.CMRA_CT.days_scale
    window = trailing * days_scale

    # Trading-day calendar of the window, as YYYY-MM-DD strings
    trading_days = Utils.get_trading_days(end=calc_date, ndays=window + 1)
    trading_days = [day.strftime('%Y-%m-%d') for day in trading_days]

    # Adjusted quotes restricted to the window's trading days
    df_secu_quote = Utils.get_secu_daily_mkt(code, end=calc_date, fq=True)
    if df_secu_quote is None:
        return None
    df_secu_quote = df_secu_quote[df_secu_quote['date'].isin(trading_days)]
    df_secu_quote.reset_index(drop=True, inplace=True)
    # Require quotes for at least half of the window
    if df_secu_quote.empty or len(df_secu_quote) < int(window / 2):
        return None

    first_date = df_secu_quote.iloc[0]['date']
    base_close = df_secu_quote.iloc[0]['close']

    # Price ratios at each sample point
    z = []
    prev_trading_day = first_date
    for t in range(1, trailing + 1):
        k = t * days_scale
        if k >= len(trading_days):
            # Calendar shorter than requested: no further sample points
            break
        trading_day = trading_days[k]
        if trading_day < first_date:
            # Sample point precedes the stock's first quote in the window
            continue
        # Last quote date on or before the sample point (handles suspensions)
        secu_trading_day = df_secu_quote[
            df_secu_quote['date'] <= trading_day].iloc[-1]['date']
        if secu_trading_day <= prev_trading_day:
            # No new quote since the previous sample point
            continue
        sample_close = df_secu_quote[
            df_secu_quote['date'] == secu_trading_day].iloc[0]['close']
        z.append(sample_close / base_close)
        prev_trading_day = secu_trading_day

    if not z:
        # max()/min() of an empty list would raise ValueError
        return None
    cmra = math.log(max(z)) - math.log(min(z))
    return pd.Series([Utils.code_to_symbol(code), cmra], index=['code', 'cmra'])
def _get_prevN_years_finbasicdata(date, code, years):
    """
    Load the main financial indicators of the past years, adding *_adj fields
    scaled by the price-adjustment factor.

    Which year-end reports are read depends on the month of `date`:
      * Jan-Apr: year-ends from `year - years` to `year - 2`, plus TTM data
      * May-Aug: year-ends from `year - years` to `year - 1`, no TTM data
      * Sep-Dec: year-ends from `year - years + 1` to `year - 1`, plus TTM data

    :param date: datetime-like
        Reference date
    :param code: str
        Stock code, format SH600000
    :param years: int
        Number of reporting periods to return
    :return: list of dict
        One dict per period, oldest first, or None when any required report
        or the TTM data is unavailable.
    """
    year, month = date.year, date.month
    if month <= 4:
        oldest, newest, is_ttm = years, 2, True
    elif month <= 8:
        oldest, newest, is_ttm = years, 1, False
    else:
        oldest, newest, is_ttm = years - 1, 1, True
    report_dates = [
        datetime.datetime(year - n, 12, 31)
        for n in range(oldest, newest - 1, -1)
    ]

    # Adjusted quotes; their 'factor' column is used to adjust per-share data
    df_mkt_data = Utils.get_secu_daily_mkt(code, end=date, fq=True)

    def _add_adjusted(record, cutoff, per_share_fields):
        """Add the *_adj entries to record using the last factor on or before cutoff."""
        history = df_mkt_data[df_mkt_data.date <= cutoff]
        if history.empty:
            # No quote on or before the cutoff: copy the raw values
            for raw_key, adj_key in per_share_fields:
                record[adj_key] = record[raw_key]
            record['MainOperateRevenue_adj'] = record['MainOperateRevenue']
            return
        fq_factor = history.iloc[-1]['factor']
        # Per-share values are multiplied by the factor, revenue is divided by it
        for raw_key, adj_key in per_share_fields:
            record[adj_key] = record[raw_key] * fq_factor
        record['MainOperateRevenue_adj'] = record['MainOperateRevenue'] / fq_factor

    annual_fields = (
        ('BasicEPS', 'BasicEPS_adj'),
        ('UnitNetAsset', 'UnitNetAsset_adj'),
        ('UnitNetOperateCashFlow', 'UnitNetOperateCashFlow_adj'),
    )
    ttm_fields = (('BasicEPS', 'BasicEPS_adj'),)

    collected = []
    for report_date in report_dates:
        report = Utils.get_fin_basic_data(code, report_date,
                                          date_type='report_date')
        if report is None:
            return None
        report = report.to_dict()
        _add_adjusted(report, report_date.strftime('%Y-%m-%d'), annual_fields)
        collected.append(report)

    if is_ttm:
        ttm_report = Utils.get_ttm_fin_basic_data(code, date)
        if ttm_report is None:
            return None
        ttm_report = ttm_report.to_dict()
        _add_adjusted(ttm_report,
                      ttm_report['ReportDate'].strftime('%Y-%m-%d'),
                      ttm_fields)
        collected.append(ttm_report)
    return collected
def _calc_factor_loading1(cls, code, calc_date):
    """
    Compute four proxy variables of a stock's chip distribution plus the
    return of the next period.

    Parameters
    -------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. arc: turnover-weighted mean of the relative capital gain
        1. vrc: turnover-weighted variance
        2. src: turnover-weighted skewness
        3. krc: turnover-weighted kurtosis
        4. next_ret: return from the next trading day to the end of its month
        None is returned when fewer than 20 quote rows exist, any statistic
        is NaN, or the next-period return is unavailable.
    """
    # Adjusted daily quotes of the past __days days, newest first
    df_mkt = Utils.get_secu_daily_mkt(code, end=calc_date, ndays=cls.__days,
                                      fq=True, range_lookup=True)
    if df_mkt is None or len(df_mkt) < 20:
        return None
    df_mkt.sort_values(by='date', ascending=False, inplace=True)

    n = len(df_mkt)
    arr_rc = np.zeros(n)   # relative capital gain per day
    arr_atr = np.zeros(n)  # adjusted turnover per day
    p_c = df_mkt.iloc[0]['close']  # close on the most recent day
    pre_tr = None
    for j in range(n):
        row = df_mkt.iloc[j]
        p_avg = row['amount'] / row['vol'] * row['factor']
        arr_rc[j] = (p_c - p_avg) / p_c
        tr_j = row['turnover1']
        if j == 0:
            arr_atr[j] = tr_j
        else:
            # Recursive form of tr_j * product of (1 - tr) over more recent days
            arr_atr[j] = arr_atr[j - 1] / pre_tr * tr_j * (1. - pre_tr)
        pre_tr = tr_j

    arc = np.average(arr_rc, weights=arr_atr)
    if np.isnan(arc):
        return None

    rc_dev = arr_rc - arc
    vrc = n / (n - 1.) * np.sum(arr_atr * rc_dev * rc_dev) / np.sum(arr_atr)
    if np.isnan(vrc):
        return None

    third_moment = np.sum(arr_atr * np.float_power(rc_dev, 3))
    src = n / (n - 1.) * third_moment / np.sum(arr_atr) / np.float_power(vrc, 1.5)
    if np.isnan(src):
        return None

    fourth_moment = np.sum(arr_atr * np.float_power(rc_dev, 4))
    krc = n / (n - 1.) * fourth_moment / np.sum(arr_atr) / np.float_power(vrc, 2)
    if np.isnan(krc):
        return None

    # Next-period return: from the trading day after calc_date to the last
    # calendar day of that day's month
    next_date = Utils.get_trading_days(start=calc_date, ndays=2)[1]
    last_day = calendar.monthrange(next_date.year, next_date.month)[1]
    date_end = datetime.datetime(next_date.year, next_date.month, last_day)
    next_ret = Utils.calc_interval_ret(code, start=next_date, end=date_end)
    if next_ret is None:
        return None
    return pd.Series([arc, vrc, src, krc, next_ret],
                     index=['arc', 'vrc', 'src', 'krc', 'next_ret'])
def apm_backtest(start, end, pure_factor=False):
    """
    Run the historical backtest of the APM factor.

    For every trading day between start and end the portfolio is either
    rebalanced (month start) or revalued (other days); one NAV row is appended
    per day. Each rebalance writes a port_data_<date>.csv file and the NAV
    history is written to port_nav.csv, both under the (pure) APM backtest
    path.

    Parameters:
    --------
    :param start: datetime-like, str
        Backtest start date, format YYYY-MM-DD; expected to be the last
        trading day of a month (the trading day before a month start)
    :param end: datetime-like, str
        Backtest end date, format YYYY-MM-DD
    :param pure_factor: bool, default False
        Whether to backtest the pure factor (selects the pure_* paths and files)
    :return: None
    """
    # Trading days between start and end
    trading_days = Utils.get_trading_days(start, end)
    # Load the latest stored backtest state as of the first trading day
    prev_trading_day = Utils.get_prev_n_day(trading_days.iloc[0], 1)
    if pure_factor:
        backtest_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.pure_backtest_path)
    else:
        backtest_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.backtest_path)
    factor_data, port_nav = Utils.get_backtest_data(backtest_path,
                                                    trading_days.iloc[0])
    # factor_data holds the stocks selected at the latest rebalance,
    # columns <date, factorvalue, id, buyprice>
    if port_nav is None:
        # No stored NAV history: start at 1.0 on the day before the first trading day
        port_nav = DataFrame({
            'date': [prev_trading_day.strftime('%Y-%m-%d')],
            'nav': [1.0]
        })
    # Month start: read APM loadings and rebalance; otherwise: revalue the portfolio
    for trading_day in trading_days:
        # Base NAV: the previous day's row when no portfolio is held, otherwise
        # the row whose date equals factor_data.iloc[0].date
        if factor_data is None:
            nav = port_nav[port_nav.date == prev_trading_day.strftime(
                '%Y-%m-%d')].iloc[0].nav
        else:
            nav = port_nav[port_nav.date ==
                           factor_data.iloc[0].date].iloc[0].nav
        interval_ret = 0.0
        # Rebalance at month start
        if Utils.is_month_start(trading_day):
            logging.info('[%s] 月初调仓.' %
                         Utils.datetimelike_to_str(trading_day, True))
            # Before rebalancing, value the old holdings as sold at today's
            # average price; the close is used when the returned quote is not
            # dated today
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    if daily_mkt.date == trading_day.strftime('%Y-%m-%d'):
                        vwap_price = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                    else:
                        vwap_price = daily_mkt.close
                    interval_ret += vwap_price / factor_info.buyprice - 1.0
                # NOTE(review): raises ZeroDivisionError if factor_data is empty
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
            # Read the factor loadings stored for the previous trading day
            if pure_factor:
                factor_data_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.APM_CT.pure_apm_db_file)
            else:
                factor_data_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                                factor_ct.APM_CT.apm_db_file)
            factor_data = Utils.read_factor_loading(
                factor_data_path,
                Utils.datetimelike_to_str(prev_trading_day, False))
            # Drop stocks that are suspended or limit-up on the rebalance day
            ind_to_be_delted = []
            for ind, factor_info in factor_data.iterrows():
                trading_status = Utils.trading_status(factor_info.id,
                                                      trading_day)
                if trading_status == SecuTradingStatus.Suspend or trading_status == SecuTradingStatus.LimitUp:
                    ind_to_be_delted.append(ind)
            factor_data = factor_data.drop(ind_to_be_delted, axis=0)
            # Sort by factor value descending and keep the first 10%
            factor_data = factor_data.sort_values(by='factorvalue',
                                                  ascending=False)
            factor_data = factor_data.iloc[:int(len(factor_data) * 0.1)]
            # Record today's average price as the buy price and value the new
            # portfolio from buy price to today's close
            factor_data['buyprice'] = 0.0
            interval_ret = 0.0
            for ind, factor_info in factor_data.iterrows():
                daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                     trading_day,
                                                     fq=True,
                                                     range_lookup=False)
                assert len(daily_mkt) > 0
                factor_data.loc[
                    ind,
                    'buyprice'] = daily_mkt.amount / daily_mkt.vol * daily_mkt.factor
                interval_ret += daily_mkt.close / factor_data.loc[
                    ind, 'buyprice'] - 1.0
            # NOTE(review): raises ZeroDivisionError when the 10% cut selects no stock
            interval_ret /= float(len(factor_data))
            nav *= (1.0 + interval_ret)
            # Persist the new holdings
            if pure_factor:
                port_data_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.APM_CT.pure_backtest_path, 'port_data_%s.csv' %
                    Utils.datetimelike_to_str(trading_day, False))
            else:
                port_data_path = os.path.join(
                    factor_ct.FACTOR_DB.db_path,
                    factor_ct.APM_CT.backtest_path, 'port_data_%s.csv' %
                    Utils.datetimelike_to_str(trading_day, False))
            factor_data.to_csv(port_data_path, index=False)
        else:
            # Not a rebalance day: value the holdings at today's close
            logging.info('[%s] 月中估值.' %
                         Utils.datetimelike_to_str(trading_day, True))
            if factor_data is not None:
                for ind, factor_info in factor_data.iterrows():
                    daily_mkt = Utils.get_secu_daily_mkt(factor_info.id,
                                                         trading_day,
                                                         fq=True,
                                                         range_lookup=True)
                    interval_ret += daily_mkt.close / factor_info.buyprice - 1.0
                interval_ret /= float(len(factor_data))
                nav *= (1.0 + interval_ret)
        # Append today's NAV
        # NOTE(review): DataFrame.append was removed in pandas 2.0
        port_nav = port_nav.append(Series({
            'date': trading_day.strftime('%Y-%m-%d'),
            'nav': nav
        }),
                                   ignore_index=True)
        # Advance prev_trading_day
        prev_trading_day = trading_day
    # Persist the NAV history
    if pure_factor:
        port_nav_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.pure_backtest_path,
                                     'port_nav.csv')
    else:
        port_nav_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                     factor_ct.APM_CT.backtest_path,
                                     'port_nav.csv')
    port_nav.to_csv(port_nav_path, index=False)
def _calc_factor_loading(cls, code, calc_date):
    """
    Compute the BETA and HSIGMA factor loadings of one stock on one date,
    using the settings in risk_ct.DBETA_CT.

    The stock's simple daily returns are regressed on the benchmark's with
    weighted least squares; the weights come from Algo.ewma_weight.

    Parameters:
    --------
    :param code: str
        Stock code, e.g. 600000 or SH600000
    :param calc_date: datetime-like, str
        Calculation date, format YYYY-MM-DD
    :return: pd.Series
    --------
        0. code: stock symbol
        1. beta: slope coefficient of the weighted regression
        2. hsigma: square root of the residual mean squared error
        None is returned when quotes are missing, shorter than 126 rows, or
        start more than 3 * trailing calendar days before calc_date.
    :raises ValueError: when benchmark and stock quote lengths differ
    """
    def _simple_returns(frame):
        """Return close[t] / close[t-1] - 1 for every row after the first."""
        close = frame['close']
        current = np.array(close.iloc[1:])
        previous = np.array(close.shift(1).iloc[1:])
        return current / previous - 1.

    # Adjusted quotes of the stock
    stock_quotes = Utils.get_secu_daily_mkt(
        code, end=calc_date, ndays=risk_ct.DBETA_CT.trailing + 1, fq=True)
    # Need at least half a year (126 trading days) of data
    if stock_quotes is None or len(stock_quotes) < 126:
        return None
    # Reject series that reach back more than 3 * trailing calendar days
    earliest_allowed = Utils.to_date(calc_date) - datetime.timedelta(
        days=risk_ct.DBETA_CT.trailing * 3)
    if Utils.to_date(stock_quotes.iloc[0]['date']) < earliest_allowed:
        return None
    stock_quotes.reset_index(drop=True, inplace=True)

    # Adjusted quotes of the benchmark on the stock's quote dates
    benchmark_quotes = Utils.get_secu_daily_mkt(
        risk_ct.DBETA_CT.benchmark, end=calc_date, fq=True)
    if benchmark_quotes is None:
        return None
    stock_dates = list(stock_quotes['date'])
    benchmark_quotes = benchmark_quotes[benchmark_quotes['date'].isin(stock_dates)]
    if len(benchmark_quotes) != len(stock_quotes):
        raise ValueError("[beta计算]基准和个股的历史行情长度不一致.")
    benchmark_quotes.reset_index(drop=True, inplace=True)

    stock_returns = _simple_returns(stock_quotes)
    benchmark_returns = _simple_returns(benchmark_quotes)

    # Observation weights for the regression
    weights = Algo.ewma_weight(len(benchmark_returns), risk_ct.DBETA_CT.half_life)

    # Weighted least squares of stock returns on [const, benchmark returns]
    design = sm.add_constant(benchmark_returns)
    fitted = sm.WLS(stock_returns, design, weights=weights).fit()
    beta = fitted.params[1]
    hsigma = np.sqrt(fitted.mse_resid)
    return pd.Series([Utils.code_to_symbol(code), beta, hsigma],
                     index=['code', 'beta', 'hsigma'])