def _optimize_periodmomentum_weight(cls, calc_date):
    """
    Optimize the weights of the intraday period-momentum factor loadings.

    The weights are proportional to inv(cov(IC)) * mean(IC), computed from
    the saved IC time series, and are normalized so that they sum to 1.
    The result is appended to the optimal-weight file and returned.

    Parameters:
    --------
    :param calc_date: datetime-like, str
        Calculation date
    :return: pd.Series
    --------
        Optimal weight vector of the intraday period-momentum factors
        0. date, calculation date
        1. w0, weight of the overnight-period momentum factor
        2. w1, weight of the 1st-hour momentum factor
        3. w2, weight of the 2nd-hour momentum factor
        4. w3, weight of the 3rd-hour momentum factor
        5. w4, weight of the 4th-hour momentum factor
    :raises numpy.linalg.LinAlgError: if the IC covariance matrix is singular
    """
    calc_date = Utils.to_date(calc_date)
    weight_labels = ['date', 'w0', 'w1', 'w2', 'w3', 'w4']

    # Load the IC history and keep the latest 60 rows dated on or before
    # calc_date (the original note describes this as the past 60 months).
    ic_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH,
                               alphafactor_ct.INTRADAYMOMENTUM_CT['factor_ic_file'])
    df_ic = pd.read_csv(ic_filepath, header=0, parse_dates=[0])
    df_ic = df_ic[df_ic['date'] <= calc_date].iloc[-60:]

    # Reassign instead of dropping in place: df_ic is a slice of the frame
    # that was read, and in-place edits on a slice trigger SettingWithCopy.
    df_ic = df_ic.drop(columns='date')

    # Mean vector and covariance matrix of the ICs as plain ndarrays
    # (np.mat was removed in NumPy 2.0).
    ic_mean = df_ic.mean(axis=0).values
    ic_cov = df_ic.cov().values

    # Solve cov * w = mean rather than forming the explicit inverse, then
    # rescale so the weights sum to 1.
    raw_weights = np.linalg.solve(ic_cov, ic_mean)
    normalized_weights = raw_weights / raw_weights.sum()

    optimal_weights = pd.Series([calc_date] + normalized_weights.tolist(),
                                index=weight_labels)

    # Append the optimal weights to the weight file.
    weight_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                   alphafactor_ct.INTRADAYMOMENTUM_CT['optimal_weight_file'])
    Utils.save_timeseries_data(optimal_weights, weight_filepath, save_type='a',
                               columns=weight_labels)
    return optimal_weights
def _calc_periodmomentum_ic(cls, calc_date, date_interval_type='month'):
    """
    Compute the Rank IC vector of the intraday period-momentum factors.

    For every stock that has a raw factor loading on calc_date, the forward
    return over the chosen interval is looked up, and the Spearman
    correlation between that return and each of the loadings m0..m4 is
    taken as the Rank IC. The result is appended to the factor IC file.

    Parameters:
    --------
    :param calc_date: datetime-like, str
        Calculation date, e.g: YYYY-MM-DD, YYYYMMDD
    :param date_interval_type: str
        Horizon of the stock return, 'month'=monthly return, 'day'=daily return
    :return: pd.Series, or None when there is nothing to compute
    --------
        Rank IC vector
        0. m0, IC of the overnight-period momentum factor
        1. m1, IC of the 1st-hour momentum factor
        2. m2, IC of the 2nd-hour momentum factor
        3. m3, IC of the 3rd-hour momentum factor
        4. m4, IC of the 4th-hour momentum factor
        5. date, calculation date
    :raises ValueError: if date_interval_type is neither 'month' nor 'day'
    """
    # Reject unsupported horizons up front; otherwise ret_start/ret_end
    # would never be bound and the function would fail with an
    # UnboundLocalError further down.
    if date_interval_type not in ('month', 'day'):
        raise ValueError("收益率区间类型有误, 应为'month'=月度收益, 'day'=日收益.")

    # Load the raw intraday period-momentum factor loadings.
    df_period_mom = cls._get_factor_loading(
        cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False),
        factor_name='periodmomentum', factor_type='raw', drop_na=True)
    if df_period_mom.empty:
        return None

    if date_interval_type == 'month':
        # Return interval as given by Utils.next_month for calc_date.
        ret_start, ret_end = Utils.next_month(calc_date)
    else:
        # Single-day interval: the second entry of the 2 trading days
        # returned when starting from calc_date.
        ret_start = ret_end = Utils.get_trading_days(start=calc_date, ndays=2)[1]

    # Collect the interval return of every stock; a missing return (None)
    # becomes NaN so that the row is dropped below.
    interval_rets = []
    for stock_id in df_period_mom['id']:
        fret = Utils.calc_interval_ret(stock_id, start=ret_start, end=ret_end)
        interval_rets.append(np.nan if fret is None else fret)
    df_period_mom['ret'] = interval_rets
    df_period_mom = df_period_mom.dropna()
    if df_period_mom.empty:
        # No stock has both loadings and a return: skip instead of
        # appending an all-NaN IC row to the IC file.
        return None

    # Rank IC = Spearman correlation between the return and each loading.
    df_period_mom = df_period_mom.drop(columns=['date', 'id', 'm_normal'])
    df_spearman_corr = df_period_mom.corr(method='spearman')
    rank_IC = df_spearman_corr.loc['ret', ['m0', 'm1', 'm2', 'm3', 'm4']]
    rank_IC['date'] = calc_date

    # Append the Rank IC values to the IC file.
    ic_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH,
                               alphafactor_ct.INTRADAYMOMENTUM_CT['factor_ic_file'])
    Utils.save_timeseries_data(rank_IC, ic_filepath, save_type='a',
                               columns=['date', 'm0', 'm1', 'm2', 'm3', 'm4'])
    return rank_IC
def _save_mvpfp_performance(performance_data, factor_name, performance_type, save_type):
    """
    Save performance data of the minimum-volatility pure factor portfolio.

    Parameters:
    --------
    :param performance_data: pd.DataFrame or pd.Series
        Performance data. A pd.DataFrame for the 'daily' and 'monthly' time
        series, a pd.Series (with a 'type' entry that selects the file name)
        for 'summary'. When daily data is appended to an existing file, the
        'nav' and 'accu_ret' columns of this object are updated in place.
    :param factor_name: str
        Alpha factor name, e.g: SmartMoney
    :param performance_type: str
        Kind of performance data, 'daily'=daily time series,
        'monthly'=monthly time series, 'summary'=summary data
    :param save_type: str
        Save mode, 'a'=append, 'w'=overwrite
    :return: None
    :raises TypeError: if performance_data has the wrong type for performance_type
    :raises ValueError: if performance_type or save_type is not supported
    """
    # Type check depends on the kind of data: the summary is a Series, the
    # time series are DataFrames. (Requiring a DataFrame unconditionally
    # made the 'summary' path unreachable.)
    if performance_type == 'summary':
        if not isinstance(performance_data, pd.Series):
            raise TypeError("‘summary’类型的绩效数据类型应该为pd.Series.")
    elif not isinstance(performance_data, pd.DataFrame):
        raise TypeError("绩效数据必须为pd.DataFrame类型.")

    if performance_data.empty:
        logging.info('绩效数据为空, 未保存.')
        return

    if performance_type in ('daily', 'monthly'):
        relative_path = 'performance/performance_{}.csv'.format(performance_type)
    elif performance_type == 'summary':
        relative_path = 'performance/performance_{}.csv'.format(performance_data['type'])
    else:
        raise ValueError(
            "绩效数据类型有误, 应为'daily'=日度绩效时间序列数据, 'monthly'=月度绩效时间序列数据, 'summary'=绩效汇总数据."
        )

    # Validate the save mode before touching any file.
    if save_type not in ('a', 'w'):
        raise ValueError("保存类型有误, 应为'a'=新增, 'w'=覆盖.")

    # Look the factor configuration up by attribute name instead of eval().
    factor_ct = getattr(alphafactor_ct, factor_name.upper() + '_CT')
    performance_filepath = os.path.join(SETTINGS.FACTOR_DB_PATH,
                                        factor_ct['db_file'], relative_path)

    if save_type == 'a' and performance_type == 'daily' and os.path.isfile(performance_filepath):
        # Chain the new NAV series onto the NAV already saved: scale it by
        # the last saved NAV dated on or before the first new date.
        df_saved = pd.read_csv(performance_filepath, parse_dates=[0], header=0)
        first_new_date = performance_data['date'].iloc[0]
        df_saved = df_saved[df_saved['date'] <= first_new_date]
        if not df_saved.empty:
            # NOTE(review): assumes the saved file is in ascending date
            # order, so the last remaining row is the most recent one.
            performance_data['nav'] *= df_saved['nav'].iloc[-1]
            performance_data['accu_ret'] = performance_data['nav'] - 1

    Utils.save_timeseries_data(performance_data, performance_filepath, save_type)