def get_equal_weight_individual(signal=pd.DataFrame(), start_date='2017-01-01', end_date='2020-08-31'):
    # Keep the top 10% of stocks by cross-sectional rank each day and equal-weight them
    signal = signal[(signal.index >= start_date) & (signal.index <= end_date)]
    weight = (uc.cs_rank(signal) >= 0.9).astype(int)
    weight = weight.div(weight.sum(axis=1), axis=0)  # normalize each row to sum to 1
    weight = weight.where(weight > 0)
    weight = weight.dropna(axis=1, how='all')  # drop stocks that are never selected
    return weight
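# A minimal usage sketch of get_equal_weight_individual, wrapped in a string so
# it stays inert like the other optional blocks in this repo. It assumes
# uc.cs_rank returns cross-sectional percentile ranks in (0, 1], so `>= 0.9`
# keeps roughly the top decile each day; the toy signal below is hypothetical.
"""
import numpy as np
dates = ['2017-01-0' + str(i) for i in range(1, 6)]
codes = ['stk_' + str(i) for i in range(20)]
toy_signal = pd.DataFrame(np.random.randn(5, 20), index=dates, columns=codes)
w = get_equal_weight_individual(toy_signal)
print((w.sum(axis=1) - 1).abs().max())  # each row sums to 1: valid long-only weights
"""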
def add_fac(base_comb, base_fac, wait_delete):
    # Try appending each candidate factor in wait_delete to the current base combination
    fac_comb = {}
    for fac_add in wait_delete:
        temp = {k: v for k, v in base_fac.items()}  # shallow copy of the current base factors
        temp[fac_add] = uc.cs_rank(fac_data[fac_add])
        comb = pd.concat(temp.values())
        com_name = '(' + base_comb + ',' + fac_add + ')'
        fac_comb[com_name] = comb.groupby(comb.index).mean()  # equal-weight average of the ranked factors
        fac_comb[com_name].index = pd.to_datetime(fac_comb[com_name].index)
    return fac_comb
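# The concat + groupby(index).mean() idiom in add_fac is the core aggregation
# pattern of this repo: stacking several date-indexed factor DataFrames and
# averaging by date gives an equal-weight combination that skips NaNs per cell.
# A small self-contained illustration with toy data, kept inert by convention:
"""
import numpy as np
a = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], index=['d1', 'd2'], columns=['s1', 's2'])
b = pd.DataFrame([[5.0, np.nan], [7.0, 8.0]], index=['d1', 'd2'], columns=['s1', 's2'])
stacked = pd.concat([a, b])
avg = stacked.groupby(stacked.index).mean()
# avg.loc['d1', 's1'] == 3.0 (mean of 1 and 5); avg.loc['d1', 's2'] == 2.0 (NaN skipped)
"""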
def get_equal_weight_individual(
        signal=pd.DataFrame(),
        start_date='2017-01-01',
        end_date='2020-08-31',
        out_path='E:/FT_Users/LihaiYang/Files/factor_comb_data/all_cluster_comb/1_eq.csv'):
    signal = signal[(signal.index >= start_date) & (signal.index <= end_date)]
    weight = (uc.cs_rank(signal) >= 0.9).astype(int)  # remember to adjust this threshold
    weight = weight.div(weight.sum(axis=1), axis=0)
    weight = weight.where(weight > 0)
    weight = weight.dropna(axis=1, how='all')
    weight.to_csv(out_path)
                                       axis=0, ascending=False)
    new_com = perf_summary.index[0]
    new_sharp = perf_summary.loc[new_com, 'sharp_ratio']
    print("Best combination after adding one factor:", new_com,
          "best Sharpe after adding one factor:", new_sharp)
    return new_sharp, new_com


fac_info = pd.read_excel(data_pat + '/fac_addfunda/all_addfunda.xlsx',
                         sheet_name='各类聚合因子的表现', index_col=0)

# Initialization: start from the first factor in the list
wait_del = fac_info.index.to_list()
base_com = wait_del[0]
base_fa = {}
base_fa[base_com] = uc.cs_rank(fac_data[base_com])
base_sharpe = fac_info.loc[base_com, 'sharp_ratio']
wait_del.remove(base_com)

while len(wait_del) > 0:
    print("Current best factor combination:", base_com,
          "current best Sharpe ratio:", base_sharpe)
    # Try adding each remaining factor to the current best combination
    fac_new = add_fac(base_com, base_fa, wait_del)
    # Backtest the candidate combinations
    new_sharp, new_com = test_fac(fac_new)
    if new_sharp > base_sharpe:
        base_com = new_com
        base_sharpe = new_sharp
        rem = base_com.split(',')[-1][:-1]  # name of the factor just added
        base_fa[rem] = uc.cs_rank(fac_data[rem])
        wait_del.remove(rem)  # drop it from the candidate list so the loop can terminate
        print("Removed", rem)
    else:
        break  # no remaining factor improves the Sharpe ratio
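# The loop above is a greedy forward selection: start from the best single
# factor, repeatedly add whichever remaining factor raises the backtested Sharpe
# the most, and stop when nothing improves. A self-contained sketch of the same
# control flow, inert by convention; score() is a hypothetical stand-in for the
# real test_fac backtest:
"""
def score(combo):
    # toy Sharpe for a set of factors; not the repo's backtest
    base = {'f1': 1.0, 'f2': 0.8, 'f3': 0.5}
    return sum(base[f] for f in combo) / (len(combo) ** 0.5)

candidates = ['f1', 'f2', 'f3']
chosen = [max(candidates, key=lambda f: score([f]))]
candidates.remove(chosen[0])
best = score(chosen)
while candidates:
    gains = {f: score(chosen + [f]) for f in candidates}
    f_best = max(gains, key=gains.get)
    if gains[f_best] <= best:
        break  # no remaining factor improves the score
    chosen.append(f_best)
    candidates.remove(f_best)
    best = gains[f_best]
print(chosen, best)
"""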
for tag in list(fac_meaning[cluster_h].unique()):
    temp = fac_meaning[fac_meaning[cluster_h] == tag].index.tolist()
    temp_name = [i[15:-3] for i in temp]
    print(tag, len(temp))
    co = rank_corr.loc[temp_name, temp_name]
    co1 = co.reindex(co.columns)  # align row order with column order
    cluster_corr[tag] = co1.mask(co1.isna(), co1.T)  # fill missing entries from the transpose to symmetrize
    sharp = fac_meaning.loc[temp, 'sharp_ratio']
    sharp.index = [i[15:-3] for i in sharp.index.tolist()]
    cluster_sharp[tag] = sharp
    fac_perf.loc[temp, :].to_csv(out_path + '/' + str(tag) + '.csv')

fac_data = pd.read_pickle(data_pat + '/all_fac_20170101-20210228.pkl')
fac_comb = {}
for tag in cluster_sharp.keys():
    # keep only the single factor with the highest Sharpe ratio in each cluster
    fac_comb[tag] = uc.cs_rank(fac_data[cluster_sharp[tag].idxmax()])
    fac_comb[tag].index = pd.to_datetime(fac_comb[tag].index)
f = open(out_path + '/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()

# Correlations between the new aggregated factors
co_rank = cal_factor_corr(fac_comb, out_path)
print(co_rank)
"""

# Summarize the performance of the aggregated factors
type = 'best1_1'  # remember to change this
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/mf/' + str(type) + '/eq_tvwap'
results_perf = {}
results_hperf = {}
results_to = {}
f.close()
"""

# Factor aggregation method (6): within each category, equal-weight the factors
# whose Sharpe ratio ranks in the top 15%
fac_comb = {}
for type, v in all_fac.items():
    for tag, fac_names in v.items():
        fac_names = [fa for fa in fac_names
                     if fa not in ['factor_20216_vp', 'factor_90007_daily_vp']]  # these two factors seem slightly problematic
        fac_names = [fa for fa in fac_names
                     if sharpe_rank[type].loc[fa] >= 0.85]  # keep factors whose Sharpe ratio ranks in the top 15%
        print(type, tag, len(fac_names))
        if len(fac_names) > 0:
            temp = {}
            for fac_name in fac_names:
                temp[fac_name] = uc.cs_rank(all_data[fac_name])
            print('concat')
            comb = pd.concat(temp.values())
            print('mean')
            fac_comb['15%_eq_1_' + tag + '_' + type] = comb.groupby(comb.index).mean()
            fac_comb['15%_eq_1_' + tag + '_' + type].index = pd.to_datetime(
                fac_comb['15%_eq_1_' + tag + '_' + type].index)
f = open(data_pat + '/fac_expand/15%_eq/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()
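# The `sharpe_rank[type].loc[fa] >= 0.85` filter above presumes a Series of
# percentile ranks per factor pool. One plausible way such a Series is built
# (assumption: sharp_series maps factor name -> Sharpe ratio; pct ranks put the
# best factors near 1.0, so >= 0.85 keeps roughly the top 15%). Inert toy sketch:
"""
sharp_series = pd.Series({'fac_a': 1.2, 'fac_b': 0.4, 'fac_c': 0.9, 'fac_d': 1.5})
pct_rank = sharp_series.rank(pct=True)  # in (0, 1], higher Sharpe -> higher rank
print(pct_rank[pct_rank >= 0.85].index.tolist())  # -> ['fac_d'] for this toy pool
"""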
""" # 聚合方式(十一):在expand前sharp比率最高的的七个聚合因子里,遍历所有的组合方式(2**n种),进行sharpe加权聚合 fac_choose = [ '50%_eq_1_高频资金流分布_hfmf', 'sharpe_weight_反转因子相关_vp', 'sharpe_weight_1_日间资金流波动_mf', '50%_eq_1_收益率和波动率的相关性_vp', '15%_eq_1_日内成交额分布的稳定性_hfvp', '15%_eq_1_日间成交量(额)的波动率_vp', 'sharpe_weight_1_收盘行为异常_hfvp' ] comb = [] for i in range(len(fac_choose)): comb.extend(list(combinations(fac_choose, i + 1))) fac_comb = {} for com in comb: temp = {} for ele in com: temp[ele] = uc.cs_rank(fac_data[ele]) * fac_meaning.loc[ele, 'sharp_ratio'] comb = pd.concat(temp.values()) fac_comb['iter7same_' + str(com) + '_sharpe_weight'] = comb.groupby( comb.index).mean() fac_comb['iter7same_' + str(com) + '_sharpe_weight'].index = pd.to_datetime( fac_comb['iter7same_' + str(com) + '_sharpe_weight'].index) f = open(data_pat + '/iter7same_sharpe_weight/fac.pkl', 'wb') # 记得修改 pickle.dump(fac_comb, f, -1) f.close() """ # 把聚合因子的表现结果汇总 type = 'iter7same_eq' # 记得修改 perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/all_cluster/fac_expand/all_cluster/' + str(type) + '/eq_tvwap' results_perf = {}
    print(tag, len(temp))
    co = rank_corr.loc[temp_name, temp_name]
    co1 = co.reindex(co.columns)  # align row order with column order
    cluster_corr[tag] = co1.mask(co1.isna(), co1.T)  # fill missing entries from the transpose to symmetrize
    sharp = fac_meaning.loc[temp, 'sharp_ratio']
    sharp.index = [i[15:-3] for i in sharp.index.tolist()]
    cluster_sharp[tag] = sharp
    fac_perf.loc[temp, :].to_csv(out_path + '/' + str(tag) + '.csv')

# Factor aggregation
fac_data = pd.read_pickle(data_pat + '/all_fac_20170101-20210228.pkl')
fac_comb = {}
for tag in cluster_sharp.keys():
    temp = {}
    for i in cluster_sharp[tag].index.tolist():
        temp[i] = uc.cs_rank(fac_data[i])
    comb = pd.concat(temp.values())
    fac_comb[tag] = comb.groupby(comb.index).mean()
    fac_comb[tag].index = pd.to_datetime(fac_comb[tag].index)
f = open(out_path + '/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()

# Correlations between the new aggregated factors
co_rank = cal_factor_corr(fac_comb, out_path)
print(co_rank)

# Summarize the performance of the aggregated factors
type = '50%_eq_1'  # remember to change this
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/mf/' + str(type) + '/eq_tvwap'
pickle.dump(fac_comb, f, -1)
f.close()
"""

"""
fac_meaning = fac_meaning[fac_meaning['tag1'] == 'earning']
fac_comb = {}
temp = {}
for tag in fac_meaning.index:
    temp[tag[:-3]] = uc.cs_rank(fac_earning[tag[:-3]])
comb = pd.concat(temp.values())
fac_comb['50%_eq_fundamental_earning'] = comb.groupby(comb.index).mean()
fac_comb['50%_eq_fundamental_earning'].index = pd.to_datetime(fac_comb['50%_eq_fundamental_earning'].index)
f = open(data_pat + '/50%_eq/fac_earning.pkl', 'wb')  # remember to change this
pickle.dump(fac_comb, f, -1)
f.close()
"""

# """
fac_meaning = fac_meaning[fac_meaning['tag1'] == 'valuation']
fac_comb = {}
temp = {}
for tag in fac_meaning.index:
    temp[tag[:-3]] = uc.cs_rank(fac_valuation[tag[:-3]])
comb = pd.concat(temp.values())
fac_comb['50%_eq_fundamental_valuation'] = comb.groupby(comb.index).mean()
fac_comb['50%_eq_fundamental_valuation'].index = pd.to_datetime(
    fac_comb['50%_eq_fundamental_valuation'].index)
f = open(data_pat + '/50%_eq/fac_valuation.pkl', 'wb')  # remember to change this
pickle.dump(fac_comb, f, -1)
f.close()
# """
    print(tag, len(temp))
    co = rank_corr.loc[temp_name, temp_name]
    co1 = co.reindex(co.columns)  # align row order with column order
    cluster_corr[tag] = co1.mask(co1.isna(), co1.T)  # fill missing entries from the transpose to symmetrize
    sharp = fac_meaning.loc[temp, 'sharp_ratio']
    sharp.index = [i[15:-3] for i in sharp.index.tolist()]
    cluster_sharp[tag] = sharp
    fac_perf.loc[temp, :].to_csv(out_path + '/' + str(tag) + '.csv')

# Factor aggregation
fac_data = pd.read_pickle(data_pat + '/all_fac_20170101-20210228.pkl')
fac_comb = {}
for tag in cluster_sharp.keys():
    temp = {}
    for i in cluster_sharp[tag].index.tolist():
        temp[i] = uc.cs_rank(fac_data[i]) * cluster_sharp[tag][i]  # weight each factor's rank by its Sharpe ratio
    comb = pd.concat(temp.values())
    fac_comb[tag] = comb.groupby(comb.index).mean()
    fac_comb[tag].index = pd.to_datetime(fac_comb[tag].index)
f = open(out_path + '/fac.pkl', 'wb')
pickle.dump(fac_comb, f, -1)
f.close()

# Correlations between the new aggregated factors
co_rank = cal_factor_corr(fac_comb, out_path)
print(co_rank)

# Summarize the performance of the aggregated factors
type = 'sharpe_weight_1'  # remember to change this
perf_path = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/hfmf/' + str(type) + '/eq_tvwap'
results_perf = {}
pickle.dump(fac_comb, f, -1)
f.close()
"""

# """
# Aggregation method (8): enumerate all 2**n combinations and aggregate each
# with equal weights
fac_meaning = fac_meaning.sort_values(by='sharp_ratio', axis=0, ascending=False)
fac_choose = fac_meaning.index
comb = []
for i in range(len(fac_choose)):
    comb.extend(list(combinations(fac_choose, i + 1)))
fac_comb = {}
for com in comb:
    temp = {}
    comb_name = '('
    for ele in com:
        temp[ele] = uc.cs_rank(fac_data[ele])
        comb_name = comb_name + ele.split('_')[-2] + ','
    stacked = pd.concat(temp.values())  # renamed from `comb` to avoid rebinding the list being iterated
    comb_name = comb_name + ')'
    print(comb_name)
    fac_comb['iter_' + comb_name + '_eq'] = stacked.groupby(stacked.index).mean()
    fac_comb['iter_' + comb_name + '_eq'].index = pd.to_datetime(fac_comb['iter_' + comb_name + '_eq'].index)

# Split the results into chunks
new_name = list(fac_comb.keys())
factor_1 = {k: fac_comb[k] for k in new_name[0:400]}  # remember to change this
f = open(data_pat + '/fac_select/iter_eq/fac_1.pkl', 'wb')  # remember to change this
pickle.dump(factor_1, f, -1)
f.close()
# """

"""
from ft_platform.utils import utils_calculation as uc
import pandas as pd
import pickle

data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group/linear_model'  # remember to change this

# Load each model's out-of-sample predictions and rank them cross-sectionally
fac_model = {}
ols_pred = pd.read_pickle(data_pat + '/ols/fac_pool.pkl')
fac_model['ols'] = uc.cs_rank(ols_pred['pool_480'])
ridge_pred = pd.read_pickle(data_pat + '/ridge/fac_0.2.pkl')
fac_model['ridge'] = uc.cs_rank(ridge_pred['pool_480_0.2'])
lasso_pred = pd.read_pickle(data_pat + '/lasso/fac_4e-05.pkl')
fac_model['lasso'] = uc.cs_rank(lasso_pred['pool_480_4e-05'])
elnet_pred = pd.read_pickle(data_pat + '/elnet/fac_0.1_0.0004.pkl')
fac_model['elnet'] = uc.cs_rank(elnet_pred['pool_480_0.1_0.0004'])
logit_pred = pd.read_pickle(data_pat + '/logit/fac_none.pkl')
fac_model['logit'] = uc.cs_rank(logit_pred['pool_480_none'])
nbayes_pred = pd.read_pickle(data_pat + '/bayes/fac.pkl')
fac_model['nbayes'] = uc.cs_rank(nbayes_pred['pool_480'])
pls_pred = pd.read_pickle(data_pat + '/pls/fac_6.pkl')
fac_model['pls'] = uc.cs_rank(pls_pred['pool_480_6'])
rf_pred = pd.read_pickle(data_pat + '/random_forest/fac_300_10_0.6.pkl')
fac_model['rf'] = uc.cs_rank(rf_pred['pool_480_300_10_0.6'])
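# The eight model predictions above are all reduced to cross-sectional ranks,
# which makes them directly comparable. If one wanted to blend them, the repo's
# usual concat + groupby-mean idiom applies; this equal-weight blend is an
# illustrative sketch, not necessarily what the original script does next:
"""
stacked = pd.concat(fac_model.values())
fac_blend = stacked.groupby(stacked.index).mean()
fac_blend.index = pd.to_datetime(fac_blend.index)
"""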
f = open(data_pat + '/fac_addfunda/best_sharpe_weight/fac.pkl', 'wb')  # remember to change this
pickle.dump(fac_comb, f, -1)
f.close()
"""

# Aggregation method (3): take the ten aggregated factors with the highest
# Sharpe ratios, enumerate all 2**n combinations, and aggregate each with
# equal weights
fac_meaning = fac_meaning.sort_values(by='sharp_ratio', axis=0, ascending=False)
fac_choose = fac_meaning.index[0:10]
comb = []
for i in range(len(fac_choose)):
    comb.extend(list(combinations(fac_choose, i + 1)))
fac_comb = {}
for com in comb:
    temp = {}
    comb_name = '('
    for ele in com:
        temp[ele] = uc.cs_rank(fac_all[ele])
        if ele.split('_')[-2] == 'fundamental':
            comb_name = comb_name + ele.split('_')[-1] + ','
        else:
            comb_name = comb_name + ele.split('_')[-2] + ','
    stacked = pd.concat(temp.values())  # renamed from `comb` to avoid rebinding the list being iterated
    comb_name = comb_name + ')'
    print(comb_name)
    fac_comb['iter_' + comb_name + '_eq'] = stacked.groupby(stacked.index).mean()
    fac_comb['iter_' + comb_name + '_eq'].index = pd.to_datetime(fac_comb['iter_' + comb_name + '_eq'].index)

# Split the results into chunks
new_name = list(fac_comb.keys())
factor_1 = {k: fac_comb[k] for k in new_name[0:200]}  # remember to change this
f = open(data_pat + '/fac_addfunda/iter10_eq/fac_1.pkl', 'wb')  # remember to change this
pickle.dump(factor_1, f, -1)
    os.makedirs(pat_str)
    total_data = pd.concat(fac_dict.values(), keys=fac_dict.keys())
    total_data = total_data.reset_index().set_index('level_1')
    # Spearman correlation between factors on each date, then averaged over dates
    corank_total = total_data.groupby(total_data.index).apply(
        lambda g: g.set_index('level_0').T.corr('spearman'))
    co_rank = corank_total.groupby(corank_total.index.get_level_values(1)).mean()
    co_rank = co_rank.reindex(co_rank.columns)  # align row order with column order to form a symmetric matrix
    co_rank.to_csv(pat_str + "/mf_hfmf_cluster/mf_hfmf_rank_corr.csv",
                   index=True, encoding='utf_8_sig')  # remember to change this
    return co_rank


# Correlations between the new aggregated factors
co_rank = cal_factor_corr(fac_comb, data_pat)
print(co_rank)

# Aggregation method (1): equal-weight average within the same category
new_fac = {}
comb = pd.concat([uc.cs_rank(fac_comb['高频资金流分布']), uc.cs_rank(fac_comb['日间资金流波动'])])
new_fac['资金流的稳定性'] = comb.groupby(comb.index).mean()
comb = pd.concat([uc.cs_rank(fac_comb['反转因子改进_日频资金流']), uc.cs_rank(fac_comb['反转因子改进_高频资金流'])])
new_fac['反转因子改进_资金流'] = comb.groupby(comb.index).mean()
comb = pd.concat([uc.cs_rank(fac_comb['高频资金流分布']), uc.cs_rank(fac_comb['日间资金流波动']),
                  uc.cs_rank(fac_comb['主力流入流出占比'])])
new_fac['资金流的稳定性+主力流入流出占比'] = comb.groupby(comb.index).mean()
f = open(data_pat + '/mf_hfmf_cluster/eq/fac.pkl', 'wb')
pickle.dump(new_fac, f, -1)
f.close()

# Aggregation method (2): Sharpe-weighted within the same category (weights hard-coded)
new_fac = {}
comb = pd.concat([uc.cs_rank(fac_comb['高频资金流分布']) * 0.648901798,
                  uc.cs_rank(fac_comb['日间资金流波动']) * 0.509416429])
new_fac['资金流的稳定性'] = comb.groupby(comb.index).mean()
comb = pd.concat([uc.cs_rank(fac_comb['反转因子改进_日频资金流']) * 0.06240904,
                  uc.cs_rank(fac_comb['反转因子改进_高频资金流']) * 0.110874718])
new_fac['反转因子改进_资金流'] = comb.groupby(comb.index).mean()
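# The correlation tables in this repo sometimes hold only one triangle of
# values, which is why slices of rank_corr elsewhere are symmetrized via
# reindex plus mask-with-transpose. A tiny self-contained illustration of that
# idiom, inert by convention:
"""
import numpy as np
co = pd.DataFrame([[1.0, 0.3], [np.nan, 1.0]], index=['x', 'y'], columns=['x', 'y'])
co1 = co.reindex(co.columns)        # align row order with column order
sym = co1.mask(co1.isna(), co1.T)   # fill NaNs from the transpose
print(sym.equals(sym.T))            # True: the matrix is now symmetric
"""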
hfmf_value = {}
path = 'E:/Share/FengWang/Alpha/mine/hfmf_factor/oos/clean'
for j in os.listdir(path):
    temp = h5py.File(path + '/' + j, 'r')
    hfmf_value[j[:-3]] = pd.DataFrame(temp['data'][:].astype(float),
                                      columns=temp['code'][:].astype(str),
                                      index=temp['trade_date'][:].astype(str))
all_fac = dict(factor_value_adj, **hfmf_value)

# Out-of-sample combination within each factor cluster
print('cluster_comb')
fac_cluster = {}
for fac_gr, fac_names in factor_name.items():
    comb_temp = {}
    for fac_name in fac_names:
        comb_temp[fac_name] = uc.cs_rank(all_fac[fac_name])
    comb = pd.concat(comb_temp.values())
    fac_cluster[fac_gr] = comb.groupby(comb.index).mean()
    fac_cluster[fac_gr].index = pd.to_datetime(fac_cluster[fac_gr].index)
f = open(
    'E:/FT_Users/LihaiYang/Files/factor_comb_data/all_cluster_comb_oos/simple_avg/9.pkl',
    'wb')  # remember to change this path
pickle.dump(fac_cluster, f, -1)
f.close()

# Out-of-sample combination across clusters
print('all_cluster')
cluster_num = 1  # remember to change this
all_cluster = {}
all_cluster_name = pd.read_pickle(
fac_comb['all_eq_fundamental_growth'].index = pd.to_datetime(fac_comb['all_eq_fundamental_growth'].index)
f = open(data_pat + '/all_eq/fac_growth.pkl', 'wb')  # remember to change this
pickle.dump(fac_comb, f, -1)
f.close()
"""

"""
cal_factor_corr(fac_earning, data_pat + '/earning')
fac_comb = {}
temp = {}
for tag in fac_earning.keys():
    temp[tag] = uc.cs_rank(fac_earning[tag])
comb = pd.concat(temp.values())
fac_comb['all_eq_fundamental_earning'] = comb.groupby(comb.index).mean()
fac_comb['all_eq_fundamental_earning'].index = pd.to_datetime(fac_comb['all_eq_fundamental_earning'].index)
f = open(data_pat + '/all_eq/fac_earning.pkl', 'wb')  # remember to change this
pickle.dump(fac_comb, f, -1)
f.close()
"""

cal_factor_corr(fac_valuation, data_pat + '/valuation')
fac_comb = {}
temp = {}
for tag in fac_valuation.keys():
    temp[tag] = uc.cs_rank(fac_valuation[tag])
comb = pd.concat(temp.values())
fac_comb['all_eq_fundamental_valuation'] = comb.groupby(comb.index).mean()
fac_comb['all_eq_fundamental_valuation'].index = pd.to_datetime(fac_comb['all_eq_fundamental_valuation'].index)
f = open(data_pat + '/all_eq/fac_valuation.pkl', 'wb')  # remember to change this
pickle.dump(fac_comb, f, -1)
f.close()