df=False) mine_summary = query_data.get_alphafactors_info(user='******') # 调整正负 factor_value_adj = {} for summa in mine_summary: if summa['factor_name'] in list(factor_value.keys()): if 'IC' in list(summa['perf']['1_d'].keys()): factor_value_adj[summa['factor_name']] = factor_value[ summa['factor_name']] * uc.sign(summa['perf']['1_d']['IC']) else: factor_value_adj[summa['factor_name']] = factor_value[ summa['factor_name']] * uc.sign( summa['perf']['1_d']['ic-mean']) # 建立股票在未来n日的涨跌标签 oc_data = fetch_data.fetch(begin, end, ['stock_adjopen', 'stock_adjclose']) ud_tag = uc.ts_delay(oc_data['stock_adjclose'], -pred_window) / uc.ts_delay( oc_data['stock_adjopen'], -1) - 1 # 以第二日的开盘价买入 ud_tag = ud_tag.mask(ud_tag > 0, 1) ud_tag = ud_tag.mask(ud_tag < 0, 0) # 股票因子值的reshape new_f = {} for k, v in factor_value_adj.items(): new_v = pd.DataFrame(v.stack()) new_v.columns = [k] new_f[k] = new_v new_f = pd.concat(new_f.values(), axis=1) # 滚动生成上涨概率预测 prediction = {}
# Open question from the author: should factors be standardized cross-sectionally first?
fac_data = {k: uc.cs_standardize(v) for k, v in fac_data.items()}

# """
# Descriptive statistics of every factor.
factor_describe = {}
for fac, fac_df in fac_data.items():
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # describe() is evaluated once and reused for the stored table and the printed means.
    fac_desc = fac_df.T.describe()
    # Keep the descriptive statistics of this factor.
    factor_describe[fac] = fac_desc.T
    # Print the mean of each descriptive statistic.
    print(fac, fac_desc.mean(axis=1))
# NOTE(review): assumed to run once after the loop (end-of-section timestamp) - confirm.
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
# """

# Prediction target: return from the next day's first-hour TVWAP to the
# following day's first-hour TVWAP.
begin = '2017-01-01'
end = '2021-03-02'
# NOTE(review): the author asks whether the adjusted series should be used here - confirm.
tvwap = fetch_data.fetch(begin, end, ['stock_twap_0930_1030'])
twap_px = tvwap['stock_twap_0930_1030']
next_re = uc.ts_delay(twap_px, -2) / uc.ts_delay(twap_px, -1) - 1
fac_data['next_re'] = next_re.dropna(how='all')

# Merge the per-day data: stack each table into one column named after its
# key, then place the columns side by side.
new_f = pd.concat(
    [v.stack().to_frame(name=k) for k, v in fac_data.items()],
    axis=1,
)

# The context manager closes the handle even when pickling raises.
# Remember to change the output path; protocol -1 selects the highest pickle protocol.
with open(data_pat + '/fac_reshape.pkl', 'wb') as f:
    pickle.dump(new_f, f, -1)
import time
import json

data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group'  # remember to change
begin = '2015-01-01'  # remember to change
end = '2021-03-31'

fac_data = pd.read_pickle(data_pat + '/num_restrict/fac_comb.pkl')

# Standard-normal noise with the same shape, index and columns as the
# combined factor table.
comb = fac_data['fac_choose_comb']
noise = pd.DataFrame(
    np.random.standard_normal(comb.shape),
    index=comb.index,
    columns=list(comb),
)

# Top-2000 stock universe, ranked per day on 'stock_tcap' (rank 1 = largest value).
cap_data = fetch_data.fetch(begin, end, ['stock_tcap'])
cap_rank = cap_data['stock_tcap'].rank(axis=1, ascending=False)
# Daily top-2000 names keep a truthy flag, everything else becomes NaN.
# Author's open question: only 1999 names on 2015-08-06?
in_top = cap_rank <= 2000
top2000 = in_top.where(in_top == 1)

# Blank out the noise for names outside the top-2000 universe.
noise = noise * top2000

# Pure noise, factor plus ranked noise, and ranked factor plus ranked noise.
noise_rank = noise.rank(axis=1)
fac_rand = {
    'rand': noise_rank,
    'info+rand': comb + noise_rank,
    'info+rand_eq': comb.rank(axis=1) + noise_rank,
}
for k, v in fac_rand.items():
    a = v.notna().sum(axis=1)
import pickle from utils_func import query_data from ft_platform.factor_process import fetch from copy import deepcopy import numpy as np import time import json from collections import Counter data_pat = 'E:/FT_Users/LihaiYang/Files/factor_comb_data/fac_meaning/5group' # 记得修改 # 计算未来1、3、5、10、20日收益率,以开盘1小时tvwap为标准 begin = '2015-01-01' # 记得修改 end = '2020-02-28' end1 = '2019-12-31' data = fetch_data.fetch(begin, end, ['stock_adjtwap_0930_1030']) index_data = fetch_data.fetch(begin, end, ['index_close'], '000905') stock_re = {} stock_re['1_d'] = uc.ts_delay(data['stock_adjtwap_0930_1030'], -2) / uc.ts_delay( data['stock_adjtwap_0930_1030'], -1) - 1 stock_re['3_d'] = uc.ts_delay(data['stock_adjtwap_0930_1030'], -4) / uc.ts_delay( data['stock_adjtwap_0930_1030'], -1) - 1 stock_re['5_d'] = uc.ts_delay(data['stock_adjtwap_0930_1030'], -6) / uc.ts_delay( data['stock_adjtwap_0930_1030'], -1) - 1 stock_re['10_d'] = uc.ts_delay(data['stock_adjtwap_0930_1030'], -11) / uc.ts_delay( data['stock_adjtwap_0930_1030'], -1) - 1 stock_re['20_d'] = uc.ts_delay(data['stock_adjtwap_0930_1030'],